In [8]:
import psycopg2
from decouple import config

!. ../.env

conn = psycopg2.connect("postgres://" + config("POSTGRES_USERNAME") + ":" + config("POSTGRES_PASSWORD") + "@raja.db.elephantsql.com:5432/mozfsrjp")
curs = conn.cursor()

In [9]:
# Start from a clean slate. IF EXISTS keeps this cell from raising (and
# aborting the open transaction) when the table has not been created yet,
# e.g. on the first run against a fresh database.
curs.execute("""
    DROP TABLE IF EXISTS comments;
""")

In [10]:
# DDL for the table that the batch-insert cell fills: one row per comment,
# keyed by id, with a float "saltiness" score stored alongside the text.
create_comments_sql = """
    CREATE TABLE comments (
        id BIGINT PRIMARY KEY,
        author VARCHAR(100),
        time BIGINT,
        comment_text TEXT,
        parent_id BIGINT,
        saltiness FLOAT
    );
"""
curs.execute(create_comments_sql)

In [11]:
# Release the cursor used for the DROP/CREATE statements, then commit the
# transaction on the connection so the schema change is persisted.
curs.close()
conn.commit()

### Load the CSV

In [12]:
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Single module-level analyzer instance; get_saltiness() below calls its
# polarity_scores() method and reads the "neg" entry of the result.
analyzer = SentimentIntensityAnalyzer()

def escape_string(text):
    """Backslash-escape double and single quotes in ``text``.

    Args:
        text: Value to escape. Only ``str`` instances are processed.

    Returns:
        The input with every ``"`` replaced by ``\\"`` and every ``'``
        replaced by ``\\'``; the placeholder ``"-"`` for any non-string input.
    """
    # Anything that is not a string collapses to the "-" placeholder.
    if not isinstance(text, str):
        return "-"

    double_quotes_escaped = re.sub(r"\"", "\\\"", text)
    return re.sub(r"'", "\\'", double_quotes_escaped)

def convert_int(x):
    """Convert ``x`` to ``int``, falling back to ``-1`` when it cannot be converted.

    Args:
        x: Any value accepted by ``int()`` (number, numeric string, ...).

    Returns:
        ``int(x)`` on success; ``-1`` when the conversion fails, e.g. for
        ``None``, a non-numeric string, NaN or infinity.
    """
    try:
        return int(x)
    # Catch only conversion failures: TypeError (e.g. None), ValueError
    # (e.g. "abc", NaN) and OverflowError (infinity). A bare ``except`` would
    # also swallow KeyboardInterrupt/SystemExit, making a long-running load
    # impossible to interrupt cleanly.
    except (TypeError, ValueError, OverflowError):
        return -1

def get_saltiness(x):
    """Return the "neg" polarity score of ``x`` from the module-level analyzer.

    Args:
        x: Text to score. Only ``str`` instances are analysed.

    Returns:
        ``analyzer.polarity_scores(x)["neg"]`` for a string; ``0.0`` for any
        non-string input.
    """
    # Non-string values are not passed to the analyzer and score 0.0.
    if not isinstance(x, str):
        return 0.0
    return analyzer.polarity_scores(x)["neg"]

In [13]:
import pandas as pd

# Load the whole CSV into memory. Later cells read an "author" column by name
# and several other columns by position, so the file's column order matters.
hn_df = pd.read_csv("../csv/most_recent_1_5mm.csv")

In [77]:
# Keep only comments written by the 1000 most frequent authors.
# NOTE(review): this cell's execution count (77) is out of sequence with the
# surrounding cells, and it overwrites hn_df in place; the insert output below
# reports 1,499,356 rows, so the filter was apparently not in effect when the
# insert last ran. Confirm the intended order before relying on Run All.
author_counts = hn_df["author"].value_counts()
top_1000 = author_counts.index[:1000]
is_top_author = hn_df["author"].isin(top_1000)
hn_df = hn_df[is_top_author]

In [14]:
from psycopg2.extras import execute_batch
import numpy as np

# Number of rows sent to the server per execute_batch() call.
batchsize = 10000

# Loop-invariant parameterized INSERT. psycopg2 binds each %s placeholder, so
# the comment text needs no manual quoting or escaping.
query = """
        INSERT INTO comments (id, author, time, comment_text, parent_id, saltiness)
        VALUES (%s, %s, %s, %s, %s, %s);
    """

total_rows = len(hn_df)

# Stop at total_rows (not total_rows + 1): the extra step produced an empty
# trailing batch whenever the row count was a multiple of batchsize, and made
# an empty frame fail with ZeroDivisionError in the progress line.
for ix in range(0, total_rows, batchsize):

    print(f"Batch {ix} / {total_rows} -- {ix/total_rows*100:.2f}%")

    # Columns are picked by position with .iloc (plain integer keys on a
    # label-indexed Series are deprecated positional access).
    # NOTE(review): positions 1, 2, 3, 4 and 7 are presumed to hold id, author,
    # time, comment text and parent id respectively, matching the INSERT column
    # list - confirm against the CSV header.
    batch = [
        [
            row.iloc[1],
            row.iloc[2],
            row.iloc[3],
            row.iloc[4],
            convert_int(row.iloc[7]),     # parent id; -1 when not convertible
            get_saltiness(row.iloc[4]),   # "neg" sentiment score of the text
        ]
        for _, row in hn_df.iloc[ix:ix + batchsize].iterrows()
    ]

    # The context manager closes the cursor even if execute_batch raises.
    # Nothing is committed here; the commit happens in the next cell.
    with conn.cursor() as curs:
        execute_batch(curs, query, batch)

Batch 0 / 1499356 -- 0.00%
Batch 10000 / 1499356 -- 0.67%
Batch 20000 / 1499356 -- 1.33%
Batch 30000 / 1499356 -- 2.00%
Batch 40000 / 1499356 -- 2.67%
Batch 50000 / 1499356 -- 3.33%
Batch 60000 / 1499356 -- 4.00%
Batch 70000 / 1499356 -- 4.67%
Batch 80000 / 1499356 -- 5.34%
Batch 90000 / 1499356 -- 6.00%
Batch 100000 / 1499356 -- 6.67%
Batch 110000 / 1499356 -- 7.34%
Batch 120000 / 1499356 -- 8.00%
Batch 130000 / 1499356 -- 8.67%
Batch 140000 / 1499356 -- 9.34%
Batch 150000 / 1499356 -- 10.00%
Batch 160000 / 1499356 -- 10.67%
Batch 170000 / 1499356 -- 11.34%
Batch 180000 / 1499356 -- 12.01%
Batch 190000 / 1499356 -- 12.67%
Batch 200000 / 1499356 -- 13.34%
Batch 210000 / 1499356 -- 14.01%
Batch 220000 / 1499356 -- 14.67%
Batch 230000 / 1499356 -- 15.34%
Batch 240000 / 1499356 -- 16.01%
Batch 250000 / 1499356 -- 16.67%
Batch 260000 / 1499356 -- 17.34%
Batch 270000 / 1499356 -- 18.01%
Batch 280000 / 1499356 -- 18.67%
Batch 290000 / 1499356 -- 19.34%
Batch 300000 / 1499356 -- 20.01%
Batch 

In [15]:
# Commit the rows inserted by the batch loop; until this runs they exist only
# inside the connection's open transaction.
conn.commit()

In [16]:
# Read back up to 1000 rows as a sanity check of the load. There is no
# ORDER BY, so which rows come back (and in what order) is not guaranteed.
query = """
    SELECT *
    FROM comments
    LIMIT 1000
"""
curs = conn.cursor()
curs.execute(query)
# fetchall() returns the rows as a list of tuples, one tuple per row.
res = curs.fetchall()

In [17]:
# Spot-check the first fetched row.
res[0]

(20954035,
 'chrisnager',
 1568309269,
 'Such a great idea. Thank you!',
 20948826,
 0.0)

In [18]:
# Spot-check the fourth fetched row (a longer comment with HTML entities).
res[3]

(20954038,
 'weare138',
 1568309300,
 '&gt; Most software engineers have a healthy circle of friends and social life.<p>I understand the point you&#x27;re trying to make but this isn&#x27;t a truism. It&#x27;s a subjective opinion that wouldn&#x27;t apply to any industry much less tech, which itself isn&#x27;t specific to software engineering. This is an issue that effects all specialized fields of study, interests and careers. I don&#x27;t feel generalizing the issue in a blanket statement that the problem is specific to the individual in question and somehow not related to circumstances beyond that individual&#x27;s control is beneficial to the OP and anyone dealing with similar issues.<p>How many people can astronauts have honest conversations with about traveling in space that other people would understand and empathize with? Obviously very few, but it would be inaccurate and irresponsible to lay blame on an astronaut if they feel socially isolated as a result.<p>Later in your post