In [None]:
import psycopg2
import pandas as pd
import os
import itertools
import numpy as np

# Connection settings are taken from the standard PG* environment variables
# when they are set, so real credentials never have to be committed with the
# notebook; the fallbacks are the local development values used so far.
# The SQLAlchemy engine created later must point at the same database.
conn = psycopg2.connect(
    host=os.environ.get('PGHOST', 'localhost'),
    port=int(os.environ.get('PGPORT', '54320')),
    dbname=os.environ.get('PGDATABASE', 'my_database'),
    password=os.environ.get('PGPASSWORD', 'pass'),
    user=os.environ.get('PGUSER', 'user'),
    )

In [None]:
from sqlalchemy import event, create_engine, Integer, String, DateTime
from urllib.parse import quote_plus

# Location and credentials are read from the PG* environment variables when
# present (falling back to the local development values) so secrets need not
# live in the notebook.  User and password are percent-encoded because
# characters such as '@' or '/' would otherwise corrupt the URL.  The raw
# psycopg2 connection opened earlier must target the same database.
engine = create_engine(
    'postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}'.format(
        user=quote_plus(os.environ.get('PGUSER', 'user')),
        password=quote_plus(os.environ.get('PGPASSWORD', 'pass')),
        host=os.environ.get('PGHOST', 'localhost'),
        port=int(os.environ.get('PGPORT', '54320')),
        dbname=os.environ.get('PGDATABASE', 'my_database'),
    )
)

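# Disabled experiment, kept for reference only.
# NOTE(review): `fast_executemany` appears to be a pyodbc cursor option rather than a
# psycopg2 one, and DB-API cursors expose no `commit()` - confirm before re-enabling.
# @event.listens_for(engine, 'before_cursor_execute')
# def receive_before_cursor_execute(conn, cursor, statement, params, context, executemany):
#     if executemany:
#         cursor.fast_executemany = True
#         cursor.commit()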

In [None]:
def create_neighbourhood_table():
    """Create the ``neighbourhood`` table if it does not already exist.

    Columns: ``user_id`` and ``response_user_id`` (integers), ``type``
    (up to 50 characters) and ``date`` (timestamp without time zone); all
    are NOT NULL.  The statement runs on the module-level ``conn`` and is
    committed on success.

    Raises:
        Exception: whatever the driver raises while executing or committing.
            The transaction is rolled back and the cursor closed before the
            error propagates, so ``conn`` stays usable for later cells.
    """
    query = """
    CREATE TABLE IF NOT EXISTS neighbourhood (
    user_id integer NOT NULL,
    response_user_id integer NOT NULL,
    type character varying(50) NOT NULL,
    date timestamp without time zone NOT NULL
    )
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        conn.commit()
    except Exception:
        # A failed statement leaves the shared connection inside an aborted
        # transaction; roll back so subsequent statements are not rejected.
        conn.rollback()
        raise
    finally:
        cur.close()

In [None]:
def create_user_mapping_table():
    """Create the ``user_mapping`` table if it does not already exist.

    Columns: ``user_id`` (integer), ``label`` (up to 50 characters) and
    ``date`` (timestamp without time zone); all are NOT NULL.  The
    statement runs on the module-level ``conn`` and is committed on success.

    Raises:
        Exception: whatever the driver raises while executing or committing.
            The transaction is rolled back and the cursor closed before the
            error propagates, so ``conn`` stays usable for later cells.
    """
    query = """
    CREATE TABLE IF NOT EXISTS user_mapping (
    user_id integer NOT NULL,
    label character varying(50) NOT NULL,
    date timestamp without time zone NOT NULL
    )
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        conn.commit()
    except Exception:
        # A failed statement leaves the shared connection inside an aborted
        # transaction; roll back so subsequent statements are not rejected.
        conn.rollback()
        raise
    finally:
        cur.close()

In [None]:
def create_counted_neighbours_table():
    """Create the ``counted_neighbours`` table if it does not already exist.

    Columns: ``user_id`` and ``number_of_links`` (integers), ``type``
    (up to 50 characters) and ``date`` (timestamp without time zone); all
    are NOT NULL.  The statement runs on the module-level ``conn`` and is
    committed on success.

    Raises:
        Exception: whatever the driver raises while executing or committing.
            The transaction is rolled back and the cursor closed before the
            error propagates, so ``conn`` stays usable for later cells.
    """
    query = """
    CREATE TABLE IF NOT EXISTS counted_neighbours (
    user_id integer NOT NULL,
    number_of_links integer NOT NULL,
    type character varying(50) NOT NULL,
    date timestamp without time zone NOT NULL
    )
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        conn.commit()
    except Exception:
        # A failed statement leaves the shared connection inside an aborted
        # transaction; roll back so subsequent statements are not rejected.
        conn.rollback()
        raise
    finally:
        cur.close()

In [None]:
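# One-time schema setup: uncomment and run on a fresh database before the feed cells,
# which INSERT into these tables. The statements use CREATE TABLE IF NOT EXISTS, so
# running them again is harmless.
#create_neighbourhood_table()
#create_user_mapping_table()
#create_counted_neighbours_table()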

In [None]:
def feed_neighbourhood_table():
    """Insert reply relationships into the ``neighbourhood`` table.

    Two kinds of links are collected, both excluding self-replies:

    * ``post_response`` - a comment joined to its post via ``post_id``;
      ``user_id`` is the post author, ``response_user_id`` the commenter.
    * ``comment_response`` - a comment joined to its parent comment via
      ``parentcomment_id``; ``user_id`` is the parent comment's author.

    The two sets are combined with ``UNION`` and inserted with the
    comment's ``date``.  The INSERT has no guard against rows that are
    already present, so calling this function again appends the same rows
    a second time.  The statement runs on the module-level ``conn`` and is
    committed on success.

    Raises:
        Exception: whatever the driver raises while executing or committing.
            The transaction is rolled back and the cursor closed before the
            error propagates, so ``conn`` stays usable for later cells.
    """
    query = """
    WITH posts_neighbours AS (
        SELECT p.author_id as user_id, c.author_id as response_user_id, 'post_response' as type, c.date as date 
        FROM posts p join comments c on p.id = c.post_id 
        WHERE p.author_id <> c.author_id
    ),
    comments_neighbours AS (
    SELECT pc.author_id as user_id, c.author_id as response_user_id, 'comment_response' as type, c.date as date 
    FROM comments pc join comments c on pc.id = c.parentcomment_id 
    WHERE pc.author_id <> c.author_id
    )
    INSERT INTO neighbourhood
    SELECT * FROM posts_neighbours UNION SELECT * FROM comments_neighbours
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        conn.commit()
    except Exception:
        # A failed statement (e.g. a missing table) leaves the shared
        # connection inside an aborted transaction; roll back so subsequent
        # statements are not rejected.
        conn.rollback()
        raise
    finally:
        cur.close()

In [None]:
# Fill `neighbourhood` with post-reply and comment-reply pairs; every run appends rows.
feed_neighbourhood_table()

In [None]:
def feed_user_mapping_table(engine):
    """Load the labelled-users CSV and write it to the ``user_mapping`` table.

    Only the ``user_id``, ``label`` and ``start_date`` columns of
    ``All_Labeled_users/labeled_users.csv`` are kept, with ``start_date``
    stored under the name ``date``.  Any existing ``user_mapping`` table is
    replaced (``if_exists='replace'``) and the DataFrame index is not written.

    Args:
        engine: database handle forwarded unchanged to ``DataFrame.to_sql``.
    """
    csv_path = "All_Labeled_users" + "/labeled_users.csv"
    source_columns = ["user_id", "label", "start_date"]

    labelled_users = pd.read_csv(csv_path)[source_columns].rename(
        columns={"start_date": "date"}
    )

    column_types = {
        "user_id": Integer(),
        "label": String(),
        "date": DateTime(),
    }
    labelled_users.to_sql(
        'user_mapping',
        engine,
        if_exists='replace',
        index=False,
        dtype=column_types,
    )

In [None]:
# Rewrite `user_mapping` from All_Labeled_users/labeled_users.csv (the table is replaced, not appended to).
feed_user_mapping_table(engine)

In [None]:
def feed_counted_neighbours_table():
    """Aggregate labelled links into the ``counted_neighbours`` table.

    Each ``neighbourhood`` row is joined to ``user_mapping`` on the
    responding user (``response_user_id = user_mapping.user_id``) when the
    link's date falls in the half-open window
    ``[mapping date, mapping date + 14 days)``.  The matches are grouped by
    the link's ``user_id``, the mapping ``label`` and the mapping ``date``;
    the group size is stored as ``number_of_links`` and the label as
    ``type``.

    The INSERT has no guard against rows that are already present, so
    calling this function again appends the same aggregates a second time.
    The statement runs on the module-level ``conn`` and is committed on
    success.

    Raises:
        Exception: whatever the driver raises while executing or committing.
            The transaction is rolled back and the cursor closed before the
            error propagates, so ``conn`` stays usable for later cells.
    """
    query = """
    WITH mapped_table AS (
    SELECT n.user_id, m.label, m.date, n.type 
    FROM neighbourhood n JOIN user_mapping m 
    ON n.response_user_id = m.user_id 
    AND m.date <= n.date AND n.date < m.date + interval '14 day')
    
    INSERT INTO counted_neighbours
    SELECT mt.user_id as user_id, count(*) as number_of_links, mt.label as type, mt.date AS date 
    FROM mapped_table mt 
    GROUP BY mt.user_id, mt.label, mt.date
    """
    cur = conn.cursor()
    try:
        cur.execute(query)
        conn.commit()
    except Exception:
        # A failed statement (e.g. a missing table) leaves the shared
        # connection inside an aborted transaction; roll back so subsequent
        # statements are not rejected.
        conn.rollback()
        raise
    finally:
        cur.close()

In [None]:
# Aggregate link counts into `counted_neighbours`; it reads `neighbourhood` and `user_mapping`,
# so run it after the two feed steps above. Every run appends rows.
feed_counted_neighbours_table()