In [1]:
import os

import numpy as np
import pandas as pd
import psycopg2

In [None]:
class TwitterDB:
    """Small helper around a psycopg2 connection for the ``twitter_users`` table.

    Failures are reported with ``print`` instead of being raised, so a failed
    connection leaves ``conn`` and ``cur`` set to ``None`` and later calls
    print an error rather than crash the notebook.
    """

    def __init__(self, host, database, user, password):
        """Store the connection settings and attempt to connect immediately."""
        self.host = host
        self.database = database
        self.user = user
        self.password = password
        # Defined up front so the attribute exists even if connecting fails.
        self.cur = None
        self.conn = self._connect()

    def _connect(self):
        """Open a connection and a cursor.

        Returns:
            The open connection, or ``None`` (after printing the error) when
            the connection attempt fails.
        """
        try:
            conn = psycopg2.connect(
                host=self.host,
                database=self.database,
                user=self.user,
                password=self.password
            )
            # The cursor has to come from the local connection object:
            # self.conn is only assigned once this method has returned.
            self.cur = conn.cursor()
            print("Connection to PostgreSQL database successful")
            return conn
        except Exception as e:
            print(f"Connection failed. Error: {e}")
            self.cur = None
            return None

    def disconnect(self):
        """Close the cursor and the connection, printing the outcome."""
        try:
            self.close()
            print("Disconnected from PostgreSQL database")
        except Exception as e:
            print(f"Error: {e}")

    def insert_user(self, user):
        """Insert one user row and commit it.

        Args:
            user: Any object exposing the thirteen column values as attributes
                (``user_id``, ``name``, ``screen_name``, ``date``, ...).

        A row whose ``(user_id, date)`` pair already exists is skipped by the
        ``ON CONFLICT ... DO NOTHING`` clause. On any error the transaction is
        rolled back and the error is printed.
        """
        query = """INSERT INTO twitter_users (user_id, name, screen_name, date, twitter_join_date, location, description, verified, followers_count, friends_count, listed_count, favourites_count, preferred_language)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (user_id, date) DO NOTHING"""
        try:
            data = (
                user.user_id,
                user.name,
                user.screen_name,
                user.date,
                user.twitter_join_date,
                user.location,
                user.description,
                user.verified,
                user.followers_count,
                user.friends_count,
                user.listed_count,
                user.favourites_count,
                user.preferred_language
            )
            self.cur.execute(query, data)
            self.conn.commit()
        except Exception as e:
            # There is nothing to roll back when the connection never opened.
            if getattr(self, "conn", None) is not None:
                self.conn.rollback()
            print(f"Error: {e}")

    def insert_users(self, users):
        """Insert every user in ``users`` via :meth:`insert_user`.

        Args:
            users: An iterable of user objects, or a pandas DataFrame whose
                columns are named after the table columns.
        """
        # Iterating a DataFrame yields its column labels, not its rows, so
        # DataFrame-like inputs are converted to per-row named tuples first.
        if hasattr(users, "itertuples"):
            users = users.itertuples(index=False)
        for user in users:
            self.insert_user(user)

    # NOTE(review): TwitterUser is not defined in the visible part of this
    # notebook. Presumably it is declared elsewhere and accepts the table
    # columns positionally -- confirm. If it is missing, the NameError is
    # caught below, printed, and None is returned.
    def get_user(self, user_id):
        """Fetch one row for ``user_id`` and wrap it in a ``TwitterUser``.

        Returns:
            A ``TwitterUser`` built from the first fetched row, or ``None``
            when no row matches or an error occurred (the error is printed).
        """
        try:
            self.cur.execute("SELECT * FROM twitter_users WHERE user_id = %s", (user_id,))
            user_data = self.cur.fetchone()
            if user_data:
                return TwitterUser(*user_data)
            return None
        except Exception as e:
            print(f"Error: {e}")
            return None

    def get_users(self):
        """Fetch every row of ``twitter_users`` as a list of ``TwitterUser``.

        Returns:
            The list of users, or ``None`` when an error occurred (the error
            is printed).
        """
        try:
            self.cur.execute("SELECT * FROM twitter_users")
            return [TwitterUser(*user_data) for user_data in self.cur.fetchall()]
        except Exception as e:
            print(f"Error: {e}")
            return None

    def close(self):
        """Close the cursor and the connection; a no-op if never connected."""
        cur = getattr(self, "cur", None)
        conn = getattr(self, "conn", None)
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [None]:
# Connection settings come from the standard libpq environment variables when
# they are set; the fallbacks are this notebook's original local values.
db = TwitterDB(
    os.environ.get("PGHOST", "localhost"),
    os.environ.get("PGDATABASE", "postgres"),
    os.environ.get("PGUSER", "postgres"),
    os.environ.get("PGPASSWORD", ""),
)

In [None]:
# Load the user records from the CSV file (path is relative to the working directory).
df_users = pd.read_csv(filepath_or_buffer="data/users.csv")

In [None]:
# Order the rows by their 'date' column (ascending, the sort_values default).
df_users = df_users.sort_values(by="date")

In [None]:
# Preview the first five rows of the sorted frame.
df_users.head(n=5)

In [None]:
# Iterating a DataFrame directly yields its column labels, so hand over
# per-row named tuples, which expose each column as an attribute.
db.insert_users(df_users.itertuples(index=False))

In [None]:
# Release the database connection now that the inserts are done.
db.close()

In [8]:
# Loop through rows of dataframe
# for i, row in df_users.iterrows():
#     # Check if user already exists in fact table
#     cur.execute("SELECT * FROM User_table_fact WHERE user = %s", (row['user'],))
#     user_fact = cur.fetchone()
    
#     # If user exists, update row in fact table
#     if user_fact:
#         cur.execute("UPDATE User_table_fact SET name = %s, screen_name = %s, date = %s, twitter_join_date = %s, location = %s, description = %s, verified = %s, followers_count = %s, friends_count = %s, listed_count = %s, favourites_count = %s, preferred_language = %s WHERE user_id = %s", (row['name'], row['screen_name'], row['date'], row['twitter_join_date'], row['location'], row['description'], row['verified'], row['followers_count'], row['friends_count'], row['listed_count'], row['favourites_count'], row['preferred_language'], user_fact[0]))
#         user_id = user_fact[0]
#     # If user does not exist, insert row into fact table
#     else:
#         cur.execute("INSERT INTO User_table_fact (user, name, screen_name, date, twitter_join_date, location, description, verified, followers_count, friends_count, listed_count, favourites_count, preferred_language) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING user_id", (row['user'], row['name'], row['screen_name'], row['date'], row['twitter_join_date'], row['location'], row['description'], row['verified'], row['followers_count'], row['friends_count'], row['listed_count'], row['favourites_count'], row['preferred_language']))
#         user_id = cur.fetchone()[0]
    
#     # Insert row into dim table
#     cur.execute("INSERT INTO User_table_dim (user_id, date, twitter_join_date, location, description, verified, followers_count, friends_count, listed_count, favourites_count, preferred_language) VALUES (%s, %s, %s, %s, %s, %s, %s, %s