# Creating a Postgres database to store 'user' and 'tweet' data:

In [1]:
import pandas as pd
import numpy as np
import ast
import decimal

In [2]:
# Load the raw Twitter export; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv("Economy_USA_final.csv")

##### Extracting 'user' information from the data:

In [3]:
# One serialized 'user' record per tweet row (parsed in the next cell).
df_u = df.loc[:, "user"]

In [4]:
# Parse every serialized 'user' record into a one-row DataFrame, keeping the
# row label of the tweet it came from so users and tweets stay aligned.
#
# All rows are processed, including the first one: skipping a row here would
# leave the user table one row short of the tweet table and shift every
# user/tweet pairing once the index is reset.
#
# NOTE(review): eval() automatically injects __builtins__ into the supplied
# globals, so the evaluated expression can still reach built-in functions;
# only Decimal is added explicitly. This is NOT a sandbox -- run it only on
# a CSV from a trusted source.
data = []

for row_label, raw_user in df_u.items():
    # Parse the text as a single Python expression.
    tree = ast.parse(raw_user, mode='eval')

    # Compile the AST into a code object.
    clause = compile(tree, '<AST>', 'eval')

    # Evaluate it with Decimal available by name.
    user_record = eval(clause, dict(Decimal=decimal.Decimal))

    data.append(pd.DataFrame(user_record, index=[row_label]))

In [5]:
# Stack the one-row frames vertically into a single 'user' table.
user_df = pd.concat(data, axis=0)

In [6]:
# Remove the user fields that are not kept in the 'user' table.
# The frame is modified in place.
user_df.drop(
    labels=[
        'url',
        'entities',
        'utc_offset',
        'time_zone',
        'geo_enabled',
        'lang',
        'contributors_enabled',
        'is_translator',
        'is_translation_enabled',
        'profile_background_color',
        'profile_background_image_url',
        'profile_background_image_url_https',
        'profile_background_tile',
        'profile_image_url',
        'profile_image_url_https',
        'profile_banner_url',
        'profile_link_color',
        'profile_sidebar_border_color',
        'profile_sidebar_fill_color',
        'profile_text_color',
        'profile_use_background_image',
        'has_extended_profile',
        'default_profile',
        'default_profile_image',
        'following',
        'follow_request_sent',
        'notifications',
        'translator_type',
        'withheld_in_countries',
    ],
    axis='columns',
    inplace=True,
)

In [7]:
# Prefix the identifier columns with 'user_' (in place).
user_df.rename(
    mapper={'id': 'user_id', 'id_str': 'user_id_str'},
    axis='columns',
    inplace=True,
)

In [8]:
# Replace the current row labels with a fresh 0..n-1 index, discarding the
# old labels instead of keeping them as a column.
user_df.reset_index(inplace=True, drop=True)

##### Extracting 'tweet' information from the data:

In [9]:
# Build the 'tweet' table as a new frame: the raw data minus the columns
# listed here.
tweet_df = df.drop(
    labels=['Unnamed: 0', 'truncated', 'metadata', 'entities', 'user'],
    axis='columns',
)

In [10]:
# Rename the tweet's own identifier so it cannot be confused with 'user_id'.
# The rename must target tweet_df: tweet_df is a separate frame produced by
# df.drop(...), so renaming the column on df would leave tweet_df unchanged.
tweet_df = tweet_df.rename(columns={'id': 'tweet_id'})

In [11]:
# Attach the author's id to each tweet.
# Assigning a Series aligns on index labels, not on position: each tweet_df
# row receives the user_id stored under the same label in user_df, and rows
# whose label is absent from user_df get NaN.
u_id = user_df.loc[:, "user_id"]
tweet_df["user_id"] = u_id

In [12]:
# Replace the current row labels with a fresh 0..n-1 index, discarding the
# old labels instead of keeping them as a column.
tweet_df.reset_index(inplace=True, drop=True)

##### Connecting to the Postgres database:

In [13]:
import os

from sqlalchemy import create_engine

# Connection URL for the target database. Set the TWITTER_DB_URL environment
# variable to supply real credentials without committing them to the
# notebook; the fallback is the local development default.
# The URL may also name the driver explicitly, e.g.
# 'postgresql+psycopg2://<user>:<password>@<host>/<database>'.
DATABASE_URL = os.environ.get(
    'TWITTER_DB_URL',
    'postgresql://postgres:postgres@localhost:5432/twitter',
)
engine = create_engine(DATABASE_URL)

##### Storing 'user' and 'tweet' data in our database:

In [14]:
# Write both frames to Postgres without their DataFrame index.
# if_exists='replace' drops and recreates a table that already exists, so
# re-running the notebook refreshes the tables; with the to_sql default
# ('fail') a second run raises because the table is already present.
user_df.to_sql('user_df', con=engine, index=False, if_exists='replace')
tweet_df.to_sql('tweet_df', con=engine, index=False, if_exists='replace')