# Modules and data

In [2]:
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [4]:
# Location of the cleaned CSV in the project's GitHub repository.
CLEANED_DATA_URL = 'https://raw.githubusercontent.com/JacopoMalatesta/imdb_most_popular_films/main/data/cleaned_df.csv'

# Use the first CSV column as the DataFrame index rather than loading it as a data column.
df = pd.read_csv(CLEANED_DATA_URL, index_col = 0)

# Secret info

Let's load our Postgres username and password from environment variables with the `os` module, so the credentials never appear in the notebook

In [6]:
# Look up the Postgres credentials in the process environment.
# Each value is None when the corresponding variable is not set.
postgres_user = os.getenv('postgres_username')
postgres_psw = os.getenv('postgres_psw')

# Creating the database

Creating the connection

In [7]:
# Administrative connection used to drop and create the 'imdb' database.
# Name the maintenance database explicitly: without `dbname`, libpq falls back to a
# database named after the user (or to PGDATABASE), which may not exist -- and if it
# resolved to 'imdb' itself, DROP DATABASE would refuse to drop the database in use.
conn = psycopg2.connect(host = 'localhost', dbname = 'postgres', user = postgres_user, password = postgres_psw)

Creating the cursor

In [8]:
# Cursor on the admin connection; the DROP DATABASE / CREATE DATABASE statements
# further down are executed through it.
cursor = conn.cursor()

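We need to put the connection in autocommit mode first: PostgreSQL refuses to run `DROP DATABASE` and `CREATE DATABASE` inside a transaction block, and psycopg2 opens one by default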

In [9]:
#  https://stackoverflow.com/questions/34484066/create-a-postgres-database-using-python

# DROP DATABASE / CREATE DATABASE cannot run inside a transaction block, and psycopg2
# implicitly opens a transaction before the first statement. Autocommit mode disables that.
# The `autocommit` property is psycopg2's documented replacement for the legacy
# set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) call and needs no extra import.
conn.autocommit = True

Dropping the 'imdb' database if it exists

In [10]:
# Destructive: removes the 'imdb' database and everything in it when it already exists;
# IF EXISTS makes this a no-op (instead of an error) when it does not.
cursor.execute('DROP DATABASE IF EXISTS imdb')

Let's create it

In [11]:
# Creates an empty 'imdb' database. Like the DROP, this relies on the autocommit mode
# configured on `conn` earlier: CREATE DATABASE cannot run inside a transaction block.
cursor.execute('CREATE DATABASE imdb')

Let's close down the connection

In [12]:
# Close the admin connection; `cursor`, which was created from it, is unusable afterwards.
conn.close()

# Putting the data in the database

Let's split the dataframe into three, one per table: `films`, `stats` and `people`. Each one keeps the `id` column.

In [20]:
# Column groups for the three tables; every group includes 'id'.
film_columns = ['id', 'title', 'release_date', 'country', 'language', 'genre', 'studios', 'color',
                'aspect_ratio', 'last_updated']
stat_columns = ['id', 'runtime', 'budget', 'revenue', 'imdb_rating', 'imdb_rating_count', 'metascore',
                'user_review_count', 'critic_review_count']
people_columns = ['id', 'director', 'writer', 'actors', 'cinematographer', 'editor', 'composer',
                  'production_designer', 'art_director', 'costume_designer', 'producers']

# Selecting with a list of labels yields a new DataFrame holding just those columns.
films = df[film_columns]
stats = df[stat_columns]
people = df[people_columns]

To use pandas with our database we need SQLAlchemy, so let's create the engine

In [29]:
# Percent-encode the credentials before embedding them in the URL: characters such as
# '@', ':', '/' or '%' in a password would otherwise be parsed as URL syntax and corrupt
# the connection string. `safe=''` encodes '/' too, and spaces become '%20' rather than '+'.
# Both credentials must be set: quote() raises TypeError on None.
encoded_user = quote(postgres_user, safe = '')
encoded_psw = quote(postgres_psw, safe = '')

# SQLAlchemy engine for the freshly created 'imdb' database on localhost (psycopg2 driver).
engine = create_engine(f'postgresql+psycopg2://{encoded_user}:{encoded_psw}@localhost/imdb')

Let's upload our three dataframes to the database

In [31]:
# Target table name -> DataFrame to store in it.
tables = {'films': films, 'stats': stats, 'people': people}

try:
    for table_name, frame in tables.items():
        # if_exists = 'replace' drops and recreates a table left over from a previous run;
        # index = False keeps the DataFrame index out of the table.
        frame.to_sql(con = engine, name = table_name, if_exists = 'replace', index = False)
finally:
    # Close the connections the engine keeps pooled. A lingering session on 'imdb' makes
    # the DROP DATABASE cell fail when the notebook is re-run without restarting the kernel.
    # The engine itself stays usable: it opens new connections on demand.
    engine.dispose()