## Importing Libraries

In [1]:
import csv
import os
import sys

import pandas as pd
import psycopg2

## Convert IMDB Data to CSV

In [2]:
# Location of the gzipped IMDB "title.basics" TSV dump, relative to this notebook.
# Point this at your own copy of the file.
file_path = '../../../imdb_data/title.basics.tsv.gz'

In [3]:
# The IMDB TSV dumps are plain tab-separated text with no quoting, so quote
# handling is switched off: with the default, a title containing a stray '"'
# makes the parser swallow the following rows into a single field.
# Every column is read as text and default NA detection is disabled so that
# genuine titles such as "NA" or "null" are not turned into missing values.
# IMDB's "\N" missing-value marker is kept as literal text, exactly as before,
# so the exported CSV carries the same cell values for well-formed rows.
df = pd.read_csv(
    file_path,
    sep='\t',
    dtype=str,
    keep_default_na=False,
    quoting=csv.QUOTE_NONE,
)

In [4]:
# Show the column labels of the loaded frame as a quick check of the file layout.
df.columns

Index(['tconst', 'titleType', 'primaryTitle', 'originalTitle', 'isAdult',
       'startYear', 'endYear', 'runtimeMinutes', 'genres'],
      dtype='object')

In [5]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,Pauvre Pierrot,0,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892,\N,\N,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893,\N,1,"Comedy,Short"


In [6]:
# Export the frame as comma-separated text; the pandas row index is not written.
df.to_csv(path_or_buf='../../../imdb_data/title_basics.csv', index=False)

## PostgreSQL Management Functions

In [14]:
# Load target and connection settings.
# The database values can be overridden with the standard libpq environment
# variables (PGDATABASE, PGHOST, PGPORT, PGUSER, PGPASSWORD) so that real
# credentials never have to be written into the notebook. The fallbacks are
# the original local-development defaults.
# NOTE: this rebinds `file_path` from the TSV source used earlier to the CSV export.
file_path = '../../../imdb_data/title_basics.csv'
table_name = 'title_basics'
dbname = os.environ.get('PGDATABASE', 'imdb_ex')
host = os.environ.get('PGHOST', 'localhost')
port = os.environ.get('PGPORT', '5432')
user = os.environ.get('PGUSER', 'postgres')
pwd = os.environ.get('PGPASSWORD', 'postgres')

In [10]:
def create_table_title_basics():
    """Create the ``title_basics`` table in the configured PostgreSQL database.

    Connects with the module-level ``dbname``, ``user``, ``pwd``, ``host`` and
    ``port`` settings, runs a single ``CREATE TABLE`` statement and commits it.
    Every column is a character type and the first column is named ``id``.

    Raises:
        psycopg2.Error: if the connection cannot be opened or the statement is
            rejected (for example because the table already exists).
    """
    conn = psycopg2.connect(dbname=dbname, user=user, password=pwd, host=host, port=port)
    try:
        cur = conn.cursor()
        cur.execute("""
            CREATE TABLE title_basics (
                id CHAR(10),
                titleType VARCHAR(30),
                primaryTitle TEXT,
                originalTitle TEXT,
                isAdult CHAR(10),
                startYear CHAR(5),
                endYear CHAR(5),
                runtimeMinutes TEXT,
                genres TEXT
            )
        """)
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    # The message uses the module-level `table_name`; the SQL above always
    # creates `title_basics`.
    print(f'Creation of {table_name}: SUCCESSFUL')

In [11]:
def load_data_2_table(file_path, table_name, dbname, host, port, user, pwd):
    """Replace the contents of a PostgreSQL table with the rows of a CSV file.

    The table is truncated (with CASCADE) and then refilled through
    ``COPY ... FROM STDIN`` in CSV mode; the first line of the file is treated
    as a header. Both statements run in one transaction that is committed only
    after the copy succeeds.

    Args:
        file_path: path of the CSV file to load.
        table_name: name of the target table; it is interpolated into the SQL
            text as-is, so it must come from a trusted source.
        dbname: database name.
        host: database server host.
        port: database server port.
        user: database user.
        pwd: password for ``user``.

    Raises:
        SystemExit: with exit code 1 when any step fails; the error message is
            printed first.
    """
    conn = None
    try:
        conn = psycopg2.connect(dbname=dbname, host=host, port=port, user=user, password=pwd)
        print("Connecting to Database")
        cur = conn.cursor()

        # pandas writes CSV as UTF-8 by default, so read it back as UTF-8
        # instead of the platform's locale encoding. The context manager
        # guarantees the file handle is released.
        with open(file_path, "r", encoding="utf-8") as csv_file:
            # Truncate the table first
            cur.execute("Truncate {} Cascade;".format(table_name))
            print("Truncated {}".format(table_name))

            # Load table from the file with header
            cur.copy_expert("copy {} from STDIN CSV HEADER QUOTE '\"'".format(table_name), csv_file)

        conn.commit()
        print("Loaded data into {}".format(table_name))

    except Exception as e:
        print("Error: {}".format(str(e)))
        # Same failure signal as before; the connection is closed without a
        # commit, so the truncate is not committed.
        sys.exit(1)

    finally:
        if conn is not None:
            conn.close()
            print("DB connection closed.")

In [15]:
load_data_2_table(file_path, table_name, dbname, host, port, user, pwd)

Connecting to Database
Truncated title_basics
Loaded data into title_basics
DB connection closed.
