In [1]:
import numpy as np
import pandas as pd
import csv

from sqlalchemy import create_engine

# Extract

### Extract CSVs into DataFrames

In [2]:
imdb_path = "resources/imdb_top_1000.csv"
academy_award_path = "resources/academy_awards.csv"

imdb_df = pd.read_csv(imdb_path)
academy_award_df = pd.read_csv(academy_award_path)

# Transform

### Transform IMDB dataframe

In [3]:
imdb_df.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


### Transform Academy Award dataframe

In [4]:
academy_award_df.head()

Unnamed: 0,Year,Ceremony,Award,Winner,Name,Film
0,1927/1928,1,Actor,,Richard Barthelmess,The Noose
1,1927/1928,1,Actor,1.0,Emil Jannings,The Last Command
2,1927/1928,1,Actress,,Louise Dresser,A Ship Comes In
3,1927/1928,1,Actress,1.0,Janet Gaynor,7th Heaven
4,1927/1928,1,Actress,,Gloria Swanson,Sadie Thompson


In [5]:
# Selecting columns in academy award dataframe
academy_award_df2 = academy_award_df[['Award','Winner','Name','Film']]

# drop empty film & Name cells
academy_award_df2 = academy_award_df2.dropna(subset=['Film'])
academy_award_df2 = academy_award_df2.dropna(subset=['Name'])

# drop empty winner cells as they are only nominees 
academy_award_df2 = academy_award_df2.dropna(subset=['Winner'])

# # dropping times where a movie is credited with an award twice (for example, a movie wins best cinematography, and there's 2 cinematographers)
academy_award_df2 = academy_award_df2.drop_duplicates(subset=['Award','Film'])
academy_award_df2 = academy_award_df2.drop_duplicates(subset=['Award','Name'])

academy_award_df2


Unnamed: 0,Award,Winner,Name,Film
1,Actor,1.0,Emil Jannings,The Last Command
3,Actress,1.0,Janet Gaynor,7th Heaven
6,Art Direction,1.0,William Cameron Menzies,The Dove; Tempest
9,Cinematography,1.0,Charles Rosher,Sunrise
11,Directing (Comedy Picture),1.0,Lewis Milestone,Two Arabian Knights
...,...,...,...,...
9936,Sound Editing,1.0,Mad Max: Fury Road,Mark Mangini and David White
9942,Sound Mixing,1.0,Mad Max: Fury Road,"Chris Jenkins, Gregg Rudloff and Ben Osmo"
9946,Visual Effects,1.0,Ex Machina,"Andrew Whitehurst, Paul Norris, Mark Ardington..."
9951,Writing (Adapted Screenplay),1.0,The Big Short,Screenplay by Charles Randolph and Adam McKay


### ERD TABLES

In [12]:
#movies table
movies_table=imdb_df[["Series_Title","Released_Year","Certificate","Runtime","Genre","IMDB_Rating","Meta_score","Director","Gross"]]
movies_table

Unnamed: 0,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Meta_score,Director,Gross
0,The Shawshank Redemption,1994,A,142 min,Drama,9.3,80.0,Frank Darabont,28341469
1,The Godfather,1972,A,175 min,"Crime, Drama",9.2,100.0,Francis Ford Coppola,134966411
2,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,84.0,Christopher Nolan,534858444
3,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,90.0,Francis Ford Coppola,57300000
4,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,96.0,Sidney Lumet,4360000
...,...,...,...,...,...,...,...,...,...
995,Breakfast at Tiffany's,1961,A,115 min,"Comedy, Drama, Romance",7.6,76.0,Blake Edwards,
996,Giant,1956,G,201 min,"Drama, Western",7.6,84.0,George Stevens,
997,From Here to Eternity,1953,Passed,118 min,"Drama, Romance, War",7.6,85.0,Fred Zinnemann,30500000
998,Lifeboat,1944,,97 min,"Drama, War",7.6,78.0,Alfred Hitchcock,


In [29]:
#directors table
directors=pd.DataFrame(movies_table["Director"].unique())
directors.rename(columns={0:"Director_Name"})

Unnamed: 0,Director_Name
0,Frank Darabont
1,Francis Ford Coppola
2,Christopher Nolan
3,Sidney Lumet
4,Peter Jackson
...,...
543,Martin Rosen
544,Wolfgang Reitherman
545,Richard Lester
546,Blake Edwards


In [30]:
#directed_by table
directored_by=movies_table[["Series_Title","Director"]]
directored_by=directored_by.rename(columns={"Director":"Director_Name"})
directored_by

Unnamed: 0,Series_Title,Director_Name
0,The Shawshank Redemption,Frank Darabont
1,The Godfather,Francis Ford Coppola
2,The Dark Knight,Christopher Nolan
3,The Godfather: Part II,Francis Ford Coppola
4,12 Angry Men,Sidney Lumet
...,...,...
995,Breakfast at Tiffany's,Blake Edwards
996,Giant,George Stevens
997,From Here to Eternity,Fred Zinnemann
998,Lifeboat,Alfred Hitchcock


In [38]:
merge_test=pd.merge(academy_award_df2,movies_table,how='left',left_on="Film",right_on="Series_Title")
merge_test=merge_test.dropna(subset=["IMDB_Rating"])
merge_test=merge_test.drop(columns=["Film","Name"])
merge_test

Unnamed: 0,Award,Winner,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Meta_score,Director,Gross
12,Special Award,1.0,The Circus,1928,Passed,72 min,"Comedy, Romance",8.1,90.0,Charles Chaplin,
24,Directing,1.0,All Quiet on the Western Front,1930,U,152 min,"Drama, War",8.0,91.0,Lewis Milestone,3270000
60,Actor,1.0,It Happened One Night,1934,Approved,105 min,"Comedy, Romance",8.1,87.0,Frank Capra,4360000
61,Actress,1.0,It Happened One Night,1934,Approved,105 min,"Comedy, Romance",8.1,87.0,Frank Capra,4360000
122,Special Award,1.0,A Star Is Born,2018,UA,136 min,"Drama, Music, Romance",7.6,88.0,Bradley Cooper,215288866
...,...,...,...,...,...,...,...,...,...,...,...
1734,Actor in a Supporting Role,1.0,Whiplash,2014,A,106 min,"Drama, Music",8.5,88.0,Damien Chazelle,13092000
1736,Actress in a Supporting Role,1.0,Boyhood,2014,A,165 min,Drama,7.9,100.0,Richard Linklater,25379975
1754,Actor in a Leading Role,1.0,The Revenant,2015,A,156 min,"Action, Adventure, Drama",8.0,76.0,Alejandro G. Iñárritu,183637894
1755,Actor in a Supporting Role,1.0,Bridge of Spies,2015,UA,142 min,"Drama, History, Thriller",7.6,81.0,Steven Spielberg,72313754


In [39]:
merge_test2=pd.merge(academy_award_df2,movies_table,how="left",left_on="Name",right_on="Series_Title")
merge_test2=merge_test2.dropna(subset=["IMDB_Rating"])
merge_test2=merge_test2.drop(columns=["Film","Name"])
merge_test2

Unnamed: 0,Award,Winner,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Meta_score,Director,Gross
1758,Animated Feature Film,1.0,Inside Out,2015,U,95 min,"Animation, Adventure, Comedy",8.1,94.0,Pete Docter,356461711
1759,Costume Design,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1762,Film Editing,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1763,Makeup and Hairstyling,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1764,Music (Original Score),1.0,The Hateful Eight,2015,A,168 min,"Crime, Drama, Mystery",7.8,68.0,Quentin Tarantino,54117416
1766,Best Picture,1.0,Spotlight,2015,A,129 min,"Biography, Crime, Drama",8.1,93.0,Tom McCarthy,45055776
1767,Production Design,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1770,Sound Editing,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1771,Sound Mixing,1.0,Mad Max: Fury Road,2015,UA,120 min,"Action, Adventure, Sci-Fi",8.1,90.0,George Miller,154058340
1772,Visual Effects,1.0,Ex Machina,2014,UA,108 min,"Drama, Sci-Fi, Thriller",7.7,78.0,Alex Garland,25442958


In [43]:
#oscar_winners table
oscar_winners=pd.merge(merge_test,merge_test2,how='outer')
oscar_winners=oscar_winners.drop(columns=["Certificate","Winner","Released_Year","Runtime","Genre","IMDB_Rating","Meta_score","Director","Gross"])
oscar_winners

Unnamed: 0,Award,Series_Title
0,Special Award,The Circus
1,Directing,All Quiet on the Western Front
2,Actor,It Happened One Night
3,Actress,It Happened One Night
4,Special Award,A Star Is Born
...,...,...
130,Sound Editing,Mad Max: Fury Road
131,Sound Mixing,Mad Max: Fury Road
132,Visual Effects,Ex Machina
133,Writing (Adapted Screenplay),The Big Short


In [11]:
# oscar_categories table
# all unique award categories from the academy award dataframe

oscar_categories_df = pd.DataFrame(academy_award_df2['Award'].unique())

oscar_categories_df=oscar_categories_df.rename(columns={0:"Category Name"})

oscar_categories_df

Unnamed: 0,Category Name
0,Actor
1,Actress
2,Art Direction
3,Cinematography
4,Directing (Comedy Picture)
...,...
100,Animated Feature Film
101,Writing (Adapted Screenplay)
102,Sound Mixing
103,Makeup and Hairstyling


In [None]:
#genres table
#1NF table of (1) movie titles and (2) genres 

In [7]:
# Testing stuff, IMDB movie names

imdb_df['Series_Title'].count()

imdb_df['Series_Title'].nunique()


999

# Load

### Create database connection

In [8]:
connection_string = "postgres:postgres@localhost:5432/movies_project_db"
engine = create_engine(f'postgresql://{connection_string}')

### Load DataFrames into database

In [9]:
premise_transformed.to_sql(name='premise', con=engine, if_exists='append', index=True)

NameError: name 'premise_transformed' is not defined

In [None]:
county_transformed.to_sql(name='county', con=engine, if_exists='append', index=True)