In [1]:
#import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import time
from scipy.stats import linregress
import matplotlib.cm as cm
import matplotlib.colors as mcol
pd.set_option('display.max_rows', 1000)

# OMDB API key. Read it from the OMDB_API_KEY environment variable so a real
# secret never has to be committed with the notebook; when the variable is
# unset the original placeholder value is used.
import os
api_key = os.environ.get("OMDB_API_KEY", "example key")

In [2]:
#Load the raw IMDB export into a dataframe, then show its first five rows
movie_data_df = pd.read_csv(filepath_or_buffer="imdb (1000 movies) in june 2022.csv")
movie_data_df.head(n=5)

Unnamed: 0,ranking of movie\r\n,movie name\r\n,Year,certificate,runtime,genre,RATING,metascore,DETAIL ABOUT MOVIE\n,DIRECTOR\r\n,ACTOR 1\n,ACTOR 2\n,ACTOR 3,ACTOR 4,votes,GROSS COLLECTION\r\n
0,1,The Shawshank Redemption,-1994,15,142 min,Drama,9.3,81.0,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2603314,$28.34M
1,2,The Godfather,-1972,X,175 min,"Crime, Drama",9.2,100.0,The aging patriarch of an organized crime dyna...,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1798731,$134.97M
2,3,The Dark Knight,-2008,12A,152 min,"Action, Crime, Drama",9.0,84.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2574810,$534.86M
3,4,The Lord of the Rings: The Return of the King,-2003,12A,201 min,"Action, Adventure, Drama",9.0,94.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1787701,$377.85M
4,5,Schindler's List,-1993,15,195 min,"Biography, Drama, History",9.0,94.0,"In German-occupied Poland during World War II,...",Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1323776,$96.90M


In [3]:
#renaming columns, cleaning values and setting data types

# Raw CSV header -> tidy column name. Several raw headers carry stray
# "\r\n" / "\n" suffixes, so the keys must match them exactly.
COLUMN_RENAMES = {
    "ranking of movie\r\n": "IMDB Rank",
    "movie name\r\n": "Movie Name",
    "certificate": "Rating",
    "runtime": "Runtime (Minutes)",
    "genre": "Genre",
    "RATING": "IMDB Score",
    "metascore": "Metascore",
    "DETAIL ABOUT MOVIE\n": "Details",
    "DIRECTOR\r\n": "Director",
    "ACTOR 1\n": "Actor 1",
    "ACTOR 2\n": "Actor 2",
    "ACTOR 3": "Actor 3",
    "ACTOR 4": "Actor 4",
    "votes": "IMDB Votes",
    "GROSS COLLECTION\r\n": "BoxOffice (Millions)",
}

# Certificate code as found in the data -> standardized rating label.
# No label on the right is also a key on the left, so one dict-based replace
# gives the same result as applying the substitutions one after another.
RATING_MAP = {
    "A": "G",
    "U": "G",
    "AA": "PG",
    "12": "PG",
    "15": "PG-13",
    "12A": "PG-13",
    "UA": "PG-13",
    "18": "R",
    "X": "NC17",
}


def clean_movie_data(raw_df):
    """Return a cleaned, typed copy of the raw movie table.

    Steps, in order:
      1. rename the columns using COLUMN_RENAMES;
      2. drop every row that has a null in any column;
      3. "Year": keep the first run of digits (e.g. "-1994" -> 1994), as int;
      4. "Runtime (Minutes)": keep the text before the first space
         (e.g. "142 min" -> 142), as int;
      5. "Genre": split on commas into "Main Genre" (first entry) and
         "Sub Genre" (second entry, NaN when there is only one genre),
         then drop "Genre";
      6. "IMDB Score": cast to float;
      7. "BoxOffice (Millions)": strip the "$" prefix and "M" suffix
         (e.g. "$28.34M" -> 28.34), as float;
      8. "IMDB Votes": remove thousands separators, as int;
      9. "Rating": standardize certificate codes using RATING_MAP;
     10. reset the index to 0..n-1.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame as read from the raw CSV, with the original column headers.

    Returns
    -------
    pandas.DataFrame
        A new frame; raw_df is not modified.
    """
    df = raw_df.rename(columns=COLUMN_RENAMES).dropna().copy()

    # Raw string: "\d" inside a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    df["Year"] = df["Year"].str.extract(r"(\d+)", expand=False).astype("int")

    df["Runtime (Minutes)"] = (
        df["Runtime (Minutes)"].str.split(" ").str[0].astype("int")
    )

    # Split on the comma together with any whitespace around it, so the
    # second genre is "Drama" rather than " Drama" (leading space).
    genre_parts = df["Genre"].str.split(r"\s*,\s*")
    df["Main Genre"] = genre_parts.str[0]
    df["Sub Genre"] = genre_parts.str[1]
    df = df.drop(columns="Genre")

    #setting IMDB Score as float
    df["IMDB Score"] = df["IMDB Score"].astype("float")

    # Text before the first "M", then text after the first "$".
    df["BoxOffice (Millions)"] = (
        df["BoxOffice (Millions)"]
        .str.split("M").str[0]
        .str.split("$").str[1]
        .astype("float")
    )

    # astype(str) keeps the .str accessor usable even if the column was
    # parsed as numeric; regex=False makes the literal comma match explicit.
    df["IMDB Votes"] = (
        df["IMDB Votes"].astype(str).str.replace(",", "", regex=False).astype("int")
    )

    df["Rating"] = df["Rating"].replace(RATING_MAP)

    # drop=True discards the old index instead of adding an "index" column
    # that would then have to be dropped again.
    return df.reset_index(drop=True)


clean_df = clean_movie_data(movie_data_df)
clean_df.head()







Unnamed: 0,IMDB Rank,Movie Name,Year,Rating,Runtime (Minutes),IMDB Score,Metascore,Details,Director,Actor 1,Actor 2,Actor 3,Actor 4,IMDB Votes,BoxOffice (Millions),Main Genre,Sub Genre
0,1,The Shawshank Redemption,1994,PG-13,142,9.3,81.0,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2603314,28.34,Drama,
1,2,The Godfather,1972,NC17,175,9.2,100.0,The aging patriarch of an organized crime dyna...,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1798731,134.97,Crime,Drama
2,3,The Dark Knight,2008,PG-13,152,9.0,84.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2574810,534.86,Action,Crime
3,4,The Lord of the Rings: The Return of the King,2003,PG-13,201,9.0,94.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1787701,377.85,Action,Adventure
4,5,Schindler's List,1993,PG-13,195,9.0,94.0,"In German-occupied Poland during World War II,...",Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1323776,96.9,Biography,Drama


In [4]:
#add an "Awards" column, initialised to empty strings, to hold award info
clean_df = clean_df.assign(Awards="")
clean_df

Unnamed: 0,IMDB Rank,Movie Name,Year,Rating,Runtime (Minutes),IMDB Score,Metascore,Details,Director,Actor 1,Actor 2,Actor 3,Actor 4,IMDB Votes,BoxOffice (Millions),Main Genre,Sub Genre,Awards
0,1,The Shawshank Redemption,1994,PG-13,142,9.3,81.0,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2603314,28.34,Drama,,
1,2,The Godfather,1972,NC17,175,9.2,100.0,The aging patriarch of an organized crime dyna...,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1798731,134.97,Crime,Drama,
2,3,The Dark Knight,2008,PG-13,152,9.0,84.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2574810,534.86,Action,Crime,
3,4,The Lord of the Rings: The Return of the King,2003,PG-13,201,9.0,94.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1787701,377.85,Action,Adventure,
4,5,Schindler's List,1993,PG-13,195,9.0,94.0,"In German-occupied Poland during World War II,...",Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1323776,96.9,Biography,Drama,
5,6,The Godfather Part II,1974,NC17,202,9.0,90.0,The early life and career of Vito Corleone in ...,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1239027,57.3,Crime,Drama,
6,7,12 Angry Men,1957,G,96,9.0,96.0,The jury in a New York City murder trial is fr...,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,769113,4.36,Crime,Drama,
7,9,Pulp Fiction,1994,R,154,8.9,94.0,"The lives of two mob hitmen, a boxer, a gangst...",Quentin Tarantino,John Travolta,Uma Thurman,Samuel L. Jackson,Bruce Willis,1995346,107.93,Crime,Drama,
8,10,Inception,2010,PG-13,148,8.8,74.0,A thief who steals corporate secrets through t...,Christopher Nolan,Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot Page,Ken Watanabe,2284252,292.58,Action,Adventure,
9,11,The Lord of the Rings: The Two Towers,2002,PG-13,179,8.8,87.0,While Frodo and Sam edge closer to Mordor with...,Peter Jackson,Elijah Wood,Ian McKellen,Viggo Mortensen,Orlando Bloom,1614489,342.55,Action,Adventure,


In [5]:
#search OMDB API for award info and put result in our new column
#
# NOTE(review): the whole lookup below is commented out, so this cell does
# nothing when executed. The "Awards" column is initialised to "" in the
# previous cell, which means a fresh Restart & Run All leaves it empty and the
# later to_csv call writes those empty values to Refined_Data.csv. The
# populated "Awards" output shown further down presumably comes from an
# earlier run with this loop enabled -- TODO confirm.
#
# What the disabled loop does, per row of clean_df: query OMDB by title
# (parameter "t") using api_key, then copy the response's "Awards" field into
# clean_df at that row. The bare "except: pass" skips any row whose request or
# field lookup fails without reporting the failure.

# for index, row in clean_df.iterrows():
#     try:
#         params = {
#         "apikey": api_key,
#         "t": row["Movie Name"]}
    
#         url = "http://www.omdbapi.com"
#         res = requests.get(url,params=params).json()
#         clean_df.loc[index,"Awards"]=res["Awards"]
#     except: 
#         pass
    

In [6]:
#display the Awards column on its own
clean_df.loc[:, "Awards"]


0      Nominated for 7 Oscars. 21 wins & 43 nominatio...
1           Won 3 Oscars. 32 wins & 31 nominations total
2         Won 2 Oscars. 162 wins & 163 nominations total
3        Won 11 Oscars. 213 wins & 124 nominations total
4           Won 7 Oscars. 91 wins & 49 nominations total
5           Won 6 Oscars. 17 wins & 22 nominations total
6      Nominated for 3 Oscars. 17 wins & 13 nominatio...
7            Won 1 Oscar. 70 wins & 75 nominations total
8         Won 4 Oscars. 159 wins & 220 nominations total
9         Won 2 Oscars. 130 wins & 138 nominations total
10     Nominated for 1 Oscar. 12 wins & 38 nomination...
11        Won 4 Oscars. 123 wins & 127 nominations total
12          Won 6 Oscars. 51 wins & 75 nominations total
13                                3 wins & 6 nominations
14          Won 4 Oscars. 42 wins & 52 nominations total
15           Won 1 Oscar. 44 wins & 38 nominations total
16           Won 1 Oscar. 26 wins & 20 nominations total
17          Won 5 Oscars. 40 wi

In [7]:
#write the cleaned dataframe to disk, leaving the row index out of the file
clean_df.to_csv(path_or_buf="Refined_Data.csv", index=False)

In [8]:
#load the saved clean csv back in; this frame is used for the data analysis
refined_df = pd.read_csv(filepath_or_buffer="Refined_Data.csv")


In [9]:
#standardize movie ratings (single pass)
# The cleaning cell that builds clean_df already applies this mapping, so on
# a top-to-bottom run this cell changes nothing. It also runs after
# Refined_Data.csv has been written and re-read, so refined_df is unaffected.
# "A" is included so the mapping agrees with the one used in the cleaning
# cell. No label on the right is also a key on the left, so one dict-based
# replace gives the same result as the sequential replacements.
rating_map = {
    "A": "G",
    "U": "G",
    "AA": "PG",
    "12": "PG",
    "15": "PG-13",
    "12A": "PG-13",
    "UA": "PG-13",
    "18": "R",
    "X": "NC17",
}
clean_df["Rating"] = clean_df["Rating"].replace(rating_map)
clean_df

Unnamed: 0,IMDB Rank,Movie Name,Year,Rating,Runtime (Minutes),IMDB Score,Metascore,Details,Director,Actor 1,Actor 2,Actor 3,Actor 4,IMDB Votes,BoxOffice (Millions),Main Genre,Sub Genre,Awards
0,1,The Shawshank Redemption,1994,PG-13,142,9.3,81.0,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2603314,28.34,Drama,,Nominated for 7 Oscars. 21 wins & 43 nominatio...
1,2,The Godfather,1972,NC17,175,9.2,100.0,The aging patriarch of an organized crime dyna...,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1798731,134.97,Crime,Drama,Won 3 Oscars. 32 wins & 31 nominations total
2,3,The Dark Knight,2008,PG-13,152,9.0,84.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2574810,534.86,Action,Crime,Won 2 Oscars. 162 wins & 163 nominations total
3,4,The Lord of the Rings: The Return of the King,2003,PG-13,201,9.0,94.0,Gandalf and Aragorn lead the World of Men agai...,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1787701,377.85,Action,Adventure,Won 11 Oscars. 213 wins & 124 nominations total
4,5,Schindler's List,1993,PG-13,195,9.0,94.0,"In German-occupied Poland during World War II,...",Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1323776,96.9,Biography,Drama,Won 7 Oscars. 91 wins & 49 nominations total
5,6,The Godfather Part II,1974,NC17,202,9.0,90.0,The early life and career of Vito Corleone in ...,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1239027,57.3,Crime,Drama,Won 6 Oscars. 17 wins & 22 nominations total
6,7,12 Angry Men,1957,G,96,9.0,96.0,The jury in a New York City murder trial is fr...,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,769113,4.36,Crime,Drama,Nominated for 3 Oscars. 17 wins & 13 nominatio...
7,9,Pulp Fiction,1994,R,154,8.9,94.0,"The lives of two mob hitmen, a boxer, a gangst...",Quentin Tarantino,John Travolta,Uma Thurman,Samuel L. Jackson,Bruce Willis,1995346,107.93,Crime,Drama,Won 1 Oscar. 70 wins & 75 nominations total
8,10,Inception,2010,PG-13,148,8.8,74.0,A thief who steals corporate secrets through t...,Christopher Nolan,Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot Page,Ken Watanabe,2284252,292.58,Action,Adventure,Won 4 Oscars. 159 wins & 220 nominations total
9,11,The Lord of the Rings: The Two Towers,2002,PG-13,179,8.8,87.0,While Frodo and Sam edge closer to Mordor with...,Peter Jackson,Elijah Wood,Ian McKellen,Viggo Mortensen,Orlando Bloom,1614489,342.55,Action,Adventure,Won 2 Oscars. 130 wins & 138 nominations total
