In [72]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1. Read data

In [73]:
# Relative path to the IMDB movie CSV; it is resolved against the notebook's working directory.
dataset_path = "IMDB-Movie-Data.csv"

In [74]:
# Load the CSV into a DataFrame; the cells below all read from `data`.
data = pd.read_csv(dataset_path)

# 2. View data

In [75]:
# Preview the first five rows (the default for head()).
data.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


# 3. Understand some basic information about the data

In [76]:
# Per-column dtype and non-null count, plus index range and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rank                1000 non-null   int64  
 1   Title               1000 non-null   object 
 2   Genre               1000 non-null   object 
 3   Description         1000 non-null   object 
 4   Director            1000 non-null   object 
 5   Actors              1000 non-null   object 
 6   Year                1000 non-null   int64  
 7   Runtime (Minutes)   1000 non-null   int64  
 8   Rating              1000 non-null   float64
 9   Votes               1000 non-null   int64  
 10  Revenue (Millions)  872 non-null    float64
 11  Metascore           936 non-null    float64
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [77]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,Rank,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
count,1000.0,1000.0,1000.0,1000.0,1000.0,872.0,936.0
mean,500.5,2012.783,113.172,6.7232,169808.3,82.956376,58.985043
std,288.819436,3.205962,18.810908,0.945429,188762.6,103.25354,17.194757
min,1.0,2006.0,66.0,1.9,61.0,0.0,11.0
25%,250.75,2010.0,100.0,6.2,36309.0,13.27,47.0
50%,500.5,2014.0,111.0,6.8,110799.0,47.985,59.5
75%,750.25,2016.0,123.0,7.4,239909.8,113.715,72.0
max,1000.0,2016.0,191.0,9.0,1791916.0,936.63,100.0


# 4. Data Selection - Indexing and Slicing data

In [78]:
# Extract a single column as a Series: single brackets around one column label.
genre = data["Genre"]
genre

Unnamed: 0,Genre
0,"Action,Adventure,Sci-Fi"
1,"Adventure,Mystery,Sci-Fi"
2,"Horror,Thriller"
3,"Animation,Comedy,Family"
4,"Action,Adventure,Fantasy"
...,...
995,"Crime,Drama,Mystery"
996,Horror
997,"Drama,Music,Romance"
998,"Adventure,Comedy"


In [79]:
# Extract the same column as a one-column DataFrame: double brackets pass a
# list of labels. This rebinds `genre`, replacing the Series from the previous cell.
genre = data[["Genre"]]
genre

Unnamed: 0,Genre
0,"Action,Adventure,Sci-Fi"
1,"Adventure,Mystery,Sci-Fi"
2,"Horror,Thriller"
3,"Animation,Comedy,Family"
4,"Action,Adventure,Fantasy"
...,...
995,"Crime,Drama,Mystery"
996,Horror
997,"Drama,Music,Romance"
998,"Adventure,Comedy"


In [80]:
# Select several columns at once by passing a list of labels; the result is a DataFrame.
some_cols = data[["Title", "Description", "Year", "Rating"]]
some_cols

Unnamed: 0,Title,Description,Year,Rating
0,Guardians of the Galaxy,A group of intergalactic criminals are forced ...,2014,8.1
1,Prometheus,"Following clues to the origin of mankind, a te...",2012,7.0
2,Split,Three girls are kidnapped by a man with a diag...,2016,7.3
3,Sing,"In a city of humanoid animals, a hustling thea...",2016,7.2
4,Suicide Squad,A secret government agency recruits some of th...,2016,6.2
...,...,...,...,...
995,Secret in Their Eyes,"A tight-knit team of rising investigators, alo...",2015,6.2
996,Hostel: Part II,Three American college students studying abroa...,2007,5.5
997,Step Up 2: The Streets,Romantic sparks occur between two dance studen...,2008,6.2
998,Search Party,A pair of friends embark on a mission to reuni...,2014,5.6


In [81]:
# Pick the three text columns first, then take rows at positions 30-39
# (iloc slices are positional and exclude the end position).
data[["Title", "Genre", "Description"]].iloc[30:40]

Unnamed: 0,Title,Genre,Description
30,Why Him?,Comedy,A holiday gathering threatens to go off the ra...
31,Nocturnal Animals,"Drama,Thriller",A wealthy art gallery owner is haunted by her ...
32,X-Men: Apocalypse,"Action,Adventure,Sci-Fi",After the re-emergence of the world's first mu...
33,Deadpool,"Action,Adventure,Comedy",A fast-talking mercenary with a morbid sense o...
34,Resident Evil: The Final Chapter,"Action,Horror,Sci-Fi",Alice returns to where the nightmare began: Th...
35,Captain America: Civil War,"Action,Adventure,Sci-Fi",Political interference in the Avengers' activi...
36,Interstellar,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...
37,Doctor Strange,"Action,Adventure,Fantasy",While on a journey of physical and spiritual h...
38,The Magnificent Seven,"Action,Adventure,Western",Seven gunmen in the old west gradually come to...
39,5- 25- 77,"Comedy,Drama","Alienated, hopeful-filmmaker Pat Johnson's epi..."


# 5. Data Selection - Based on Conditional filtering

In [82]:
# Movies released 2011-2013 (inclusive), rated 7.0 or higher, whose revenue is
# at or above the 90th percentile of the Revenue column. Rows with a missing
# revenue compare False and are therefore excluded.
data[
    data["Year"].between(2011, 2013)
    & (data["Rating"] >= 7.0)
    & (data["Revenue (Millions)"] >= data["Revenue (Millions)"].quantile(0.9))
]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
76,77,The Avengers,"Action,Sci-Fi",Earth's mightiest heroes must come together an...,Joss Whedon,"Robert Downey Jr., Chris Evans, Scarlett Johan...",2012,143,8.1,1045588,623.28,69.0
114,115,Harry Potter and the Deathly Hallows: Part 2,"Adventure,Drama,Fantasy","Harry, Ron and Hermione search for Voldemort's...",David Yates,"Daniel Radcliffe, Emma Watson, Rupert Grint, M...",2011,130,8.1,590595,380.96,87.0
124,125,The Dark Knight Rises,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,78.0
134,135,Furious 6,"Action,Crime,Thriller",Hobbs has Dominic and Brian reassemble their c...,Justin Lin,"Vin Diesel, Paul Walker, Dwayne Johnson, Miche...",2013,130,7.1,318051,238.67,61.0
151,152,The Hunger Games,"Adventure,Sci-Fi,Thriller",Katniss Everdeen voluntarily takes her younger...,Gary Ross,"Jennifer Lawrence, Josh Hutcherson, Liam Hemsw...",2012,142,7.2,735604,408.0,68.0
174,175,Frozen,"Animation,Adventure,Comedy",When the newly crowned Queen Elsa accidentally...,Chris Buck,"Kristen Bell, Idina Menzel, Jonathan Groff, Jo...",2013,102,7.5,451894,400.74,74.0
270,271,Skyfall,"Action,Adventure,Thriller",Bond's loyalty to M is tested when her past co...,Sam Mendes,"Daniel Craig, Javier Bardem, Naomie Harris, Ju...",2012,143,7.8,547386,304.36,81.0
271,272,The Hobbit: An Unexpected Journey,"Adventure,Fantasy","A reluctant hobbit, Bilbo Baggins, sets out to...",Peter Jackson,"Martin Freeman, Ian McKellen, Richard Armitage...",2012,169,7.9,668651,303.0,58.0
279,280,Iron Man Three,"Action,Adventure,Sci-Fi",When Tony Stark's world is torn apart by a for...,Shane Black,"Robert Downey Jr., Guy Pearce, Gwyneth Paltrow...",2013,130,7.2,591023,408.99,62.0
294,295,Man of Steel,"Action,Adventure,Fantasy","Clark Kent, one of the last of an extinguished...",Zack Snyder,"Henry Cavill, Amy Adams, Michael Shannon, Dian...",2013,143,7.1,577010,291.02,55.0


# 6. Groupby Operations

In [83]:
# Average rating per director, highest average first.
(
    data.groupby("Director")[["Rating"]]
    .mean()
    .sort_values(by="Rating", ascending=False)
)

Unnamed: 0_level_0,Rating
Director,Unnamed: 1_level_1
Nitesh Tiwari,8.80
Christopher Nolan,8.68
Olivier Nakache,8.60
Makoto Shinkai,8.60
Aamir Khan,8.50
...,...
Micheal Bafaro,3.50
Jonathan Holbrook,3.20
Shawn Burkett,2.70
James Wong,2.70


# 7. Sorting Operations

In [84]:
# Top 10 directors by average Rating, with average Metascore as the tie-breaker
# (both keys sorted in descending order).
(
    data.groupby("Director")[["Rating", "Metascore"]]
    .mean()
    .sort_values(by=["Rating", "Metascore"], ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Rating,Metascore
Director,Unnamed: 1_level_1,Unnamed: 2_level_1
Nitesh Tiwari,8.8,
Christopher Nolan,8.68,74.8
Makoto Shinkai,8.6,79.0
Olivier Nakache,8.6,57.0
Florian Henckel von Donnersmarck,8.5,89.0
Aamir Khan,8.5,42.0
Damien Chazelle,8.4,90.5
Naoko Yamada,8.4,80.0
Lee Unkrich,8.3,92.0
Thomas Vinterberg,8.3,76.0


# 8. View missing values

In [85]:
# Count the missing values in each column.
data.isna().sum()

Unnamed: 0,0
Rank,0
Title,0
Genre,0
Description,0
Director,0
Actors,0
Year,0
Runtime (Minutes),0
Rating,0
Votes,0


# 9. Dealing with missing values - Deleting

In [86]:
# Option 1: drop a whole column that contains missing values. The result is
# only displayed, not assigned, so `data` keeps its Metascore column.
data.drop(columns=["Metascore"]).head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions)
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02


In [87]:
# Drop every row that has at least one missing value. The result is only
# displayed, not assigned, so `data` still holds all of its rows afterwards.
data.dropna()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...
993,994,Resident Evil: Afterlife,"Action,Adventure,Horror",While still out to destroy the evil Umbrella C...,Paul W.S. Anderson,"Milla Jovovich, Ali Larter, Wentworth Miller,K...",2010,97,5.9,140900,60.13,37.0
994,995,Project X,Comedy,3 high school seniors throw a birthday party t...,Nima Nourizadeh,"Thomas Mann, Oliver Cooper, Jonathan Daniel Br...",2012,88,6.7,164088,54.72,48.0
996,997,Hostel: Part II,Horror,Three American college students studying abroa...,Eli Roth,"Lauren German, Heather Matarazzo, Bijou Philli...",2007,94,5.5,73152,17.54,46.0
997,998,Step Up 2: The Streets,"Drama,Music,Romance",Romantic sparks occur between two dance studen...,Jon M. Chu,"Robert Hoffman, Briana Evigan, Cassie Ventura,...",2008,98,6.2,70699,58.01,50.0


# 10. Dealing with missing values - Filling

In [88]:
# Compute the fill values from the observed entries only: mean() and median()
# skip missing values by default.
revenue_mean = data["Revenue (Millions)"].mean()
print("The mean revenue is:", revenue_mean)
metascore_median = data["Metascore"].median()
print("The median metascore is:", metascore_median)

# Assign the filled column back instead of calling fillna(..., inplace=True) on
# data["col"]. That form is chained assignment: it acts on an intermediate
# Series, emits a FutureWarning on recent pandas, and leaves `data` unchanged
# once Copy-on-Write is enabled.
data["Revenue (Millions)"] = data["Revenue (Millions)"].fillna(revenue_mean)
data["Metascore"] = data["Metascore"].fillna(metascore_median)
data.isnull().sum()

The mean revenue is: 82.95637614678898
The median metascore is: 59.5


Unnamed: 0,0
Rank,0
Title,0
Genre,0
Description,0
Director,0
Actors,0
Year,0
Runtime (Minutes),0
Rating,0
Votes,0


# 11. apply() function

In [89]:
def rating_group(rating):
  """Map a numeric rating to a coarse quality label.

  Returns "Good" for ratings of 7.5 and above, "Average" for ratings from
  6.0 up to (but not including) 7.5, and "Bad" for everything else. A NaN
  rating fails every comparison and therefore also yields "Bad".
  """
  # Ordered from the highest floor down: the first floor the rating reaches wins.
  labelled_floors = ((7.5, "Good"), (6.0, "Average"))
  for floor, label in labelled_floors:
    if rating >= floor:
      return label
  return "Bad"

# Label every movie by running rating_group element-wise over the Rating column.
data["Rating_category"] = data["Rating"].map(rating_group)
data.head()

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore,Rating_category
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0,Good
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0,Average
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0,Average
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0,Average
4,5,Suicide Squad,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0,Average


In [90]:
def review_director(metascore_of_director):
  """Map a director's Metascore to a review tier.

  Returns "Excellent" for scores of 90 and above, "Good" for 70 up to 90,
  "Average" for 50 up to 70, and "Bad" for everything else. A NaN score
  fails every comparison and therefore also yields "Bad".
  """
  # Ordered from the highest cutoff down: the first cutoff the score reaches wins.
  tiers = ((90, "Excellent"), (70, "Good"), (50, "Average"))
  for cutoff, tier in tiers:
    if metascore_of_director >= cutoff:
      return tier
  return "Bad"

# Average Metascore per director, then a tier label for each average.
# Missing Metascores were filled with the median in an earlier cell, so these
# averages include those filled values.
data_director = (
    data.groupby("Director")[["Metascore"]]
    .mean()
)
data_director["Director_category"] = data_director["Metascore"].map(review_director)
data_director

Unnamed: 0_level_0,Metascore,Director_category
Director,Unnamed: 1_level_1,Unnamed: 2_level_1
Aamir Khan,42.0,Bad
Abdellatif Kechiche,88.0,Good
Adam Leon,77.0,Good
Adam McKay,65.5,Average
Adam Shankman,64.0,Average
...,...,...
Xavier Dolan,61.0,Average
Yimou Zhang,42.0,Bad
Yorgos Lanthimos,77.5,Good
Zack Snyder,48.0,Bad
