In [2]:

import pandas as pd

try:
    df = pd.read_csv('../data/IMDB_Dataset.csv', low_memory=False)
except FileNotFoundError as e:
    print("File not found. Download the IMDB_Dataset.")
    raise e

In [3]:
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

df.columns

Index(['Best Picture', 'Certificate (GB)', 'Certificate (US)', 'Color',
       'Contains Genre?', 'Contains Production Company?', 'Continent',
       'Country', 'Genres (1st)', 'Genres (2nd)', 'Genres (3rd)',
       'Genres (full list)', 'Image Url (Title)', 'IMDB Url (Person)',
       'IMDB Url (title)', 'Language', 'Person Name', 'Person Name ID', 'Plot',
       'Plot (medium)', 'Production Companies (1st)',
       'Production Companies (2nd)', 'Production Companies (3rd)',
       'Production Companies (List)', 'Region', 'Tagline', 'Title', 'Title Id',
       'What did they do ?', 'Who did they play ?', 'Year of Release',
       'Billing (position in cast list)', 'IMDB Rating', 'Number of people',
       'Number of titles', 'Number Of Votes', 'Runtime (Minutes)'],
      dtype='object')

In [10]:
# Filtering the DataFrame
filtered_df = df[
    (df["Year of Release"] >= 1980)
    & (df["Year of Release"] <= 2022)
    & (df["Language"] == "English")
    & (df["Runtime (Minutes)"] >= 60)
    & (df["Runtime (Minutes)"] <= 360)
    & (df["IMDB Rating"] > 0)
    & (df["Number Of Votes"] > 1000)
    # & (df["Contains Production Company?"] == True)
    & (df["Certificate (US)"] != None)
]

# filtered_df = filtered_df.drop_duplicates(subset='Title', keep='first')
# Group by 'title' and aggregate 'Person Name' into a list
actor_list = filtered_df.groupby("Title")["Person Name"].agg(list).reset_index()

# Merge this list back into the original DataFrame
filtered_df = filtered_df.merge(
    actor_list, on="Title", how="left", suffixes=("", "_list")
)

# Rename the new column to 'Lead Actors'
filtered_df.rename(columns={"Person Name_list": "Lead Actors"}, inplace=True)

# Drop duplicates based on 'title' column
filtered_df = filtered_df.drop_duplicates(subset="Title", keep="first")

# Drop the original 'Person Name' column
filtered_df = filtered_df.drop(
    columns=[
        "Color",
        "Continent",
        "Country",
        "Person Name",
        "Billing (position in cast list)",
        "Number of people",
        "Number of titles",
        "Person Name ID",
        "Contains Production Company?",
        "Contains Genre?",
        "Language",
        "Region",
        "Who did they play ?",
        "IMDB Url (Person)",
    ]
)

filtered_df

Unnamed: 0,Best Picture,Certificate (GB),Certificate (US),Genres (1st),Genres (2nd),Genres (3rd),Genres (full list),Image Url (Title),IMDB Url (title),Plot,Plot (medium),Production Companies (1st),Production Companies (2nd),Production Companies (3rd),Production Companies (List),Tagline,Title,Title Id,What did they do ?,Year of Release,IMDB Rating,Number Of Votes,Runtime (Minutes),Lead Actors
0,,PG,PG,Adventure,Comedy,Mystery,"Adventure,Comedy,Mystery,Romance,Sci-Fi",https://m.media-amazon.com/images/M/MV5BZjI0YT...,https://www.imdb.com/title/tt0094890/,The seniors return to Earth to visit their rel...,"Art, Ben and Joe are back! So are their wives ...",Zanuck/Brown Productions,Twentieth Century Fox,,Zanuck/Brown Productions; Twentieth Century Fox;,"""This holiday season",Cocoon: The Return,tt0094890,actress,1988,5.4,17023.0,116.0,"[Priscilla Ashley Behne, Bill Wohrman, Barret ..."
1,,15,R,Comedy,,,Comedy,https://m.media-amazon.com/images/M/MV5BNGM1ND...,https://www.imdb.com/title/tt0277371/,A sendup of all the teen movies that have accu...,"At John Hughes High School, the students are t...",Columbia Pictures,Original Film,Neal H. Moritz Productions,Columbia Pictures; Original Film; Neal H. Mori...,The Teen Mother Of All Movies!,Not Another Teen Movie,tt0277371,actor,2001,5.7,111707.0,89.0,"[Lukas Behnken, Cody McMains, Deon Richmond, S..."
2,,PG,PG-13,Drama,Romance,,"Drama,Romance",https://m.media-amazon.com/images/M/MV5BNzI4OT...,https://www.imdb.com/title/tt0482463/,A chef with a mysterious past spends the day w...,An international soccer star is on his way to ...,Metanoia Films,Bella Production,Burnside Entertainment,Metanoia Films; Bella Production; Burnside Ent...,One Person Can Change Your Life Forever,Bella,tt0482463,actor,2006,7.1,10907.0,91.0,"[Lukas Behnken, Dominic Colón, Hudson Cooper, ..."
3,,,PG-13,Drama,,,Drama,https://m.media-amazon.com/images/M/MV5BMjAzMT...,https://www.imdb.com/title/tt0293357/,Two brothers and their surfing buddies face ne...,,Capstone Pictures,,,Capstone Pictures; ;,Dream The Extreme,Local Boys,tt0293357,actor,2002,6.6,1007.0,102.0,"[Lukas Behnken, Travis Aaron Wade, Dick Dale, ..."
4,,15,R,Action,Horror,Sci-Fi,"Action,Horror,Sci-Fi,Thriller",https://m.media-amazon.com/images/M/MV5BNDlmOT...,https://www.imdb.com/title/tt0109965/,"Sean Barker, unwilling host to an alien bio-ar...",Sean Barker became the unwilling host to an al...,Biomorphs Inc.,L.A. Hero,,Biomorphs Inc.; L.A. Hero;,Sci-Fi's Most Powerful Alien-Human Hybrid Retu...,Guyver: Dark Hero,tt0109965,actor,1994,5.8,3675.0,118.0,"['Evil' Ted Smith, Billi Lee, Brian Simpson, C..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551800,,18,R,Documentary,Crime,,"Documentary,Crime",https://m.media-amazon.com/images/M/MV5BNDIyMj...,https://www.imdb.com/title/tt0157894/,A documentary of the decline of America. It fe...,Composed of archival material and exclusive fo...,Filmlink International,Towa Productions,,Filmlink International; Towa Productions;,WE DARE YOU! To see the most shocking film eve...,The Killing of America,tt0157894,director,1981,7.6,2265.0,90.0,[Sheldon Renan]
551801,,18,TV-MA,Documentary,Crime,History,"Documentary,Crime,History",https://m.media-amazon.com/images/M/MV5BZTQyYT...,https://www.imdb.com/title/tt13649700/,"In the early 1980s, the crack epidemic tore th...",,,,,,,"Crack: Cocaine, Corruption & Conspiracy",tt13649700,director,2021,6.7,3729.0,89.0,[Stanley Nelson]
551880,,,,Documentary,History,,"Documentary,History",https://m.media-amazon.com/images/M/MV5BMjA1ND...,https://www.imdb.com/title/tt0762111/,"Featuring never-before-seen footage, this docu...",,Firelight Media Inc.,,,Firelight Media Inc.; ;,,Jonestown: The Life and Death of Peoples Temple,tt0762111,director,2006,7.8,5582.0,86.0,[Stanley Nelson]
551881,,15,Not Rated,Documentary,History,News,"Documentary,History,News",https://m.media-amazon.com/images/M/MV5BMTg2NT...,https://www.imdb.com/title/tt2124803/,The story of two coalitions -- ACT UP and TAG ...,"In the early years of the AIDS epidemic, the d...",Public Square Films,Ninety Thousand Words,Ted Snowdon Foundation,Public Square Films; Ninety Thousand Words; Te...,,How to Survive a Plague,tt2124803,director,2012,7.6,4287.0,110.0,[David France]


In [11]:
null_counts = filtered_df.isnull().sum()
null_counts

Best Picture                   17707
Certificate (GB)                3407
Certificate (US)                1466
Genres (1st)                       1
Genres (2nd)                    2842
Genres (3rd)                    8254
Genres (full list)                 1
Image Url (Title)                  1
IMDB Url (title)                   0
Plot                               2
Plot (medium)                   2943
Production Companies (1st)       283
Production Companies (2nd)      3777
Production Companies (3rd)      8121
Production Companies (List)      283
Tagline                         4016
Title                              0
Title Id                           0
What did they do ?                 0
Year of Release                    0
IMDB Rating                        0
Number Of Votes                    0
Runtime (Minutes)                  0
Lead Actors                        0
dtype: int64

In [13]:
filtered_df.sort_values(by='IMDB Rating', ascending=False).set_index('IMDB Rating').head(6).set_index(['Title'])

Unnamed: 0_level_0,Best Picture,Certificate (GB),Certificate (US),Genres (1st),Genres (2nd),Genres (3rd),Genres (full list),Image Url (Title),IMDB Url (title),Plot,Plot (medium),Production Companies (1st),Production Companies (2nd),Production Companies (3rd),Production Companies (List),Tagline,Title Id,What did they do ?,Year of Release,Number Of Votes,Runtime (Minutes),Lead Actors
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
The Shawshank Redemption,Nominated,15,R,Drama,,,Drama,https://m.media-amazon.com/images/M/MV5BNDE3OD...,https://www.imdb.com/title/tt0111161/,"Over the course of several years, two convicts...",Chronicles the experiences of a formerly succe...,Castle Rock Entertainment,,,Castle Rock Entertainment; ;,Fear can hold you prisoner. Hope can set you f...,tt0111161,actor,1994,2791424.0,142.0,"[James Whitmore, Donald Zinn, Frank Medrano, E..."
National Theatre Live: Prima Facie,,15,,Drama,,,Drama,https://m.media-amazon.com/images/M/MV5BNDE0ND...,https://www.imdb.com/title/tt21093976/,"Tessa is a young, brilliant barrister. From wo...","Tessa is a thoroughbred. A young, brilliant ba...",Empire Street Productions,National Theatre,,Empire Street Productions; National Theatre;,Every winner might be the one who loses the ne...,tt21093976,director,2022,1414.0,120.0,[Justin Martin]
The Lord of the Rings: The Return of the King,Winner,12,PG-13,Action,Adventure,Drama,"Action,Adventure,Drama,Fantasy",https://m.media-amazon.com/images/M/MV5BNzA5ZD...,https://www.imdb.com/title/tt0167260/,Gandalf and Aragorn lead the World of Men agai...,The final confrontation between the forces of ...,New Line Cinema,WingNut Films,The Saul Zaentz Company,New Line Cinema; WingNut Films; The Saul Zaent...,The eye of the enemy is moving.,tt0167260,actor,2003,1911940.0,201.0,"[Karl Urban, Bruce Phillips, Jason Fitch, Shan..."
The Dark Knight,,12,PG-13,Action,Crime,Drama,"Action,Crime,Drama,Thriller",https://m.media-amazon.com/images/M/MV5BMTMxNT...,https://www.imdb.com/title/tt0468569/,When the menace known as the Joker wreaks havo...,Set within a year after the events of Batman B...,Warner Bros.,Legendary Entertainment,Syncopy,Warner Bros.; Legendary Entertainment; Syncopy,Why So Serious?,tt0468569,actor,2008,2771597.0,152.0,"[Michael Jai White, Keith Kupferer, Charles Ve..."
Helsreach: The Movie,,,,Sci-Fi,,,Sci-Fi,https://m.media-amazon.com/images/M/MV5BOTRhZT...,https://www.imdb.com/title/tt12820524/,The Black Templars and the Astra Militarum's S...,"Set during the Third War for Armageddon, Helsr...",,,,,,tt12820524,director,2019,1311.0,150.0,[Richard Boylan]
Dominion,,,Not Rated,Documentary,Horror,,"Documentary,Horror",https://m.media-amazon.com/images/M/MV5BOWMyNz...,https://www.imdb.com/title/tt5773402/,"Dominion uses drones, hidden and handheld came...","Dominion uses drones, hidden and handheld came...",,,,,,tt5773402,director,2018,4113.0,120.0,[Chris Delforce]
Schindler's List,Winner,15,R,Biography,Drama,History,"Biography,Drama,History",https://m.media-amazon.com/images/M/MV5BNDE4OT...,https://www.imdb.com/title/tt0108052/,"In German-occupied Poland during World War II,...",Oskar Schindler is a vain and greedy German bu...,Universal Pictures,Amblin Entertainment,,Universal Pictures; Amblin Entertainment;,"""Whoever saves one life",tt0108052,actress,1993,1403829.0,195.0,"[Elina Löwensohn, Ewa Kolasinska, Grzegorz Kwa..."
The Beatles: Get Back - The Rooftop Concert,,12,PG-13,Documentary,Music,,"Documentary,Music",https://m.media-amazon.com/images/M/MV5BNDg2OT...,https://www.imdb.com/title/tt16899584/,For the first time in its entirety - The Beatl...,The Beatles' unforgettable concert on the roof...,Walt Disney Studios Motion Pictures,Apple Corps,WingNut Films,Walt Disney Studios Motion Pictures; Apple Cor...,,tt16899584,director,2022,1320.0,65.0,[Peter Jackson]
Dr. Babasaheb Ambedkar,,,,Biography,History,,"Biography,History",https://m.media-amazon.com/images/M/MV5BNGNjZD...,https://www.imdb.com/title/tt0270321/,"Biopic of B. R. Ambedkar, known mainly for his...",A portrait of the one of our greatest social r...,National Film Development Corporation of India...,,,National Film Development Corporation of India...,,tt0270321,actor,2000,2641.0,197.0,"[Rahul Solapurkar, Luan Bexheti, Anjan Srivast..."
David Attenborough: A Life on Our Planet,,PG,PG,Documentary,Biography,,"Documentary,Biography",https://m.media-amazon.com/images/M/MV5BY2FkMj...,https://www.imdb.com/title/tt11989890/,One man has seen more of the natural world tha...,One man has seen more of the natural world tha...,Altitude Film Entertainment,Silverback Films,World Wildlife Fund,Altitude Film Entertainment; Silverback Films;...,He introduced us to the world. Now he tells hi...,tt11989890,director,2020,33584.0,83.0,"[Jonathan Hughes, Alastair Fothergill, Keith S..."


In [12]:
# save it as a csv file
filtered_df.to_csv('../data/filtered_df.csv', index=False)