### 2018 Data

In [2]:
import pandas as pd
import numpy as np

In [3]:
link = "https://en.wikipedia.org/wiki/List_of_American_films_of_2018" # fetching all the 2018 movies from wikipedia
df1 = pd.read_html(link, header=0)[2]
df2 = pd.read_html(link, header=0)[3]
df3 = pd.read_html(link, header=0)[4]
df4 = pd.read_html(link, header=0)[5]

In [4]:
df4.sample(2)

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.
45,N O V E M B E R,30,The Possession of Hannah Grace,Screen Gems,Diederik van Rooijen (director); Brian Sieve (...,[225]
10,O C T O B E R,19,Halloween,Universal Pictures / Miramax / Blumhouse Produ...,David Gordon Green (director/screenplay); Jeff...,[192]


In [5]:
df = pd.concat([df1,df2,df3,df4],ignore_index=True) #concat all the dataframes in a single for preprocessing
df

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.
0,J A N U A R Y,5,Insidious: The Last Key,Universal Pictures / Blumhouse Productions / S...,Adam Robitel (director); Leigh Whannell (scree...,[2]
1,J A N U A R Y,5,The Strange Ones,Vertical Entertainment,Christopher Radcliff (director/screenplay); La...,[3]
2,J A N U A R Y,12,The Commuter,Lionsgate / StudioCanal / The Picture Company,Jaume Collet-Serra (director); Byron Willinger...,[4]
3,J A N U A R Y,12,Proud Mary,Screen Gems,"Babak Najafi (director); John S. Newman, Chris...",[5]
4,J A N U A R Y,12,Acts of Violence,Lionsgate Premiere,Brett Donowho (director); Nicolas Aaron Mezzan...,[6]
...,...,...,...,...,...,...
243,D E C E M B E R,21,Second Act,STX Entertainment,"Peter Segal (director); Justin Zackham, Elaine...",[237]
244,D E C E M B E R,25,Holmes & Watson,Columbia Pictures / Gary Sanchez Productions /...,Etan Cohen (director/screenplay); Will Ferrell...,[142]
245,D E C E M B E R,25,Vice,Annapurna Pictures / Plan B Entertainment,Adam McKay (director/screenplay); Christian Ba...,[117]
246,D E C E M B E R,25,On the Basis of Sex,Focus Features,Mimi Leder (director); Daniel Stiepleman (scre...,[207]


#### As it is observed in the above table, we do not have the genre column so we will get the genre data from IMDb. You will need to create an API key in order to access the API key.

### pip install tmdbv3api

In [6]:
#get your API Key here: https://www.themoviedb.org/settings/api
from tmdbv3api import TMDb
import json
import requests
tmdb = TMDb()

api = 'cf1e5b9d8cf31e19913dd6bc256abb2a'
tmdb.api_key=api # insert your api key here


In [7]:
from tmdbv3api import Movie

tmdb_movie = Movie()

def get_genre(x): #pass in the title of the movies
    genres = []
    result = tmdb_movie.search(x) #the title will be searched in the tmdb_movie
    movie_id = result[0].id #we will match the "id" with the "title"
    response = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb.api_key}') #we will get the result from the IMDb data
    data_json = response.json() #we will then convert it to a json file
    if data_json['genres']: #in the json file we will only need to extract the "genre"
        genre_str = " " 
        for i in range(0,len(data_json['genres'])):
            genres.append(data_json['genres'][i]['name']) #we will then add the "genre" to the empty genre list we created above
        return genre_str.join(genres)
    else:
        np.nan # we will return the results but if we don't find anything we will consider it as a missing value


In [8]:
df['genres']= df['Title'].map(lambda x : get_genre(x))

df

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.,genres
0,J A N U A R Y,5,Insidious: The Last Key,Universal Pictures / Blumhouse Productions / S...,Adam Robitel (director); Leigh Whannell (scree...,[2],Horror Thriller
1,J A N U A R Y,5,The Strange Ones,Vertical Entertainment,Christopher Radcliff (director/screenplay); La...,[3],Drama Mystery
2,J A N U A R Y,12,The Commuter,Lionsgate / StudioCanal / The Picture Company,Jaume Collet-Serra (director); Byron Willinger...,[4],Action Thriller Mystery
3,J A N U A R Y,12,Proud Mary,Screen Gems,"Babak Najafi (director); John S. Newman, Chris...",[5],Thriller Action Crime
4,J A N U A R Y,12,Acts of Violence,Lionsgate Premiere,Brett Donowho (director); Nicolas Aaron Mezzan...,[6],Action Crime Thriller
...,...,...,...,...,...,...,...
243,D E C E M B E R,21,Second Act,STX Entertainment,"Peter Segal (director); Justin Zackham, Elaine...",[237],Romance Comedy
244,D E C E M B E R,25,Holmes & Watson,Columbia Pictures / Gary Sanchez Productions /...,Etan Cohen (director/screenplay); Will Ferrell...,[142],Comedy Mystery Crime
245,D E C E M B E R,25,Vice,Annapurna Pictures / Plan B Entertainment,Adam McKay (director/screenplay); Christian Ba...,[117],Thriller Science Fiction Action Adventure
246,D E C E M B E R,25,On the Basis of Sex,Focus Features,Mimi Leder (director); Daniel Stiepleman (scre...,[207],Drama History


In [9]:
import re
import pandas as pd

def extract_info(text):
    # Initialize the director to None
    director_name = None
    # Initialize the actors to None
    actor_1, actor_2, actor_3 = None, None, None
    
    # Check if the text contains a director's name and role(s)
    director_match = re.search(r'([a-zA-Z\s]+) \(([^)]+)\)', text)  # Extract name and roles inside parentheses
    if director_match:
        name = director_match.group(1).strip()  # Extract the name (e.g., Michael Moore)
        roles = director_match.group(2).strip()  # Extract the roles (e.g., director/screenplay/narrator)
        
        # If 'director' is in the roles, assign only the name to the director column
        if 'director' in roles:
            director_name = name  # Only assign the name to the director column if 'director' is present in the roles
            
            # Remove the director's name from the actors list to avoid duplication
            text = text.replace(name + " (" + roles + ")", "")  # Remove the entire director's entry

    # Extract actors from the cast and crew field (after the last semicolon)
    actors = re.split(r';', text)[-1].strip().split(', ')  # Split by semicolon and take actors from the last part
    
    # Remove the director from the actors list (if it was listed there)
    actors = [actor for actor in actors if actor != director_name]
    
    # Assign actors if they exist
    actor_1 = actors[0] if len(actors) > 0 else None
    actor_2 = actors[1] if len(actors) > 1 else None
    actor_3 = actors[2] if len(actors) > 2 else None

    # Return the extracted information in a pandas Series
    return pd.Series([director_name, actor_1, actor_2, actor_3])



In [10]:
df[['director_name', 'actor_1_name', 'actor_2_name', 'actor_3_name']] = df['Cast and crew'].apply(extract_info)


In [11]:
df=df.rename(columns={'Title':'movie_title'})
df.sample(2)

Unnamed: 0,Opening,Opening.1,movie_title,Production company,Cast and crew,Ref.,genres,director_name,actor_1_name,actor_2_name,actor_3_name
234,D E C E M B E R,7,Ben Is Back,LD Entertainment / Lionsgate / Roadside Attrac...,Peter Hedges (director/screenplay); Julia Robe...,[228],Drama,Peter Hedges,Julia Roberts,Lucas Hedges,Courtney B. Vance
86,M A Y,11,Terminal,RLJE Films,Vaughn Stein (director/screenplay); Margot Rob...,[83],Crime Drama Thriller,Vaughn Stein,Margot Robbie,Simon Pegg,Dexter Fletcher


In [12]:
new_df18 = df[['director_name','actor_1_name','actor_2_name','actor_3_name','genres','movie_title']]
new_df18.sample(2)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
1,Christopher Radcliff,Alex Pettyfer,James Freedson-Jackson,Emily Althaus,Drama Mystery,The Strange Ones
124,Genndy Tartakovsky,Adam Sandler,Andy Samberg,Selena Gomez,Animation Comedy Family Fantasy,Hotel Transylvania 3: Summer Vacation


In [13]:
new_df18.isna().sum()

director_name     3
actor_1_name      0
actor_2_name      4
actor_3_name     15
genres            0
movie_title       0
dtype: int64

In [14]:
new_df18.fillna('unknown',inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df18.fillna('unknown',inplace=True)


In [15]:
new_df18.isna().sum()

director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
movie_title      0
dtype: int64

In [16]:
new_df18['movie_title']= new_df18['movie_title'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df18['movie_title']= new_df18['movie_title'].str.lower()


In [17]:

new_df18['comb'] = new_df18['actor_1_name'] + ' ' + new_df18['actor_2_name'] + ' '+ new_df18['actor_3_name'] + ' '+ new_df18['director_name'] +' ' + new_df18['genres']
new_df18.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df18['comb'] = new_df18['actor_1_name'] + ' ' + new_df18['actor_2_name'] + ' '+ new_df18['actor_3_name'] + ' '+ new_df18['director_name'] +' ' + new_df18['genres']


Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Adam Robitel,Lin Shaye,Angus Sampson,Leigh Whannell,Horror Thriller,insidious: the last key,Lin Shaye Angus Sampson Leigh Whannell Adam Ro...
1,Christopher Radcliff,Alex Pettyfer,James Freedson-Jackson,Emily Althaus,Drama Mystery,the strange ones,Alex Pettyfer James Freedson-Jackson Emily Alt...
2,Serra,Liam Neeson,Vera Farmiga,Patrick Wilson,Action Thriller Mystery,the commuter,Liam Neeson Vera Farmiga Patrick Wilson Serra ...
3,Babak Najafi,Taraji P. Henson,Jahi Di'Allo Winston,Billy Brown,Thriller Action Crime,proud mary,Taraji P. Henson Jahi Di'Allo Winston Billy Br...
4,Brett Donowho,Bruce Willis,Cole Hauser,Shawn Ashmore,Action Crime Thriller,acts of violence,Bruce Willis Cole Hauser Shawn Ashmore Brett D...


### Data 2019

In [None]:
link = "https://en.wikipedia.org/wiki/List_of_American_films_of_2019" # fetching all the 2019 movies from wikipedia
df5 = pd.read_html(link, header=0)[2]
df6 = pd.read_html(link, header=0)[3]
df7 = pd.read_html(link, header=0)[4]
df8 = pd.read_html(link, header=0)[5]

In [19]:
df9 = pd.concat([df5,df6,df7,df8],ignore_index=True) #concat all the dataframes in a single for preprocessing
df9

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.
0,J A N U A R Y,4,Escape Room,Columbia Pictures / Original Film,"Adam Robitel (director); Bragi F. Schut, Maria...",[2]
1,J A N U A R Y,4,Rust Creek,IFC Films / Lunacy Productions,Jen McGowan (director); Julie Lipson (screenpl...,[3]
2,J A N U A R Y,4,American Hangman,Hangman Justice Productions,Wilson Coneybeare (director/screenplay); Donal...,[4]
3,J A N U A R Y,11,A Dog's Way Home,Columbia Pictures,Charles Martin Smith (director); W. Bruce Came...,[5]
4,J A N U A R Y,11,The Upside,STX Entertainment,Neil Burger (director); Jon Hartmere (screenpl...,[6]
...,...,...,...,...,...,...
245,D E C E M B E R,25,Spies in Disguise,20th Century Fox Animation / Blue Sky Studios ...,"Nick Bruno, Troy Quane (directors); Brad Copel...",[133]
246,D E C E M B E R,25,Little Women,Columbia Pictures / Regency Enterprises,Greta Gerwig (director/screenplay); Saoirse Ro...,[226]
247,D E C E M B E R,25,1917,Universal Pictures / DreamWorks Pictures,Sam Mendes (director/screenplay); Krysty Wilso...,[227]
248,D E C E M B E R,25,Just Mercy,Warner Bros. Pictures / Participant Media,"Destin Daniel Cretton (director/screenplay), A...",[228]


In [20]:
df9['genres']= df9['Title'].map(lambda x : get_genre(x))

df9

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.,genres
0,J A N U A R Y,4,Escape Room,Columbia Pictures / Original Film,"Adam Robitel (director); Bragi F. Schut, Maria...",[2],Horror Thriller Mystery
1,J A N U A R Y,4,Rust Creek,IFC Films / Lunacy Productions,Jen McGowan (director); Julie Lipson (screenpl...,[3],Thriller Drama Action Crime
2,J A N U A R Y,4,American Hangman,Hangman Justice Productions,Wilson Coneybeare (director/screenplay); Donal...,[4],Thriller
3,J A N U A R Y,11,A Dog's Way Home,Columbia Pictures,Charles Martin Smith (director); W. Bruce Came...,[5],Drama Adventure Family
4,J A N U A R Y,11,The Upside,STX Entertainment,Neil Burger (director); Jon Hartmere (screenpl...,[6],Comedy Drama
...,...,...,...,...,...,...,...
245,D E C E M B E R,25,Spies in Disguise,20th Century Fox Animation / Blue Sky Studios ...,"Nick Bruno, Troy Quane (directors); Brad Copel...",[133],Animation Action Adventure Comedy Family
246,D E C E M B E R,25,Little Women,Columbia Pictures / Regency Enterprises,Greta Gerwig (director/screenplay); Saoirse Ro...,[226],Drama Romance History
247,D E C E M B E R,25,1917,Universal Pictures / DreamWorks Pictures,Sam Mendes (director/screenplay); Krysty Wilso...,[227],War History Thriller Drama
248,D E C E M B E R,25,Just Mercy,Warner Bros. Pictures / Participant Media,"Destin Daniel Cretton (director/screenplay), A...",[228],Drama Crime History


In [21]:
df9[['director_name', 'actor_1_name', 'actor_2_name', 'actor_3_name']] = df9['Cast and crew'].apply(extract_info)
df9.head(7)

Unnamed: 0,Opening,Opening.1,Title,Production company,Cast and crew,Ref.,genres,director_name,actor_1_name,actor_2_name,actor_3_name
0,J A N U A R Y,4,Escape Room,Columbia Pictures / Original Film,"Adam Robitel (director); Bragi F. Schut, Maria...",[2],Horror Thriller Mystery,Adam Robitel,Taylor Russell,Logan Miller,Deborah Ann Woll
1,J A N U A R Y,4,Rust Creek,IFC Films / Lunacy Productions,Jen McGowan (director); Julie Lipson (screenpl...,[3],Thriller Drama Action Crime,Jen McGowan,Hermione Corfield,Jay Paulson,Sean O'Bryan
2,J A N U A R Y,4,American Hangman,Hangman Justice Productions,Wilson Coneybeare (director/screenplay); Donal...,[4],Thriller,Wilson Coneybeare,Donald Sutherland,Vincent Kartheiser,Oliver Dennis
3,J A N U A R Y,11,A Dog's Way Home,Columbia Pictures,Charles Martin Smith (director); W. Bruce Came...,[5],Drama Adventure Family,Charles Martin Smith,Bryce Dallas Howard,Edward James Olmos,Alexandra Shipp
4,J A N U A R Y,11,The Upside,STX Entertainment,Neil Burger (director); Jon Hartmere (screenpl...,[6],Comedy Drama,Neil Burger,Bryan Cranston,Kevin Hart,Nicole Kidman
5,J A N U A R Y,11,Replicas,Entertainment Studios,Jeffrey Nachmanoff (director); Chad St. John (...,[7],Science Fiction Thriller Drama,Jeffrey Nachmanoff,Keanu Reeves,,
6,J A N U A R Y,15,After Darkness,Grindstone Entertainment Group,Batán Silva (director); Fernando Diez Barroso ...,[8],Drama Family Thriller Science Fiction,n Silva,Natalia Dyer,Kyra Sedgwick,Tim Daly


In [22]:
df9=df9.rename(columns={'Title':'movie_title'})
df9.head(4)

Unnamed: 0,Opening,Opening.1,movie_title,Production company,Cast and crew,Ref.,genres,director_name,actor_1_name,actor_2_name,actor_3_name
0,J A N U A R Y,4,Escape Room,Columbia Pictures / Original Film,"Adam Robitel (director); Bragi F. Schut, Maria...",[2],Horror Thriller Mystery,Adam Robitel,Taylor Russell,Logan Miller,Deborah Ann Woll
1,J A N U A R Y,4,Rust Creek,IFC Films / Lunacy Productions,Jen McGowan (director); Julie Lipson (screenpl...,[3],Thriller Drama Action Crime,Jen McGowan,Hermione Corfield,Jay Paulson,Sean O'Bryan
2,J A N U A R Y,4,American Hangman,Hangman Justice Productions,Wilson Coneybeare (director/screenplay); Donal...,[4],Thriller,Wilson Coneybeare,Donald Sutherland,Vincent Kartheiser,Oliver Dennis
3,J A N U A R Y,11,A Dog's Way Home,Columbia Pictures,Charles Martin Smith (director); W. Bruce Came...,[5],Drama Adventure Family,Charles Martin Smith,Bryce Dallas Howard,Edward James Olmos,Alexandra Shipp


In [23]:
new_df19 = df9[['director_name','actor_1_name','actor_2_name','actor_3_name','genres','movie_title']]
new_df19.sample(2)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
58,Leonetti,Kiernan Shipka,Stanley Tucci,Miranda Otto,Horror Drama Thriller Fantasy,The Silence
158,Matthew Thompson,Josh Pinkowski,Izzy Pollak,Blake Kevin Dwyer,Family,"Debunkers, Inc."


In [24]:
new_df19.isna().sum()

director_name     2
actor_1_name      0
actor_2_name      3
actor_3_name     20
genres            0
movie_title       0
dtype: int64

In [25]:
new_df19.fillna('unknown',inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df19.fillna('unknown',inplace=True)


In [26]:
new_df19.isna().sum()

director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
movie_title      0
dtype: int64

In [27]:
new_df19['movie_title']= new_df19['movie_title'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df19['movie_title']= new_df19['movie_title'].str.lower()


In [28]:

new_df19['comb'] = new_df19['actor_1_name'] + ' ' + new_df19['actor_2_name'] + ' '+ new_df19['actor_3_name'] + ' '+ new_df19['director_name'] +' ' + new_df19['genres']
new_df19.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df19['comb'] = new_df19['actor_1_name'] + ' ' + new_df19['actor_2_name'] + ' '+ new_df19['actor_3_name'] + ' '+ new_df19['director_name'] +' ' + new_df19['genres']


Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Adam Robitel,Taylor Russell,Logan Miller,Deborah Ann Woll,Horror Thriller Mystery,escape room,Taylor Russell Logan Miller Deborah Ann Woll A...
1,Jen McGowan,Hermione Corfield,Jay Paulson,Sean O'Bryan,Thriller Drama Action Crime,rust creek,Hermione Corfield Jay Paulson Sean O'Bryan Jen...
2,Wilson Coneybeare,Donald Sutherland,Vincent Kartheiser,Oliver Dennis,Thriller,american hangman,Donald Sutherland Vincent Kartheiser Oliver De...
3,Charles Martin Smith,Bryce Dallas Howard,Edward James Olmos,Alexandra Shipp,Drama Adventure Family,a dog's way home,Bryce Dallas Howard Edward James Olmos Alexand...
4,Neil Burger,Bryan Cranston,Kevin Hart,Nicole Kidman,Comedy Drama,the upside,Bryan Cranston Kevin Hart Nicole Kidman Neil B...


In [29]:
my_df = pd.concat([new_df18,new_df19],ignore_index=True)

my_df

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Adam Robitel,Lin Shaye,Angus Sampson,Leigh Whannell,Horror Thriller,insidious: the last key,Lin Shaye Angus Sampson Leigh Whannell Adam Ro...
1,Christopher Radcliff,Alex Pettyfer,James Freedson-Jackson,Emily Althaus,Drama Mystery,the strange ones,Alex Pettyfer James Freedson-Jackson Emily Alt...
2,Serra,Liam Neeson,Vera Farmiga,Patrick Wilson,Action Thriller Mystery,the commuter,Liam Neeson Vera Farmiga Patrick Wilson Serra ...
3,Babak Najafi,Taraji P. Henson,Jahi Di'Allo Winston,Billy Brown,Thriller Action Crime,proud mary,Taraji P. Henson Jahi Di'Allo Winston Billy Br...
4,Brett Donowho,Bruce Willis,Cole Hauser,Shawn Ashmore,Action Crime Thriller,acts of violence,Bruce Willis Cole Hauser Shawn Ashmore Brett D...
...,...,...,...,...,...,...,...
493,Troy Quane,Will Smith,Tom Holland,Rashida Jones,Animation Action Adventure Comedy Family,spies in disguise,Will Smith Tom Holland Rashida Jones Troy Quan...
494,Greta Gerwig,Saoirse Ronan,Emma Watson,Florence Pugh,Drama Romance History,little women,Saoirse Ronan Emma Watson Florence Pugh Greta ...
495,Sam Mendes,George MacKay,Dean-Charles Chapman,Mark Strong,War History Thriller Drama,1917,George MacKay Dean-Charles Chapman Mark Strong...
496,Destin Daniel Cretton,Michael B. Jordan,Jamie Foxx,Brie Larson,Drama Crime History,just mercy,Michael B. Jordan Jamie Foxx Brie Larson Desti...


In [30]:
old = pd.read_csv(r'D:\Data Science Pojects\Netflix Recommendation System\data_2017\new_data.csv')

old.sample(2)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,movie_title,genres,comb
1560,Peter Lepeniotis,Liam Neeson,Brendan Fraser,Sarah Gadon,the nut job,Adventure Animation Comedy Family,Liam Neeson Brendan Fraser Sarah Gadon Peter L...
1397,Kevin Smith,Stephen Root,Mike Starr,George Carlin,jersey girl,Comedy Drama Romance,Stephen Root Mike Starr George Carlin Kevin Sm...


In [31]:
final = pd.concat([old,my_df],ignore_index=True)
final

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,movie_title,genres,comb
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,avatar,Action Adventure Fantasy Sci-Fi,CCH Pounder Joel David Moore Wes Studi James C...
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,pirates of the caribbean: at world's end,Action Adventure Fantasy,Johnny Depp Orlando Bloom Jack Davenport Gore ...
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,spectre,Action Adventure Thriller,Christoph Waltz Rory Kinnear Stephanie Sigman ...
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,the dark knight rises,Action Thriller,Tom Hardy Christian Bale Joseph Gordon-Levitt ...
4,Doug Walker,Doug Walker,Rob Walker,unknown,star wars: episode vii - the force awakens ...,Documentary,Doug Walker Rob Walker unknown Doug Walker Doc...
...,...,...,...,...,...,...,...
5871,Troy Quane,Will Smith,Tom Holland,Rashida Jones,spies in disguise,Animation Action Adventure Comedy Family,Will Smith Tom Holland Rashida Jones Troy Quan...
5872,Greta Gerwig,Saoirse Ronan,Emma Watson,Florence Pugh,little women,Drama Romance History,Saoirse Ronan Emma Watson Florence Pugh Greta ...
5873,Sam Mendes,George MacKay,Dean-Charles Chapman,Mark Strong,1917,War History Thriller Drama,George MacKay Dean-Charles Chapman Mark Strong...
5874,Destin Daniel Cretton,Michael B. Jordan,Jamie Foxx,Brie Larson,just mercy,Drama Crime History,Michael B. Jordan Jamie Foxx Brie Larson Desti...


In [32]:
final.isna().sum()

director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
movie_title      0
genres           0
comb             0
dtype: int64

In [33]:
final.drop_duplicates(keep='last',inplace=True)

final

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,movie_title,genres,comb
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,avatar,Action Adventure Fantasy Sci-Fi,CCH Pounder Joel David Moore Wes Studi James C...
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,pirates of the caribbean: at world's end,Action Adventure Fantasy,Johnny Depp Orlando Bloom Jack Davenport Gore ...
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,spectre,Action Adventure Thriller,Christoph Waltz Rory Kinnear Stephanie Sigman ...
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,the dark knight rises,Action Thriller,Tom Hardy Christian Bale Joseph Gordon-Levitt ...
4,Doug Walker,Doug Walker,Rob Walker,unknown,star wars: episode vii - the force awakens ...,Documentary,Doug Walker Rob Walker unknown Doug Walker Doc...
...,...,...,...,...,...,...,...
5871,Troy Quane,Will Smith,Tom Holland,Rashida Jones,spies in disguise,Animation Action Adventure Comedy Family,Will Smith Tom Holland Rashida Jones Troy Quan...
5872,Greta Gerwig,Saoirse Ronan,Emma Watson,Florence Pugh,little women,Drama Romance History,Saoirse Ronan Emma Watson Florence Pugh Greta ...
5873,Sam Mendes,George MacKay,Dean-Charles Chapman,Mark Strong,1917,War History Thriller Drama,George MacKay Dean-Charles Chapman Mark Strong...
5874,Destin Daniel Cretton,Michael B. Jordan,Jamie Foxx,Brie Larson,just mercy,Drama Crime History,Michael B. Jordan Jamie Foxx Brie Larson Desti...


In [35]:
final.to_csv('final_data.csv',index=False)