In [1]:
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from requests.exceptions import Timeout, RequestException, ConnectionError
import numpy as np
from functools import lru_cache
from tmdbv3api import TMDb
import json
import requests
tmdb = TMDb()
tmdb.api_key = '6d8bfe0dbef34d25e64f64cabec93e20' 
from tmdbv3api import Movie
tmdb_movie = Movie()

# Setup retry strategy
retry_strategy = Retry(
    total=5,  # Total number of retries
    backoff_factor=2,  # Exponential backoff factor
    status_forcelist=[429, 500, 502, 503, 504],  # Retry on these HTTP status codes
    method_whitelist=["HEAD", "GET", "OPTIONS"]  # Retry only on these methods
)

# Create an HTTP adapter with the retry strategy
adapter = HTTPAdapter(max_retries=retry_strategy)

# Create a session and mount the adapter
session = requests.Session()
session.mount("https://", adapter)
session.mount("http://", adapter)



# Using LRU cache to store results of previously fetched movie genres
@lru_cache(maxsize=1000)
def fetch_movie_genres(movie_id):
    try:
        response = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb.api_key}')
        if response.status_code != 200:
            return np.nan
        
        data_json = response.json()
        if 'genres' in data_json and data_json['genres']:
            return " ".join([genre['name'] for genre in data_json['genres']])
        else:
            return np.nan
    except Exception as e:
        print(f"Error fetching genres for movie ID {movie_id}: {e}")
        return np.nan

def get_genres(x):
    try:
        result = tmdb_movie.search(x)
        if not result:
            return np.nan
        
        movie_id = result[0].id
        return fetch_movie_genres(movie_id)
    except Exception as e:
        print(f"Error fetching genres for {x}: {e}")
        return np.nan

def get_director(x):
    if " (director)" in x: #we are getting "director"
        return x.split(" (director)")[0]
    elif " (directors)" in x: #we are getting "directors" with "s"
        return x.split(" (directors)")[0]
    else:
        return x.split(" (director/screenplay)")[0]
    

def get_actor1(x):
    return ((x.split("screenplay); ")[-1]).split(", ")[0])

def get_actor2(x):
    if len((x.split("screenplay); ")[-1]).split(", ")) < 2:
        return np.NaN
    else:
        return ((x.split("screenplay); ")[-1]).split(", ")[1])
def get_actor3(x):
    if len((x.split("screenplay); ")[-1]).split(", ")) < 3:
        return np.NaN
    else:
        return ((x.split("screenplay); ")[-1]).split(", ")[2])


def fetch_html(url):
    try:
        response = session.get(url, timeout=20)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.content
    except Timeout:
        print(f"Request timed out for URL {url}.")
        return None
    except ConnectionError as e:
        print(f"Connection error for URL {url}: {e}")
        return None
    except RequestException as e:
        print(f"Request error fetching data from URL {url}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error fetching data from URL {url}: {e}")
        return None
    
def create_df(url):
    # Fetch HTML content from the URL
    html_content = fetch_html(url)

    # Read HTML data using pandas if content is fetched successfully
    if html_content:
        try:
            # Reading multiple tables on the page
            tables = pd.read_html(html_content, header=0)
        
            # Extract specific tables
            df1 = tables[2]
            df2 = tables[3]
            df3 = tables[4]
            df4 = tables[5]
        
        except Exception as e:
            print(f"Error reading HTML content: {e}")
    else:
        print("Failed to fetch HTML content.")
    
    movies = pd.concat([df1,df2,df3,df4],axis=0)
    movies = movies[['Title','Cast and crew']]
    movies['genres'] = movies['Title'].apply(lambda x:get_genres(x))
    movies['director_name'] = movies['Cast and crew'].map(lambda x: get_director(x))

    movies['actor_1_name'] = movies['Cast and crew'].map(lambda x: get_actor1(x))
    movies['actor_2_name'] = movies['Cast and crew'].map(lambda x: get_actor2(x))
    movies['actor_3_name'] = movies['Cast and crew'].map(lambda x: get_actor3(x))

    movies.drop("Cast and crew",axis=1,inplace=True)
    movies['comb'] = movies['actor_1_name']+" "+movies['actor_2_name']+" "+movies['actor_3_name']+" "+movies['director_name']
    movies = movies.rename(columns={'Title':'movie_title'})


    return movies
    



  retry_strategy = Retry(


In [18]:
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from requests.exceptions import Timeout, RequestException, ConnectionError
import numpy as np
from functools import lru_cache
from tmdbv3api import TMDb, Movie
from concurrent.futures import ThreadPoolExecutor, as_completed

tmdb = TMDb()
tmdb.api_key = '6d8bfe0dbef34d25e64f64cabec93e20'
tmdb_movie = Movie()

# Setup retry strategy
retry_strategy = Retry(
    total=5,  # Total number of retries
    backoff_factor=2,  # Exponential backoff factor
    status_forcelist=[429, 500, 502, 503, 504],  # Retry on these HTTP status codes
    method_whitelist=["HEAD", "GET", "OPTIONS"]  # Retry only on these methods
)

# Create an HTTP adapter with the retry strategy
adapter = HTTPAdapter(max_retries=retry_strategy)

# Create a session and mount the adapter
session = requests.Session()
session.mount("https://", adapter)
session.mount("http://", adapter)

# Using LRU cache to store results of previously fetched movie genres
@lru_cache(maxsize=1000)
def fetch_movie_genres(movie_id):
    try:
        response = session.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb.api_key}')
        if response.status_code != 200:
            return np.nan
        
        data_json = response.json()
        if 'genres' in data_json and data_json['genres']:
            return " ".join([genre['name'] for genre in data_json['genres']])
        else:
            return np.nan
    except Exception as e:
        print(f"Error fetching genres for movie ID {movie_id}: {e}")
        return np.nan

def get_genres(x):
    try:
        result = tmdb_movie.search(x)
        if not result:
            return np.nan
        
        movie_id = result[0].id
        return fetch_movie_genres(movie_id)
    except Exception as e:
        print(f"Error fetching genres for {x}: {e}")
        return np.nan

def get_director(x):
    if " (director)" in x:
        return x.split(" (director)")[0]
    elif " (directors)" in x:
        return x.split(" (directors)")[0]
    else:
        return x.split(" (director/screenplay)")[0]

def get_actor1(x):
    return ((x.split("screenplay); ")[-1]).split(", ")[0])

def get_actor2(x):
    actors = (x.split("screenplay); ")[-1]).split(", ")
    return actors[1] if len(actors) > 1 else np.NaN

def get_actor3(x):
    actors = (x.split("screenplay); ")[-1]).split(", ")
    return actors[2] if len(actors) > 2 else np.NaN

def fetch_html(url):
    try:
        response = session.get(url, timeout=20)
        response.raise_for_status()
        return response.content
    except Timeout:
        print(f"Request timed out for URL {url}.")
        return None
    except ConnectionError as e:
        print(f"Connection error for URL {url}: {e}")
        return None
    except RequestException as e:
        print(f"Request error fetching data from URL {url}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error fetching data from URL {url}: {e}")
        return None

def process_actors_directors(cast_crew):
    return {
        'director_name': get_director(cast_crew),
        'actor_1_name': get_actor1(cast_crew),
        'actor_2_name': get_actor2(cast_crew),
        'actor_3_name': get_actor3(cast_crew)
    }

def create_df(url):
    # Fetch HTML content from the URL
    html_content = fetch_html(url)

    # Read HTML data using pandas if content is fetched successfully
    if html_content:
        try:
            # Reading multiple tables on the page
            tables = pd.read_html(html_content, header=0)
            # Extract specific tables
            df1 = tables[2]
            df2 = tables[3]
            df3 = tables[4]
            df4 = tables[5]
        except Exception as e:
            print(f"Error reading HTML content: {e}")
            return None
    else:
        print("Failed to fetch HTML content.")
        return None

    movies = pd.concat([df1, df2, df3, df4], axis=0)
    movies = movies[['Title', 'Cast and crew']]
    movies['movie_title'] = movies['Title'].str.lower()
    
    # Parallelize the process of extracting director and actors
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(process_actors_directors, row['Cast and crew']): index for index, row in movies.iterrows()}
        for future in as_completed(futures):
            index = futures[future]
            try:
                result = future.result()
                movies.at[index, 'director_name'] = result['director_name']
                movies.at[index, 'actor_1_name'] = result['actor_1_name']
                movies.at[index, 'actor_2_name'] = result['actor_2_name']
                movies.at[index, 'actor_3_name'] = result['actor_3_name']
            except Exception as e:
                print(f"Error processing actors and directors for row {index}: {e}")

    movies.drop("Cast and crew", axis=1, inplace=True)
    movies['comb'] = movies['actor_1_name'] + " " + movies['actor_2_name'] + " " + movies['actor_3_name'] + " " + movies['director_name']

    # Parallelize genre fetching
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_title = {executor.submit(get_genres, title): title for title in movies['movie_title']}
        for future in as_completed(future_to_title):
            title = future_to_title[future]
            try:
                genres = future.result()
                movies.loc[movies['movie_title'] == title, 'genres'] = genres
            except Exception as e:
                print(f"Error fetching genres for {title}: {e}")

    return movies

# Example usage
url = "https://en.wikipedia.org/wiki/List_of_American_films_of_2018"
movies_2018 = create_df(url)



  retry_strategy = Retry(
  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']
  movies.loc[movies['movie_title'] == title, 'genres'] = genres


In [19]:
movies_2018

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,Insidious: The Last Key,insidious: the last key,Ruben Fleischer,Tom Hardy,Michelle Williams,Riz Ahmed,Tom Hardy Michelle Williams Riz Ahmed Ruben Fl...,Horror Mystery Thriller
1,The Strange Ones,the strange ones,Bradley Cooper,Bradley Cooper,Lady Gaga,Andrew Dice Clay,Bradley Cooper Lady Gaga Andrew Dice Clay Brad...,Drama Mystery
2,The Commuter,the commuter,Peter Bogdanovich,Peter Bogdanovich (director/screenplay),,,,Action Thriller Mystery
3,Proud Mary,proud mary,Damien Chazelle,Ryan Gosling,Claire Foy,Jason Clarke,Ryan Gosling Claire Foy Jason Clarke Damien Ch...,Thriller Action Crime
4,Acts of Violence,acts of violence,Drew Goddard,Jeff Bridges,Cynthia Erivo,Dakota Johnson,Jeff Bridges Cynthia Erivo Dakota Johnson Drew...,Action Crime Thriller
...,...,...,...,...,...,...,...,...
56,Second Act,second act,Peter Segal,Jennifer Lopez,Leah Remini,Vanessa Hudgens,Jennifer Lopez Leah Remini Vanessa Hudgens Pet...,Romance Comedy
57,Holmes & Watson,holmes & watson,Etan Cohen,Will Ferrell,John C. Reilly,Rebecca Hall,Will Ferrell John C. Reilly Rebecca Hall Etan ...,Mystery Adventure Comedy Crime
58,Vice,vice,Adam McKay,Christian Bale,Amy Adams,Steve Carell,Christian Bale Amy Adams Steve Carell Adam McKay,Comedy Drama
59,On the Basis of Sex,on the basis of sex,Mimi Leder,Felicity Jones,Armie Hammer,Justin Theroux,Felicity Jones Armie Hammer Justin Theroux Mim...,Drama History


In [20]:
movies_2018.duplicated().sum()

0

In [21]:
movies_2018.isnull().sum()

Title            0
movie_title      0
director_name    0
actor_1_name     0
actor_2_name     5
actor_3_name     9
comb             9
genres           0
dtype: int64

In [22]:
movies_2018.dropna(inplace=True)

In [23]:
movies_2018.isnull().sum()

Title            0
movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [24]:
len(movies_2018)

237

In [26]:
movies_2018

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,insidious: the last key,Ruben Fleischer,Tom Hardy,Michelle Williams,Riz Ahmed,Tom Hardy Michelle Williams Riz Ahmed Ruben Fl...,Horror Mystery Thriller
1,the strange ones,Bradley Cooper,Bradley Cooper,Lady Gaga,Andrew Dice Clay,Bradley Cooper Lady Gaga Andrew Dice Clay Brad...,Drama Mystery
3,proud mary,Damien Chazelle,Ryan Gosling,Claire Foy,Jason Clarke,Ryan Gosling Claire Foy Jason Clarke Damien Ch...,Thriller Action Crime
4,acts of violence,Drew Goddard,Jeff Bridges,Cynthia Erivo,Dakota Johnson,Jeff Bridges Cynthia Erivo Dakota Johnson Drew...,Action Crime Thriller
5,freak show,Ari Sandel,Wendi McLendon-Covey,Madison Iseman,Jeremy Ray Taylor,Wendi McLendon-Covey Madison Iseman Jeremy Ray...,Comedy Drama
...,...,...,...,...,...,...,...
56,second act,Peter Segal,Jennifer Lopez,Leah Remini,Vanessa Hudgens,Jennifer Lopez Leah Remini Vanessa Hudgens Pet...,Romance Comedy
57,holmes & watson,Etan Cohen,Will Ferrell,John C. Reilly,Rebecca Hall,Will Ferrell John C. Reilly Rebecca Hall Etan ...,Mystery Adventure Comedy Crime
58,vice,Adam McKay,Christian Bale,Amy Adams,Steve Carell,Christian Bale Amy Adams Steve Carell Adam McKay,Comedy Drama
59,on the basis of sex,Mimi Leder,Felicity Jones,Armie Hammer,Justin Theroux,Felicity Jones Armie Hammer Justin Theroux Mim...,Drama History


In [25]:
movies_2018 = movies_2018.drop('Title',axis=1)

In [27]:
movies_2018.to_csv('data_2018_american.csv',index=False)

In [28]:

# Using LRU cache to store results of previously fetched movie genres
@lru_cache(maxsize=1000)
def fetch_movie_genres(movie_id):
    try:
        response = session.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb.api_key}')
        if response.status_code != 200:
            return np.nan
        
        data_json = response.json()
        if 'genres' in data_json and data_json['genres']:
            return " ".join([genre['name'] for genre in data_json['genres']])
        else:
            return np.nan
    except Exception as e:
        print(f"Error fetching genres for movie ID {movie_id}: {e}")
        return np.nan

def get_genres(x):
    try:
        result = tmdb_movie.search(x)
        if not result:
            return np.nan
        
        movie_id = result[0].id
        return fetch_movie_genres(movie_id)
    except Exception as e:
        print(f"Error fetching genres for {x}: {e}")
        return np.nan

def get_director(x):
    if " (director)" in x:
        return x.split(" (director)")[0]
    elif " (directors)" in x:
        return x.split(" (directors)")[0]
    else:
        return x.split(" (director/screenplay)")[0]

def get_actor1(x):
    return ((x.split("screenplay); ")[-1]).split(", ")[0])

def get_actor2(x):
    actors = (x.split("screenplay); ")[-1]).split(", ")
    return actors[1] if len(actors) > 1 else np.NaN

def get_actor3(x):
    actors = (x.split("screenplay); ")[-1]).split(", ")
    return actors[2] if len(actors) > 2 else np.NaN

def fetch_html(url):
    try:
        response = session.get(url, timeout=20)
        response.raise_for_status()
        return response.content
    except Timeout:
        print(f"Request timed out for URL {url}.")
        return None
    except ConnectionError as e:
        print(f"Connection error for URL {url}: {e}")
        return None
    except RequestException as e:
        print(f"Request error fetching data from URL {url}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error fetching data from URL {url}: {e}")
        return None

def process_actors_directors(cast_crew):
    return {
        'director_name': get_director(cast_crew),
        'actor_1_name': get_actor1(cast_crew),
        'actor_2_name': get_actor2(cast_crew),
        'actor_3_name': get_actor3(cast_crew)
    }

def create_df(url):
    # Fetch HTML content from the URL
    html_content = fetch_html(url)

    # Read HTML data using pandas if content is fetched successfully
    if html_content:
        try:
            # Reading multiple tables on the page
            tables = pd.read_html(html_content, header=0)
            # Extract specific tables
            df1 = tables[2]
            df2 = tables[3]
            df3 = tables[4]
            df4 = tables[5]
        except Exception as e:
            print(f"Error reading HTML content: {e}")
            return None
    else:
        print("Failed to fetch HTML content.")
        return None

    movies = pd.concat([df1, df2, df3, df4], axis=0)
    movies = movies[['Title', 'Cast and crew']]
    movies['movie_title'] = movies['Title'].str.lower()
    
    # Parallelize the process of extracting director and actors
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(process_actors_directors, row['Cast and crew']): index for index, row in movies.iterrows()}
        for future in as_completed(futures):
            index = futures[future]
            try:
                result = future.result()
                movies.at[index, 'director_name'] = result['director_name']
                movies.at[index, 'actor_1_name'] = result['actor_1_name']
                movies.at[index, 'actor_2_name'] = result['actor_2_name']
                movies.at[index, 'actor_3_name'] = result['actor_3_name']
            except Exception as e:
                print(f"Error processing actors and directors for row {index}: {e}")

    movies.drop("Cast and crew", axis=1, inplace=True)
    movies['comb'] = movies['actor_1_name'] + " " + movies['actor_2_name'] + " " + movies['actor_3_name'] + " " + movies['director_name']

    # Parallelize genre fetching
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_title = {executor.submit(get_genres, title): title for title in movies['movie_title']}
        for future in as_completed(future_to_title):
            title = future_to_title[future]
            try:
                genres = future.result()
                movies.loc[movies['movie_title'] == title, 'genres'] = genres
            except Exception as e:
                print(f"Error fetching genres for {title}: {e}")

    return movies




In [29]:

movies_2019 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2019")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']
  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for little monsters: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=little%20monsters&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7CB2BFF10>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for wounds: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=wounds&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7CE26F290>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for black and blue: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=black%20and%20blue

In [30]:
movies_2019.head(5)

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,Escape Room,escape room,Todd Phillips,Joaquin Phoenix,Robert De Niro,Zazie Beetz,Joaquin Phoenix Robert De Niro Zazie Beetz Tod...,Horror Thriller Mystery
1,Rust Creek,rust creek,Noah Hawley,Natalie Portman,Jon Hamm,Zazie Beetz,Natalie Portman Jon Hamm Zazie Beetz Noah Hawley,Thriller Drama Action Crime
2,American Hangman,american hangman,Craig Brewer,Eddie Murphy,Keegan-Michael Key,Mike Epps,Eddie Murphy Keegan-Michael Key Mike Epps Crai...,Thriller
3,A Dog's Way Home,a dog's way home,Vincenzo Natali,Patrick Wilson,Harrison Gilbertson,Rachel Wilson,Patrick Wilson Harrison Gilbertson Rachel Wils...,Drama Adventure Family
4,The Upside,the upside,Kevin McMullin,Keean Johnson,Jaeden Martell,Alex Neustaedter,Keean Johnson Jaeden Martell Alex Neustaedter ...,Comedy Drama


In [31]:
movies_2019.drop('Title',axis=1,inplace=True)

In [32]:
movies_2019.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,escape room,Todd Phillips,Joaquin Phoenix,Robert De Niro,Zazie Beetz,Joaquin Phoenix Robert De Niro Zazie Beetz Tod...,Horror Thriller Mystery
1,rust creek,Noah Hawley,Natalie Portman,Jon Hamm,Zazie Beetz,Natalie Portman Jon Hamm Zazie Beetz Noah Hawley,Thriller Drama Action Crime
2,american hangman,Craig Brewer,Eddie Murphy,Keegan-Michael Key,Mike Epps,Eddie Murphy Keegan-Michael Key Mike Epps Crai...,Thriller
3,a dog's way home,Vincenzo Natali,Patrick Wilson,Harrison Gilbertson,Rachel Wilson,Patrick Wilson Harrison Gilbertson Rachel Wils...,Drama Adventure Family
4,the upside,Kevin McMullin,Keean Johnson,Jaeden Martell,Alex Neustaedter,Keean Johnson Jaeden Martell Alex Neustaedter ...,Comedy Drama


In [33]:
movies_2019.isnull().sum()

movie_title       0
director_name     0
actor_1_name      0
actor_2_name      0
actor_3_name     18
comb             18
genres            8
dtype: int64

In [34]:
movies_2019.dropna(inplace=True)

In [35]:
movies_2019.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [36]:
len(movies_2019)

221

In [37]:
movies_2019.duplicated().sum()

0

In [38]:
movies_2019.to_csv("data_2019_american.csv",index=False)

In [39]:
movies_2020 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2020")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']
  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for no films had been released on this date as resulting the closure of theaters caused by the covid-19 pandemic: attribute name must be string, not 'int'
Error fetching genres for the spongebob movie: sponge on the run (canadian theatrical release): attribute name must be string, not 'int'
Error fetching genres for the craft: legacy: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=the%20craft%3A%20legacy&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7DB67DFD0>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for holidate: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=holidate&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTP

In [40]:
movies_2020.head()

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,The Grudge,the grudge,Nicolas Pesce,Andrea Riseborough,Demián Bichir,John Cho,Andrea Riseborough Demián Bichir John Cho Nico...,Horror Mystery Thriller
1,Underwater,underwater,Walt Dohrn,Anna Kendrick,Justin Timberlake,Rachel Bloom,Anna Kendrick Justin Timberlake Rachel Bloom W...,Horror Science Fiction Action Adventure
2,Like a Boss,like a boss,Miguel Arteta,Tiffany Haddish,Rose Byrne,Salma Hayek,Tiffany Haddish Rose Byrne Salma Hayek Miguel ...,Comedy
3,Three Christs,three christs,Brandon Cronenberg,Andrea Riseborough,Christopher Abbott,Rossif Sutherland,Andrea Riseborough Christopher Abbott Rossif S...,Drama
4,Inherit the Viper,inherit the viper,Darren Lynn Bousman,Maggie Q,Alex Essoe,Luke Hemsworth,Maggie Q Alex Essoe Luke Hemsworth Darren Lynn...,Crime Thriller Drama


In [41]:
movies_2020.isnull().sum()

Title             0
movie_title       0
director_name     0
actor_1_name      0
actor_2_name      1
actor_3_name     31
comb             31
genres           50
dtype: int64

In [42]:
len(movies_2020)

275

In [43]:
movies_2020.dropna(inplace=True)

In [44]:
movies_2020.isnull().sum()

Title            0
movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [45]:
len(movies_2020)

198

In [47]:
movies_2020.drop('Title',axis=1,inplace=True)

In [48]:
movies_2020.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,the grudge,Nicolas Pesce,Andrea Riseborough,Demián Bichir,John Cho,Andrea Riseborough Demián Bichir John Cho Nico...,Horror Mystery Thriller
1,underwater,Walt Dohrn,Anna Kendrick,Justin Timberlake,Rachel Bloom,Anna Kendrick Justin Timberlake Rachel Bloom W...,Horror Science Fiction Action Adventure
2,like a boss,Miguel Arteta,Tiffany Haddish,Rose Byrne,Salma Hayek,Tiffany Haddish Rose Byrne Salma Hayek Miguel ...,Comedy
3,three christs,Brandon Cronenberg,Andrea Riseborough,Christopher Abbott,Rossif Sutherland,Andrea Riseborough Christopher Abbott Rossif S...,Drama
4,inherit the viper,Darren Lynn Bousman,Maggie Q,Alex Essoe,Luke Hemsworth,Maggie Q Alex Essoe Luke Hemsworth Darren Lynn...,Crime Thriller Drama


In [49]:
movies_2020.to_csv('data_2020_american.csv')

In [50]:
movies_2021 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2021")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']


Error processing actors and directors for row 97: argument of type 'float' is not iterable


  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for let us in: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=let%20us%20in&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7E390B850>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for black widow: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=black%20widow&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7E592A090>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for fear street part two: 1978: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=fear

In [52]:
movies_2021.drop('Title',axis=1,inplace=True)

In [53]:
movies_2021.isnull().sum()

movie_title       1
director_name     0
actor_1_name      0
actor_2_name      0
actor_3_name     26
comb             26
genres           15
dtype: int64

In [55]:
movies_2021.dropna(inplace=True)

In [56]:
movies_2021.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [57]:
len(movies_2021)

325

In [59]:
movies_2021.to_csv("data_2021_american.csv",index=False)

In [60]:
movies_2022 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2022")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']


Error processing actors and directors for row 87: argument of type 'float' is not iterable
Error processing actors and directors for row 88: argument of type 'float' is not iterable
Error processing actors and directors for row 89: argument of type 'float' is not iterable
Error processing actors and directors for row 90: argument of type 'float' is not iterable


  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for apollo 10 1⁄2: a space age childhood: attribute name must be string, not 'int'
Error fetching genres for do revenge: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=do%20revenge&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7F9F91150>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for luckiest girl alive: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=luckiest%20girl%20alive&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7FA017310>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for dead for a dollar: HTTPSConnectionPool(host='api.themoviedb.o

In [61]:
movies_2022

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,The 355,the 355,John Lee Hancock,Donald Sutherland,Jaeden Martell,Joe Tippett,Donald Sutherland Jaeden Martell Joe Tippett J...,Action Thriller
1,The Legend of La Llorona,the legend of la llorona,Damien Leone,Lauren LaVera,Elliot Fullam,Sarah Voigt,Lauren LaVera Elliot Fullam Sarah Voigt Damien...,Family Animation Fantasy Horror
2,The Commando,the commando,"Will Speck, Josh Gordon",Javier Bardem,Constance Wu,Winslow Fegley,Javier Bardem Constance Wu Winslow Fegley Will...,Action Crime Thriller
3,American Siege,american siege,David O. Russell,Christian Bale,Margot Robbie,John David Washington,Christian Bale Margot Robbie John David Washin...,Action Adventure Thriller
4,Scream,scream,Todd Field,Cate Blanchett,Noémie Merlant,Nina Hoss,Cate Blanchett Noémie Merlant Nina Hoss Todd F...,Crime Horror Mystery
...,...,...,...,...,...,...,...,...
86,"Alice, Darling","alice, darling",Mary Nighy,Anna Kendrick,Kaniehtiio Horn,Charlie Carrick,Anna Kendrick Kaniehtiio Horn Charlie Carrick ...,Thriller Drama Romance
87,,,"Maya Forbes, Wallace Wolodarsky (directors/scr...",Sigourney Weaver,Kevin Kline,Morena Baccarin,Sigourney Weaver Kevin Kline Morena Baccarin M...,
88,,,Mike Barker,Mila Kunis,,,,
89,,,Walter Hill,Christoph Waltz,Willem Dafoe,Rachel Brosnahan,Christoph Waltz Willem Dafoe Rachel Brosnahan ...,


In [62]:
movies_2022.isnull().sum()

Title             4
movie_title       4
director_name     0
actor_1_name      0
actor_2_name     14
actor_3_name     24
comb             24
genres           14
dtype: int64

In [63]:
movies_2022.dropna(inplace=True)

In [64]:
len(movies_2022)

281

In [66]:
movies_2022.drop('Title',inplace=True,axis=1)

In [68]:
movies_2022.to_csv('data_2022_american.csv',index=False)

In [69]:
movies_2023 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2023")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']
  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for the exorcist: believer: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=the%20exorcist%3A%20believer&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7FF656F50>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for the caine mutiny court-martial: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=the%20caine%20mutiny%20court-martial&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7FF691F10>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for dicks: the musical: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/se

In [71]:
len(movies_2023)

343

In [72]:
movies_2023.isnull().sum()

Title             0
movie_title       0
director_name     0
actor_1_name      0
actor_2_name      8
actor_3_name     21
comb             21
genres            8
dtype: int64

In [73]:
movies_2023.dropna(inplace=True)

In [74]:
movies_2023.drop('Title',axis=1,inplace=True)

In [75]:
movies_2023.head()

Unnamed: 0,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
1,the old way,Anna Zlokovic,Hadley Robinson,Emily Hampshire,Brandon Mychal Smith,Hadley Robinson Emily Hampshire Brandon Mychal...,Western Drama
2,the devil conspiracy,Nathan Frankowski,Alice Orr-Ewing,Joe Doyle,Eveline Hall,Alice Orr-Ewing Joe Doyle Eveline Hall Nathan ...,Horror Fantasy Science Fiction Thriller
3,plane,Jean-François Richet,Gerard Butler,Mike Colter,Yoson An,Gerard Butler Mike Colter Yoson An Jean-Franço...,Action
4,house party,Calmatic,Tosin Cole,Jacob Latimore,Karen Obilom,Tosin Cole Jacob Latimore Karen Obilom Calmatic,Comedy
5,sick,John Hyams,Gideon Adlon,Bethlehem Million,Marc Menchaca,Gideon Adlon Bethlehem Million Marc Menchaca J...,Horror Thriller Mystery


In [76]:
movies_2023.isna().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [77]:
len(movies_2023)

315

In [78]:
movies_2023.to_csv('data_2023_american.csv',index=False)

In [79]:
movies_2024 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2024")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']
  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for challengers: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=challengers&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7FFC184D0>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for fly me to the moon: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=fly%20me%20to%20the%20moon&page=1&language=en-US (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001B7FFE0F590>, 'Connection to api.themoviedb.org timed out. (connect timeout=None)'))
Error fetching genres for lumina: HTTPSConnectionPool(host='api.themoviedb.org', port=443): Max retries exceeded with url: /3/search/movie?api_key=6d8bfe0dbef34d25e64f64cabec93e20&query=lumi

In [80]:
movies_2024.head()

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,The Mummy Murders,the mummy murders,Todd Phillips,Joaquin Phoenix,Lady Gaga,Zazie Beetz,Joaquin Phoenix Lady Gaga Zazie Beetz Todd Phi...,Horror Crime
1,Self Reliance,self reliance,Marc Forster,Ariella Glaser,Orlando Schwerdt,Bryce Gheisar,Ariella Glaser Orlando Schwerdt Bryce Gheisar ...,Comedy Thriller
2,DarkGame,darkgame,Potsy Ponciroli,Joseph Gordon-Levitt,Lily James,Himesh Patel,Joseph Gordon-Levitt Lily James Himesh Patel P...,Horror Thriller
3,Night Swim,night swim,Morgan Neville,Morgan Neville (director),,,,Horror
4,He Went That Way,he went that way,Damien Leone,Lauren LaVera,Elliot Fullam,Samantha Scaffidi,Lauren LaVera Elliot Fullam Samantha Scaffidi ...,Mystery Thriller Crime Drama


In [81]:
movies_2024.isnull().sum()

Title             0
movie_title       0
director_name     0
actor_1_name      0
actor_2_name      5
actor_3_name     26
comb             26
genres           10
dtype: int64

In [82]:
movies_2024.dropna(inplace=True)

In [83]:
movies_2024.drop('Title',axis=1,inplace=True)

In [84]:
movies_2024.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [85]:
len(movies_2024)

270

In [86]:
movies_2024.to_csv("data_2024_american.csv",index=False)

In [87]:
movies_2025 = create_df("https://en.wikipedia.org/wiki/List_of_American_films_of_2025")

  movies.at[index, 'director_name'] = result['director_name']
  movies.at[index, 'actor_1_name'] = result['actor_1_name']
  movies.at[index, 'actor_2_name'] = result['actor_2_name']
  movies.at[index, 'actor_3_name'] = result['actor_3_name']


Error processing actors and directors for row 11: argument of type 'float' is not iterable
Error processing actors and directors for row 0: argument of type 'float' is not iterable
Error fetching genres for untitled eighth mission: impossible film: attribute name must be string, not 'int'
Error fetching genres for the passion of the christ: resurrection - chapter i: attribute name must be string, not 'int'


  movies.loc[movies['movie_title'] == title, 'genres'] = genres


Error fetching genres for untitled joseph kosinski film: attribute name must be string, not 'int'
Error fetching genres for untitled matt stone and trey parker film: attribute name must be string, not 'int'
Error fetching genres for untitled jurassic world film: attribute name must be string, not 'int'
Error fetching genres for upcoming paul thomas anderson film: attribute name must be string, not 'int'
Error fetching genres for nan: quote_from_bytes() expected bytes


In [88]:
movies_2025.head()

Unnamed: 0,Title,movie_title,director_name,actor_1_name,actor_2_name,actor_3_name,comb,genres
0,Minecraft,minecraft,Maggie Gyllenhaal,Jessie Buckley,Christian Bale,Penélope Cruz,Jessie Buckley Christian Bale Penélope Cruz Ma...,Adventure Fantasy Family Action
1,The Amateur,the amateur,Trey Parker,Kendrick Lamar,,,,Thriller
2,Drop,drop,Christopher Landon,Meghann Fahy,Brandon Sklenar,Jeffery Self,Meghann Fahy Brandon Sklenar Jeffery Self Chri...,Thriller
3,Michael,michael,Simon McQuoid,Lewis Tan,Karl Urban,Jessica McNamee,Lewis Tan Karl Urban Jessica McNamee Simon McQ...,Comedy Drama Fantasy
4,The Passion of the Christ: Resurrection - Chap...,the passion of the christ: resurrection - chap...,Jennifer Kaytin Robinson,Jennifer Kaytin Robinson (director); Leah McKe...,,,,


In [89]:
movies_2025.isnull().sum()

Title             1
movie_title       1
director_name     0
actor_1_name      0
actor_2_name      8
actor_3_name     19
comb             19
genres            7
dtype: int64

In [90]:
len(movies_2025)

43

In [91]:
movies_2025.drop('Title',axis=1,inplace=True)

In [92]:
movies_2025.dropna(inplace=True)

In [93]:
movies_2025.isnull().sum()

movie_title      0
director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
comb             0
genres           0
dtype: int64

In [94]:
len(movies_2025)

19

In [95]:
movies_2025.to_csv('data_2025_american.csv',index=False)

In [96]:
data = pd.read_csv(r"D:\Movies\final_movies_hindi_data.csv")
data1 = pd.read_csv('D:\Movies\data_2018_american.csv')
data2 = pd.read_csv('D:\Movies\data_2019_american.csv')
data3 = pd.read_csv('D:\Movies\data_2020_american.csv')
data4 = pd.read_csv('D:\Movies\data_2021_american.csv')
data5 = pd.read_csv('D:\Movies\data_2022_american.csv')
data6 = pd.read_csv('D:\Movies\data_2023_american.csv')
data7 = pd.read_csv("D:\Movies\data_2024_american.csv")
data8 = pd.read_csv("D:\Movies\data_2025_american.csv")

In [97]:
print(len(data)+len(data1)+len(data2)+len(data3)+len(data4)+len(data5)+len(data6)+len(data7)+len(data8))

7611


In [99]:
print(len(data))

5745


In [100]:
print(len(data1)+len(data2)+len(data3)+len(data4)+len(data5)+len(data6)+len(data7)+len(data8))

1866


In [101]:
final_movies_data =  pd.concat([data,data1,data2,data3,data4,data5,data6,data7,data8],axis=0).reset_index()

In [102]:
final_movies_data

Unnamed: 0.1,index,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb,Unnamed: 0
0,0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi James C...,
1,1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Gore ...,
2,2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman ...,
3,3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises,Tom Hardy Christian Bale Joseph Gordon-Levitt ...,
4,4,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action Adventure Sci-Fi,john carter,Daryl Sabara Samantha Morton Polly Walker Andr...,
...,...,...,...,...,...,...,...,...,...
7606,14,Christopher Landon,Meghann Fahy,Brandon Sklenar,Jeffery Self,Horror Thriller,the black phone 2,Meghann Fahy Brandon Sklenar Jeffery Self Chri...,
7607,15,Simon McQuoid,Lewis Tan,Karl Urban,Jessica McNamee,Action Fantasy Adventure,mortal kombat 2,Lewis Tan Karl Urban Jessica McNamee Simon McQ...,
7608,16,Jon M. Chu,Cynthia Erivo,Ariana Grande,Michelle Yeoh,Drama Fantasy Music,wicked part two,Cynthia Erivo Ariana Grande Michelle Yeoh Jon ...,
7609,17,James Cameron,Sam Worthington,Zoe Saldana,Sigourney Weaver,Science Fiction Adventure Fantasy,avatar 3,Sam Worthington Zoe Saldana Sigourney Weaver J...,


In [103]:
final_movies_data.isnull().sum()

index               0
director_name       0
actor_1_name        0
actor_2_name        0
actor_3_name        0
genres              0
movie_title         0
comb                0
Unnamed: 0       7413
dtype: int64

In [104]:
final_movies_data.drop(["Unnamed: 0",'index'],axis=1,inplace=True)

In [105]:
final_movies_data

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi James C...
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Gore ...
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman ...
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises,Tom Hardy Christian Bale Joseph Gordon-Levitt ...
4,Andrew Stanton,Daryl Sabara,Samantha Morton,Polly Walker,Action Adventure Sci-Fi,john carter,Daryl Sabara Samantha Morton Polly Walker Andr...
...,...,...,...,...,...,...,...
7606,Christopher Landon,Meghann Fahy,Brandon Sklenar,Jeffery Self,Horror Thriller,the black phone 2,Meghann Fahy Brandon Sklenar Jeffery Self Chri...
7607,Simon McQuoid,Lewis Tan,Karl Urban,Jessica McNamee,Action Fantasy Adventure,mortal kombat 2,Lewis Tan Karl Urban Jessica McNamee Simon McQ...
7608,Jon M. Chu,Cynthia Erivo,Ariana Grande,Michelle Yeoh,Drama Fantasy Music,wicked part two,Cynthia Erivo Ariana Grande Michelle Yeoh Jon ...
7609,James Cameron,Sam Worthington,Zoe Saldana,Sigourney Weaver,Science Fiction Adventure Fantasy,avatar 3,Sam Worthington Zoe Saldana Sigourney Weaver J...


In [106]:
final_movies_data.isnull().sum()

director_name    0
actor_1_name     0
actor_2_name     0
actor_3_name     0
genres           0
movie_title      0
comb             0
dtype: int64

In [107]:
final_movies_data['movie_title'] = final_movies_data['movie_title'].str.lower()

In [108]:
final_movies_data.to_csv("final_movies_data.csv",index=True)