In [2]:
import json
import os

import pandas as pd
import requests

# Scrape TMDB: for every release year, take the first page of the most popular
# movies and enrich each one with budget, runtime, genres, cast and crew.
# The result is accumulated in `movie_data` as one dict per movie.

# The API key is read from the environment so the secret never lives in the notebook.
api_key = os.environ.get('TMDB_API_KEY')
if not api_key:
    raise RuntimeError('Set the TMDB_API_KEY environment variable to your TMDB API key')
base_url = 'https://api.themoviedb.org/3'
headers = {'Content-Type': 'application/json;charset=utf-8'}
movie_data = []

REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell forever
UNKNOWN_ACTOR = "Unkown"  # (sic) spelling kept so new rows match previously generated data


def _top_cast_names(cast, count=3):
    """Return exactly `count` actor names from a cast list, padded with UNKNOWN_ACTOR.

    Missing or empty names are replaced by the placeholder as well, so every
    actor column always holds a non-empty string.
    """
    names = [member.get('name') or UNKNOWN_ACTOR for member in cast[:count]]
    return names + [UNKNOWN_ACTOR] * (count - len(names))


# One session reuses the underlying connection across the many requests below.
with requests.Session() as session:
    session.headers.update(headers)

    # loop through years from 1900 to 2022
    for year in range(1900, 2023):
        # first page of movies released in this year, most popular first
        discover_response = session.get(
            f'{base_url}/discover/movie',
            params={
                'api_key': api_key,
                'language': 'en-US',
                'sort_by': 'popularity.desc',
                'primary_release_year': year,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Fail with the HTTP error instead of a KeyError on an error payload.
        discover_response.raise_for_status()

        # loop through results and get movie details
        for movie in discover_response.json()['results']:
            movie_id = movie['id']

            # per-movie details, with the credits embedded in the same response
            details_response = session.get(
                f'{base_url}/movie/{movie_id}',
                params={
                    'api_key': api_key,
                    'language': 'en-US',
                    'append_to_response': 'credits',
                },
                timeout=REQUEST_TIMEOUT,
            )
            details_response.raise_for_status()
            details = details_response.json()

            credits = details.get('credits') or {}
            actors = _top_cast_names(credits.get('cast') or [])
            # Kept as a list: a movie may have zero, one or several directors.
            director = [
                member['name']
                for member in credits.get('crew') or []
                if member.get('job') == 'Director'
            ]

            # append movie details to list (key order defines the CSV column order)
            movie_data.append({
                'id': movie_id,
                'director_name': director,
                'actor_1_name': actors[0],
                'actor_2_name': actors[1],
                'actor_3_name': actors[2],
                'genres': [genre['name'] for genre in details.get('genres') or []],
                'movie_title': movie['title'],
                'release_date': movie.get('release_date'),
                'vote_average': movie.get('vote_average'),
                'overview': movie.get('overview'),
                'poster_path': movie.get('poster_path'),
                'budget': details.get('budget'),
                'runtime': details.get('runtime'),
                'revenue': details.get('revenue'),
                'production_companies': [
                    company['name'] for company in details.get('production_companies') or []
                ],
            })
        

In [3]:
# Turn the scraped records into a table, one row per movie.
df = pd.DataFrame(data=movie_data)

# Persist the unmodified scrape to disk, leaving out the row index.
df.to_csv(path_or_buf='movie_data.csv', index=False)

# Show the column labels of the raw table.
df.columns


Index(['id', 'director_name', 'actor_1_name', 'actor_2_name', 'actor_3_name',
       'genres', 'movie_title', 'release_date', 'vote_average', 'overview',
       'poster_path', 'budget', 'runtime', 'revenue', 'production_companies'],
      dtype='object')

In [4]:
# Columns that are not needed for the cast/director/genre text features.
c=['id', 'release_date','vote_average','overview','poster_path','runtime', 'revenue','budget','production_companies']
# Rebind instead of dropping in place, and skip labels that are already gone,
# so re-running this cell does not raise KeyError.
df = df.drop(columns=c, errors='ignore')

In [5]:
# Normalise titles to lower case.
df = df.assign(movie_title=lambda frame: frame['movie_title'].str.lower())

In [6]:
# Preview the first five rows of the trimmed table.
df.head(5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,[Georges Méliès],Bleuette Bernon,Georges Méliès,Jehanne d'Alcy,"[Drama, History]",joan of arc
1,[Georges Méliès],Georges Méliès,Unkown,Unkown,"[Comedy, Fantasy, Music]",the one-man band
2,[J. Stuart Blackton],J. Stuart Blackton,Unkown,Unkown,"[Animation, Comedy, Fantasy]",the enchanted drawing
3,[George Albert Smith],Harold Smith,Unkown,Unkown,[Drama],grandma's reading glass
4,[Gabriel Veyre],Unkown,Unkown,Unkown,[Documentary],le village de namo - panorama pris d'une chais...


In [7]:
def get_director(x):
    """Return a single director name for one `director_name` cell.

    Args:
        x: Usually a list of director names. A plain string is also accepted
           and returned unchanged, so applying this function to an
           already-flattened column does not truncate names to their first
           character.

    Returns:
        The first name in the list, the string itself, or "unknown" when the
        value is empty or has no length (for example None or NaN).
    """
    if isinstance(x, str):
        # Already flattened (e.g. the cell was run twice): keep the full name.
        return x if x else "unknown"
    try:
        return x[0] if len(x) else "unknown"
    except TypeError:
        # None / NaN and other values without a length
        return "unknown"


# Collapse each director value to a single name via get_director.
df['director_name'] = df['director_name'].apply(get_director)


In [8]:
# Flatten each genre list into one space-separated string. Values that are
# already strings are left untouched, so re-running this cell does not split
# them into individual characters.
df['genres'] = df['genres'].apply(lambda x: x if isinstance(x, str) else ' '.join(x))

In [9]:
# Preview the first five rows with director_name and genres flattened.
df.head(5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,Georges Méliès,Bleuette Bernon,Georges Méliès,Jehanne d'Alcy,Drama History,joan of arc
1,Georges Méliès,Georges Méliès,Unkown,Unkown,Comedy Fantasy Music,the one-man band
2,J. Stuart Blackton,J. Stuart Blackton,Unkown,Unkown,Animation Comedy Fantasy,the enchanted drawing
3,George Albert Smith,Harold Smith,Unkown,Unkown,Drama,grandma's reading glass
4,Gabriel Veyre,Unkown,Unkown,Unkown,Documentary,le village de namo - panorama pris d'une chais...


In [10]:
# Build the combined text feature: the three actors first, then director and genres,
# all separated by single spaces.
cast_text = df['actor_1_name'] + ' ' + df['actor_2_name'] + ' ' + df['actor_3_name']
df['comb'] = cast_text + ' ' + df['director_name'] + ' ' + df['genres']

In [11]:
# Preview the first five rows including the new 'comb' column.
df.head(5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Georges Méliès,Bleuette Bernon,Georges Méliès,Jehanne d'Alcy,Drama History,joan of arc,Bleuette Bernon Georges Méliès Jehanne d'Alcy ...
1,Georges Méliès,Georges Méliès,Unkown,Unkown,Comedy Fantasy Music,the one-man band,Georges Méliès Unkown Unkown Georges Méliès Co...
2,J. Stuart Blackton,J. Stuart Blackton,Unkown,Unkown,Animation Comedy Fantasy,the enchanted drawing,J. Stuart Blackton Unkown Unkown J. Stuart Bla...
3,George Albert Smith,Harold Smith,Unkown,Unkown,Drama,grandma's reading glass,Harold Smith Unkown Unkown George Albert Smith...
4,Gabriel Veyre,Unkown,Unkown,Unkown,Documentary,le village de namo - panorama pris d'une chais...,Unkown Unkown Unkown Gabriel Veyre Documentary


In [12]:
# Inspect the last five rows of the table.
df.tail(5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
2455,Daniel Scheinert,Michelle Yeoh,Ke Huy Quan,Stephanie Hsu,Action Adventure Science Fiction,everything everywhere all at once,Michelle Yeoh Ke Huy Quan Stephanie Hsu Daniel...
2456,Álex de la Iglesia,Blanca Suárez,Alberto San Juan,Rubén Cortada,Comedy,four's a crowd,Blanca Suárez Alberto San Juan Rubén Cortada Á...
2457,Matt Bettinelli-Olpin,Melissa Barrera,Mason Gooding,Jenna Ortega,Horror Mystery Thriller,scream,Melissa Barrera Mason Gooding Jenna Ortega Mat...
2458,Lorcan Finnegan,Eva Green,Mark Strong,Chai Fonacier,Mystery Thriller Horror,nocebo,Eva Green Mark Strong Chai Fonacier Lorcan Fin...
2459,Alfonso Pineda Ulloa,José María Yázpik,Shannyn Sossamon,Tommy Flanagan,Action,there are no saints,José María Yázpik Shannyn Sossamon Tommy Flana...


In [13]:
# Save the processed table; the row index is written as the first (unnamed) column.
df.to_csv(path_or_buf="main_data.csv", index=True)

In [21]:
# Show the first 15 movie titles.
df['movie_title'].head(15)

0                                           joan of arc
1                                      the one-man band
2                                 the enchanted drawing
3                               grandma's reading glass
4     le village de namo - panorama pris d'une chais...
5                                    let me dream again
6                                   the christmas dream
7     naval apprentices at sail drill on historic sh...
8                               how he missed his train
9                              explosion of a motor car
10                              sherlock holmes baffled
11                                   a fantastical meal
12                     the fat and lean wrestling match
13                          as seen through a telescope
14                             panorama of eiffel tower
Name: movie_title, dtype: object