In [1]:
import json
import os

import pandas as pd
import requests

# --- Configuration -----------------------------------------------------------
# SECURITY: the TMDB API key is read from the environment rather than being
# committed in the notebook. Export TMDB_API_KEY before running this cell.
api_key = os.environ.get('TMDB_API_KEY')
if not api_key:
    raise RuntimeError(
        'TMDB_API_KEY is not set; export your TMDB API key before running this cell.'
    )
base_url = 'https://api.themoviedb.org/3'
headers = {'Content-Type': 'application/json;charset=utf-8'}
REQUEST_TIMEOUT = 30  # seconds; stops a stalled connection from hanging the cell forever
movie_data = []


def _get_json(url, params):
    """GET ``url`` with query ``params`` and return the decoded JSON body.

    Raises ``requests.HTTPError`` on a non-2xx response, so a failed call is
    reported as such instead of surfacing later as a ``KeyError`` on the
    error payload.
    """
    response = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


# Loop through release years 1990..2022 (the upper bound of range() is exclusive).
for year in range(1990, 2023):
    # Most popular movies released in this year. No `page` parameter is sent,
    # so only the API's default page of results is fetched per year.
    discover = _get_json(
        f'{base_url}/discover/movie',
        {
            'api_key': api_key,
            'language': 'en-US',
            'sort_by': 'popularity.desc',
            'primary_release_year': year,
        },
    )

    for movie in discover['results']:
        movie_id = movie['id']

        # Per-movie details; `append_to_response=credits` adds the cast and
        # crew lists to the same response. Kept in its own name (`details`)
        # so the discover response being iterated is never shadowed.
        details = _get_json(
            f'{base_url}/movie/{movie_id}',
            {
                'api_key': api_key,
                'language': 'en-US',
                'append_to_response': 'credits',
            },
        )

        genres = [genre['name'] for genre in details['genres']]
        # Only the first three cast entries are kept.
        actors = [actor['name'] for actor in details['credits']['cast']][:3]
        # A movie can list zero, one or several directors, so this stays a list here.
        director = [
            crew['name']
            for crew in details['credits']['crew']
            if crew['job'] == 'Director'
        ]
        production_companies = [
            company['name'] for company in details['production_companies']
        ]

        # NOTE: the "Unkown" placeholder (sic) is kept byte-for-byte so that
        # regenerated data matches previously exported files.
        movie_data.append({
            'id': movie_id,
            'director_name': director,
            'actor_1_name': actors[0] if actors else "Unkown",
            'actor_2_name': actors[1] if (len(actors) >= 2 and actors[1]) else "Unkown",
            'actor_3_name': actors[2] if (len(actors) >= 3 and actors[2]) else "Unkown",
            'genres': genres,
            'movie_title': movie['title'],
            'release_date': movie['release_date'],
            'vote_average': movie['vote_average'],
            'overview': movie['overview'],
            'poster_path': movie['poster_path'],
            'budget': details['budget'],
            'runtime': details['runtime'],
            'revenue': details['revenue'],
            'production_companies': production_companies
        })
        

In [2]:
# Turn the list of per-movie dicts into a DataFrame (one row per movie).
df = pd.DataFrame(data=movie_data)

# Persist the raw scrape; the row index is not written to the file.
df.to_csv(path_or_buf='movie_data.csv', index=False)

# Show the resulting column names as the cell output.
df.columns


Index(['id', 'director_name', 'actor_1_name', 'actor_2_name', 'actor_3_name',
       'genres', 'movie_title', 'release_date', 'vote_average', 'overview',
       'poster_path', 'budget', 'runtime', 'revenue', 'production_companies'],
      dtype='object')

In [3]:
# Columns that none of the later cells visible in this notebook read.
c = ['id', 'release_date', 'vote_average', 'overview', 'poster_path',
     'runtime', 'revenue', 'budget', 'production_companies']

# Reassign instead of dropping in place, and ignore labels that are already
# gone, so re-running this cell is a no-op rather than a KeyError.
# Trade-off: a misspelled label in `c` is silently ignored too.
df = df.drop(columns=c, errors='ignore')

In [4]:
# Store every movie title in lower case.
lowercase_titles = df['movie_title'].str.lower()
df['movie_title'] = lowercase_titles

In [5]:
# Preview the first five rows (5 is the pandas default, spelled out here).
df.head(n=5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,[Lam Nai-Choi],Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,"[Fantasy, Drama, Horror]",erotic ghost story
1,[Mitsuo Hashimoto],Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,"[Animation, Action, Science Fiction, Drama, Fa...",dragon ball z: bardock - the father of goku
2,[Férid Boughedir],Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,"[Comedy, Drama]",halfaouine: boy of the terraces
3,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",dragon ball z: the tree of might
4,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",dragon ball z: the world's strongest


In [6]:
def get_director(x):
    """Reduce a collection of director names to a single name.

    Parameters
    ----------
    x : sequence of str, str, or None
        Usually the list of director names gathered for one movie.

    Returns
    -------
    str
        The first name in ``x``; ``"unknown"`` when ``x`` is empty or ``None``.
        A non-empty string is returned unchanged.
    """
    if x is None:
        return "unknown"
    if isinstance(x, str):
        # Already a single name (e.g. the cell applying this function was
        # re-run). Indexing it would return only the first character.
        return x if x else "unknown"
    return x[0] if len(x) else "unknown"


# Replace each movie's director list with the single name chosen by get_director.
df['director_name'] = df['director_name'].apply(get_director)


In [7]:
# Collapse each list of genre names into one space-separated string.
# Values that are already strings are left alone: str.join on a string would
# put a space between every character, so this guard makes the cell safe to
# re-run.
df['genres'] = df['genres'].apply(
    lambda x: x if isinstance(x, str) else ' '.join(x)
)

In [8]:
# Preview the first five rows now that directors and genres are plain strings.
df.head(n=5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,Lam Nai-Choi,Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,Fantasy Drama Horror,erotic ghost story
1,Mitsuo Hashimoto,Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,Animation Action Science Fiction Drama Fantasy,dragon ball z: bardock - the father of goku
2,Férid Boughedir,Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,Comedy Drama,halfaouine: boy of the terraces
3,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the tree of might
4,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the world's strongest


In [9]:
# Build the `comb` column: the three actor names, the director name and the
# genres string, concatenated in that order with single spaces in between.
comb_columns = ['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name', 'genres']
comb = df[comb_columns[0]]
for column in comb_columns[1:]:
    comb = comb + ' ' + df[column]
df['comb'] = comb

In [10]:
# Preview the first five rows, including the new `comb` column.
df.head(n=5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Lam Nai-Choi,Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,Fantasy Drama Horror,erotic ghost story,Pal Sinn Lap-man Amy Yip Hitomi Kudô Lam Nai-C...
1,Mitsuo Hashimoto,Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,Animation Action Science Fiction Drama Fantasy,dragon ball z: bardock - the father of goku,Masako Nozawa Kazuyuki Sogabe Yuko Mita Mitsuo...
2,Férid Boughedir,Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,Comedy Drama,halfaouine: boy of the terraces,Selim Boughedir Mustafa Adouani Rabia Ben Abda...
3,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the tree of might,Masako Nozawa Toshio Furukawa Mayumi Tanaka Da...
4,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the world's strongest,Masako Nozawa Toshio Furukawa Mayumi Tanaka Da...


In [11]:
# Show the last five rows (5 is the pandas default, spelled out here).
df.tail(n=5)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
655,Scott Mann,Grace Caroline Currey,Virginia Gardner,Jeffrey Dean Morgan,Thriller Drama,fall,Grace Caroline Currey Virginia Gardner Jeffrey...
656,Bobby Easley,Portia Chellelynn,Julie Anne Prescott,Andrea Collins,Horror,h.p. lovecraft's witch house,Portia Chellelynn Julie Anne Prescott Andrea C...
657,Mako Graves,Alexis Herrera Alquijay,Victor Arroyo,Kiara Bennett,Horror,solo somos carne,Alexis Herrera Alquijay Victor Arroyo Kiara Be...
658,Matthew Warchus,Alisha Weir,Emma Thompson,Lashana Lynch,Family Comedy Fantasy,roald dahl's matilda the musical,Alisha Weir Emma Thompson Lashana Lynch Matthe...
659,Jeff Fowler,James Marsden,Ben Schwartz,Tika Sumpter,Action Adventure Family Comedy,sonic the hedgehog 2,James Marsden Ben Schwartz Tika Sumpter Jeff F...


In [12]:
# Export the processed frame. The row index IS written as the first column
# (pandas' default, made explicit here), unlike the earlier movie_data.csv
# export, which passes index=False.
df.to_csv("main_data.csv", index=True)

In [13]:
# Show the first 15 movie titles.
df['movie_title'].head(15)

0                              erotic ghost story
1     dragon ball z: bardock - the father of goku
2                 halfaouine: boy of the terraces
3                dragon ball z: the tree of might
4            dragon ball z: the world's strongest
5                          the godfather part iii
6                                      goodfellas
7                                    pretty woman
8                                      predator 2
9                                         rocky v
10                                          ghost
11                                      lionheart
12                            edward scissorhands
13                                 child's play 2
14                                        tremors
Name: movie_title, dtype: object