In [17]:
import json
import os

import pandas as pd
import requests

# --- Configuration -----------------------------------------------------------
# The TMDB API key is a credential, so it is read from the environment instead
# of being committed with the notebook. Export TMDB_API_KEY before running.
api_key = os.environ.get('TMDB_API_KEY')
if not api_key:
    raise RuntimeError('Set the TMDB_API_KEY environment variable before running this cell.')

base_url = 'https://api.themoviedb.org/3'
headers = {'Content-Type': 'application/json;charset=utf-8'}
request_timeout = 10  # seconds; a stalled connection should not hang the whole pull
movie_data = []


def _tmdb_get(session, path, **params):
    """Send a GET to ``base_url + path`` and return the decoded JSON body.

    ``api_key`` and ``language=en-US`` are added to every request; ``params``
    supplies the endpoint-specific query parameters.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so an error payload is
        never mistaken for movie data.
    """
    query = {'api_key': api_key, 'language': 'en-US', **params}
    response = session.get(f'{base_url}{path}', params=query,
                           headers=headers, timeout=request_timeout)
    response.raise_for_status()
    return response.json()


def _actor_name(actors, position):
    """Return the actor name at ``position``, or the placeholder when it is missing or empty."""
    # NOTE: the placeholder keeps its existing spelling ("Unkown") so that
    # regenerated rows carry the same value the original cell produced.
    if len(actors) > position and actors[position]:
        return actors[position]
    return "Unkown"


# One session for all requests so the underlying connection is reused.
with requests.Session() as session:
    # loop through years from 1990 to 2022
    for year in range(1990, 2023):
        # Movies released in this year, most popular first. No `page` parameter
        # is sent, so only the first page of results is retrieved.
        discovered = _tmdb_get(session, '/discover/movie',
                               sort_by='popularity.desc',
                               primary_release_year=year)

        for movie in discovered['results']:
            movie_id = movie['id']

            # Per-movie details; cast and crew are appended to the same response.
            details = _tmdb_get(session, f'/movie/{movie_id}',
                                append_to_response='credits')
            cast = details['credits']['cast']
            crew = details['credits']['crew']

            # First three cast entries, in the order the API returns them.
            actors = [member['name'] for member in cast[:3]]
            # Kept as a list here (there may be zero or several directors).
            director = [member['name'] for member in crew if member['job'] == 'Director']

            # append movie details to list
            movie_data.append({
                'id': movie_id,
                'director_name': director,
                'actor_1_name': _actor_name(actors, 0),
                'actor_2_name': _actor_name(actors, 1),
                'actor_3_name': _actor_name(actors, 2),
                'genres': [genre['name'] for genre in details['genres']],
                'movie_title': movie['title'],
                'release_date': movie['release_date'],
                'vote_average': movie['vote_average'],
                'overview': movie['overview'],
                'poster_path': movie['poster_path'],
                'budget': details['budget'],
                'runtime': details['runtime'],
                'revenue': details['revenue'],
                'production_companies': [company['name'] for company in details['production_companies']],
            })
        

In [18]:
# Turn the collected per-movie records into a DataFrame (one row per record).
df = pd.DataFrame(data=movie_data)

# Persist the unprocessed pull; the positional index is not written to the file.
df.to_csv(path_or_buf='movie_data.csv', index=False)

# Show the column labels of the new frame.
df.columns


Index(['id', 'director_name', 'actor_1_name', 'actor_2_name', 'actor_3_name',
       'genres', 'movie_title', 'release_date', 'vote_average', 'overview',
       'poster_path', 'budget', 'runtime', 'revenue', 'production_companies'],
      dtype='object')

In [19]:
# Preview the first rows of the frame with every collected column still present.
df.head()

Unnamed: 0,id,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,release_date,vote_average,overview,poster_path,budget,runtime,revenue,production_companies
0,60898,[Lam Nai-Choi],Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,"[Fantasy, Drama, Horror]",Erotic Ghost Story,1990-05-19,5.9,"Three vixens have meditated for 1,000 years to...",/fTUCwsxVoLFuodAtlL6aITP1B45.jpg,0,88,0,[Diagonal Pictures]
1,44281,[Férid Boughedir],Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,"[Comedy, Drama]",Halfaouine: Boy of the Terraces,1990-09-14,6.8,Twelve-year-old Noura dangles uncertainly in t...,/uI7hb9flZetd0KZoN2BGHLq4uvZ.jpg,0,98,0,"[Les Films du Scarabée, Cinétéléfilms, France ..."
2,39323,[Mitsuo Hashimoto],Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,"[Animation, Action, Science Fiction, Drama, Fa...",Dragon Ball Z: Bardock - The Father of Goku,1990-10-17,7.5,"Bardock, Son Goku's father, is a low-ranking S...",/docwOsWZuwJKQdwcYJtQPBcg6bm.jpg,0,48,0,"[Bird Studios, Toei Animation, Toei Company]"
3,39100,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",Dragon Ball Z: The World's Strongest,1990-03-10,6.5,The evil Dr. Kochin uses the dragon balls to r...,/5elbm3iLgGQ6nA5vqUmi9vIojbF.jpg,0,60,18000000,"[Toei Company, Toei Animation]"
4,39101,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",Dragon Ball Z: The Tree of Might,1990-06-07,6.5,Goku and friends must stop a band of space pir...,/fDX4Dp8IKvjBAaEb5MOJrGkxWX0.jpg,0,61,15000000,[Toei Animation]


In [20]:
# Columns that are removed before the text features are built.
c = ['id', 'release_date', 'vote_average', 'overview', 'poster_path',
     'runtime', 'revenue', 'budget', 'production_companies']

# Rebind `df` to the reduced frame instead of dropping in place.
# errors='ignore' makes the cell safe to re-run: once the columns are gone,
# a second run is a no-op rather than a KeyError.
df = df.drop(columns=c, errors='ignore')

In [21]:
# Normalise titles to lower case.
lowercase_titles = df['movie_title'].str.lower()
df['movie_title'] = lowercase_titles

In [22]:
# Inspect the frame after the column drop and title lower-casing.
df.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,[Lam Nai-Choi],Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,"[Fantasy, Drama, Horror]",erotic ghost story
1,[Férid Boughedir],Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,"[Comedy, Drama]",halfaouine: boy of the terraces
2,[Mitsuo Hashimoto],Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,"[Animation, Action, Science Fiction, Drama, Fa...",dragon ball z: bardock - the father of goku
3,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",dragon ball z: the world's strongest
4,[Daisuke Nishio],Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,"[Action, Animation, Science Fiction]",dragon ball z: the tree of might


In [23]:
def get_director(x):
    """Reduce one ``director_name`` value to a single name.

    Parameters
    ----------
    x : sequence of str, str, or None
        Normally the list of director names collected for a movie. A plain
        string is accepted so that applying the function a second time (for
        example when the cell is re-run) is harmless.

    Returns
    -------
    str
        The first name of the sequence, the string itself when ``x`` is
        already a non-empty string, or ``"unknown"`` when ``x`` is ``None``
        or empty.
    """
    if x is None:
        return "unknown"
    if isinstance(x, str):
        # Indexing a string would return only its first character.
        return x if x else "unknown"
    return x[0] if len(x) else "unknown"


# Collapse each director list to a single name; the function is passed
# directly because it already takes exactly one value.
df['director_name'] = df['director_name'].apply(get_director)


In [24]:
# Collapse each movie's genre list into one space-separated string.
# Values that are already strings are passed through unchanged: joining a
# string would insert a space between every character, so without this guard
# a second run of the cell would corrupt the column.
df['genres'] = df['genres'].apply(
    lambda x: x if isinstance(x, str) else ' '.join(x)
)

In [25]:
# Check that director_name and genres now display as plain strings.
df.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,Lam Nai-Choi,Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,Fantasy Drama Horror,erotic ghost story
1,Férid Boughedir,Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,Comedy Drama,halfaouine: boy of the terraces
2,Mitsuo Hashimoto,Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,Animation Action Science Fiction Drama Fantasy,dragon ball z: bardock - the father of goku
3,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the world's strongest
4,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the tree of might


In [26]:
# Build the combined text feature: the three actor names, the director and the
# genres, joined left to right with single spaces.
comb_columns = ['actor_1_name', 'actor_2_name', 'actor_3_name', 'director_name', 'genres']

combined = df[comb_columns[0]]
for column in comb_columns[1:]:
    combined = combined + ' ' + df[column]

df['comb'] = combined

In [27]:
# Confirm the new 'comb' column is present alongside the source columns.
df.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,Lam Nai-Choi,Pal Sinn Lap-man,Amy Yip,Hitomi Kudô,Fantasy Drama Horror,erotic ghost story,Pal Sinn Lap-man Amy Yip Hitomi Kudô Lam Nai-C...
1,Férid Boughedir,Selim Boughedir,Mustafa Adouani,Rabia Ben Abdallah,Comedy Drama,halfaouine: boy of the terraces,Selim Boughedir Mustafa Adouani Rabia Ben Abda...
2,Mitsuo Hashimoto,Masako Nozawa,Kazuyuki Sogabe,Yuko Mita,Animation Action Science Fiction Drama Fantasy,dragon ball z: bardock - the father of goku,Masako Nozawa Kazuyuki Sogabe Yuko Mita Mitsuo...
3,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the world's strongest,Masako Nozawa Toshio Furukawa Mayumi Tanaka Da...
4,Daisuke Nishio,Masako Nozawa,Toshio Furukawa,Mayumi Tanaka,Action Animation Science Fiction,dragon ball z: the tree of might,Masako Nozawa Toshio Furukawa Mayumi Tanaka Da...


In [28]:
# Look at the last rows; records were appended in ascending release-year order,
# so these come from the final year that was fetched.
df.tail()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
655,Scott Mann,Grace Caroline Currey,Virginia Gardner,Jeffrey Dean Morgan,Thriller Drama,fall,Grace Caroline Currey Virginia Gardner Jeffrey...
656,Jacobo Martínez,María Romanillos,Ruth Díaz,Urko Olazábal,Drama Horror,13 exorcisms,María Romanillos Ruth Díaz Urko Olazábal Jacob...
657,Dean Fleischer Camp,Jenny Slate,Dean Fleischer Camp,Isabella Rossellini,Animation Comedy Family Drama Adventure,marcel the shell with shoes on,Jenny Slate Dean Fleischer Camp Isabella Rosse...
658,Bobby Easley,Portia Chellelynn,Julie Anne Prescott,Andrea Collins,Horror,h.p. lovecraft's witch house,Portia Chellelynn Julie Anne Prescott Andrea C...
659,Taika Waititi,Chris Hemsworth,Natalie Portman,Christian Bale,Fantasy Action Comedy,thor: love and thunder,Chris Hemsworth Natalie Portman Christian Bale...


In [29]:
# Write the processed frame to disk. The row index is written as well (it
# becomes an unnamed first column in the file), unlike the movie_data.csv
# export, which passes index=False.
# NOTE(review): confirm whether readers of main_data.csv expect that column.
df.to_csv("main_data.csv", index=True)

In [30]:
# Show the first fifteen titles.
df['movie_title'].head(15)

0                              erotic ghost story
1                 halfaouine: boy of the terraces
2     dragon ball z: bardock - the father of goku
3            dragon ball z: the world's strongest
4                dragon ball z: the tree of might
5                                      predator 2
6                                       lionheart
7                                    pretty woman
8                          the godfather part iii
9                                      goodfellas
10                            edward scissorhands
11                                        tremors
12                                          ghost
13                                   total recall
14                                 child's play 2
Name: movie_title, dtype: object