# Data Extraction from the TMDB API

In [None]:
import requests
import pandas as pd

import json

# Read the API credentials from a local JSON file so the key itself is
# not written into the notebook.
with open("api_token.json", "r") as f:
    token = json.loads(f.read())

api_key = token["api_key"]

# Movie IDs to request, one API call per entry.
# NOTE(review): the leading 0 is presumably a deliberately invalid ID used to
# exercise the error path (the recorded output shows 18 rows for 19 IDs) —
# confirm before removing it.
MOVIE_IDS = [
    0,
    299534, 19995, 140607, 299536, 597, 135397,
    420818, 24428, 168259, 99861, 284054, 12445,
    181808, 330457, 351286, 109445, 321612, 260513,
]

def fetch_movie_details(movie_id):
    """Fetch the details of a single movie from the TMDB v3 API.

    Sends a GET request to ``/3/movie/{movie_id}`` authenticated with the
    module-level ``api_key`` as a Bearer token.

    Args:
        movie_id: Movie identifier interpolated into the request URL.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        failed for any reason (a message is printed in that case).
    """
    url = f"https://api.themoviedb.org/3/movie/{movie_id}"
    headers = {
        # Must match the lowercase `api_key` assigned when the token file is
        # loaded; the former `API_KEY` spelling is not defined in the visible
        # notebook and raised NameError on a fresh kernel.
        "Authorization": f"Bearer {api_key}",
        "accept": "application/json"
    }

    try:
        # Without a timeout a stalled connection would block the cell
        # indefinitely; a timeout error lands in the generic handler below.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        # 4xx/5xx responses surfaced by raise_for_status().
        print(f"Error fetching movie ID {movie_id}: {e}")
        return None
    except Exception as e:
        # Best-effort: any other failure (connection, timeout, bad JSON) is
        # reported and skipped so one bad ID does not abort the whole run.
        print(f"Unexpected error for movie ID {movie_id}: {e}")
        return None

# Request every ID in MOVIE_IDS and keep only the successful responses;
# fetch_movie_details returns None on failure.
movies_data = []
for movie_id in MOVIE_IDS:
    movie = fetch_movie_details(movie_id)
    if movie:
        movies_data.append(movie)
    else:
        print(f"Skipped movie ID {movie_id}")

# Build the frame once from the collected records.
df = pd.DataFrame(movies_data)

# Flatten each list of genre dicts into a comma-separated string of names.
# Guarded so that a run where every fetch failed (empty frame, no 'genres'
# column) does not raise KeyError, and so that a missing/NaN cell becomes ''
# instead of raising TypeError when iterated.
if 'genres' in df.columns:
    df['genres'] = df['genres'].apply(
        lambda x: ', '.join(g['name'] for g in x) if isinstance(x, list) else ''
    )

print(f"\nSuccessfully fetched {len(df)} movies")
print("\nDataFrame info:")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() emitted a stray "None" line.
df.info()
print("\nFirst 5 movies:")
print(df.head())

In [None]:
# Persist the fetched movie table to disk as CSV, leaving out the row index.
df.to_csv(path_or_buf='tmdb_specific_movies.csv', index=False)
print("\nData saved to 'tmdb_specific_movies.csv'")

In [None]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,origin_country,original_language,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"Adventure, Science Fiction, Action",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,[US],en,...,2019-04-24,2799439100,181,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Avenge the fallen.,Avengers: Endgame,False,8.238,26202
1,False,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"Action, Adventure, Fantasy, Science Fiction",https://www.avatar.com/movies/avatar,19995,tt0499549,[US],en,...,2009-12-15,2923706026,162,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Enter the world of Pandora.,Avatar,False,7.587,32106
2,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"Adventure, Action, Science Fiction",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,[US],en,...,2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.262,19662
3,False,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"Adventure, Action, Science Fiction",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,[US],en,...,2018-04-25,2052415039,149,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Destiny arrives all the same.,Avengers: Infinity War,False,8.236,30381
4,False,/sCzcYW9h55WcesOqA12cgEr9Exw.jpg,,200000000,"Drama, Romance",https://www.paramountmovies.com/movies/titanic,597,tt0120338,[US],en,...,1997-11-18,2264162353,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on Earth could come between them.,Titanic,False,7.905,25862


In [None]:
# List the column labels of the fetched movie table.
df.columns

Index(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'genres',
       'homepage', 'id', 'imdb_id', 'origin_country', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'video', 'vote_average', 'vote_count'],
      dtype='object')

In [None]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
adult,0
backdrop_path,0
belongs_to_collection,2
budget,0
genres,0
homepage,0
id,0
imdb_id,0
origin_country,0
original_language,0


In [None]:
# Summarise each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  18 non-null     bool   
 1   backdrop_path          18 non-null     object 
 2   belongs_to_collection  16 non-null     object 
 3   budget                 18 non-null     int64  
 4   genres                 18 non-null     object 
 5   homepage               18 non-null     object 
 6   id                     18 non-null     int64  
 7   imdb_id                18 non-null     object 
 8   origin_country         18 non-null     object 
 9   original_language      18 non-null     object 
 10  original_title         18 non-null     object 
 11  overview               18 non-null     object 
 12  popularity             18 non-null     float64
 13  poster_path            18 non-null     object 
 14  production_companies   18 non-null     object 
 15  producti

In [None]:
# Descriptive statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()

Unnamed: 0,budget,id,popularity,revenue,runtime,vote_average,vote_count
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,215444400.0,192258.444444,20.856317,1691808000.0,137.944444,7.386833,19829.333333
std,68764920.0,132476.949284,8.066763,521083700.0,23.84872,0.501169,7365.914919
min,125000000.0,597.0,10.8758,1242805000.0,102.0,6.538,9866.0
25%,162500000.0,102257.0,14.960475,1334902000.0,125.25,7.1325,13648.0
50%,200000000.0,175033.5,18.1282,1484542000.0,135.5,7.2665,20141.0
75%,243000000.0,299535.5,22.660475,1957196000.0,147.5,7.69725,25232.0
max,365000000.0,420818.0,35.5559,2923706000.0,194.0,8.238,32106.0


In [None]:
# (rows, columns) of the fetched movie table.
df.shape

(18, 26)