In [2]:
# Disabled on purpose: the code is wrapped in a string literal, so nothing is written to disk.
# Remove the surrounding triple quotes to regenerate ../requirements.txt with the pins below.
# NOTE(review): `requests` is imported by this notebook but is not in this list — confirm whether it should be pinned too.
'''with open('../requirements.txt', 'w') as f:
    f.write('pandas == 2.1.3\nnumpy == 1.26.0\npsycopg2 == 2.9.3\nsqlalchemy == 2.0.23')'''

In [None]:
# Optional: install the pinned dependencies (left commented out; uncomment to run).
# %pip targets the running kernel's environment, unlike a bare `! pip`.
# %pip install -r ../requirements.txt

# Movie Database
Written and executed in a jupyter notebook

<b>must run</b>

In [7]:
import datetime # for gathering dates and times
import json # for easier API data search later
import os # for environment variables
import re # for searching text

import numpy as np
import pandas as pd
import psycopg2 as ps # postgresql database connection
import requests # for API connection later
from sqlalchemy import create_engine # for pandas and postgresql connection
from sqlalchemy.engine import URL # builds the connection URL with proper escaping

In [1]:
# Sanity check: print the working directory and list the CSV folder the relative paths point at.
# NOTE(review): the recorded output of `pwd` exposes an absolute local path — consider clearing it before sharing.
! pwd
! ls ../CSV

/Users/carterthurman/Documents/GitHub/Movie_DB/Notebooks
final_movies.csv movies.csv       trnd_movies.csv  up_movies.csv


In [20]:
# File locations, all relative to this notebook's folder.
# `path` is the raw input; the other three are output CSVs.
path = '../CSV/movies.csv'
dest, trnd_dest, up_dest = (
    '../CSV/final_movies.csv',   # cleaned movie table
    '../CSV/trnd_movies.csv',    # trending movies pulled from the API
    '../CSV/up_movies.csv',      # presumably the upcoming-movies export — not written in the visible cells
)

## Reading Data

<b>must run</b>

In [16]:
# Load the raw movie CSV into `df`; the file's first column becomes the index.
df = pd.read_csv(filepath_or_buffer=path, index_col=0)
# Bare last expression -> rich (truncated) display of the frame
df

Unnamed: 0,Movie_Title,Year,Genres,Ratings,Tagline,Stars,Votes,Runtime,Gross
0,Blood Red Sky,(2021),"Action, Horror, Thriller",6.1,A woman with a mysterious illness is forced in...,Director:Peter Thorwarth| Stars:Peri B...,21062.0,121.0,
1,Masters of the Universe: Revelation,(2021– ),"Animation, Action, Adventure",5.0,The war for Eternia begins again in what may b...,"Stars:Chris Wood, Sarah Michel...",17870.0,25.0,
2,The Walking Dead,(2010–2022),"Drama, Horror, Thriller",8.2,Sheriff Deputy Rick Grimes wakes up from a com...,"Stars:Andrew Lincoln, Norman R...",885805.0,44.0,
3,Rick and Morty,(2013– ),"Animation, Adventure, Comedy",9.2,An animated series that follows the exploits o...,"Stars:Justin Roiland, Chris Pa...",414849.0,23.0,
4,Army of Thieves,(2021),"Action, Crime, Horror",,"A prequel, set before the events of Army of th...",Director:Matthias Schweighöfer| Stars:...,,,
...,...,...,...,...,...,...,...,...,...
9994,The Imperfects,(2021– ),"Adventure, Drama, Fantasy",,Add a Plot,"Stars:Morgan Taylor Campbell, ...",,,
9995,Arcane,(2021– ),"Animation, Action, Adventure",,Add a Plot,,,,
9996,Heart of Invictus,(2022– ),"Documentary, Sport",,Add a Plot,Director:Orlando von Einsiedel| Star:P...,,,
9997,The Imperfects,(2021– ),"Adventure, Drama, Fantasy",,Add a Plot,Director:Jovanka Vuckovic| Stars:Morga...,,,


In [17]:
# Normalise the raw upper-case headers to the names used in the rest of the notebook.
# Labels that are not present are ignored by rename (the recorded output of the load
# cell already shows the target names, so this may be a no-op on the current CSV).
df.rename(
    {
        'MOVIES': 'Movie_Title',
        'YEAR': 'Year',
        'GENRE': 'Genres',
        'RATING': 'Ratings',
        'ONE-LINE': 'Tagline',
        'STARS': 'Stars',
        'VOTES': 'Votes',
        'RunTime': 'Runtime',
    },
    axis='columns',
    inplace=True,
)
# Schema check: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Year         9355 non-null   object 
 2   Genres       9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        460 non-null    float64
dtypes: float64(4), object(5)
memory usage: 781.2+ KB


## Data Cleaning

In [18]:
# Strip embedded newline characters from every text (object-dtype) column of `df`.
# Columns are indexed by their label directly: the previous '{}'.format(column)
# round-trip converted each label to a string first, which only works when every
# label already is one.
for column in df.select_dtypes(include='object').columns:
    # regex=False: '\n' is a literal character to remove, not a pattern
    df[column] = df[column].str.replace('\n', '', regex=False)

# Make sure the numeric columns are float64.
# The commented-out lines are the extra string clean-up that is presumably needed
# when the raw CSV stores these columns as text (thousands separators, '$', 'M').

#df['Votes'] = df['Votes'].str.replace(',','')
df['Votes'] = df['Votes'].astype('float64')
#df['Gross'] = df['Gross'].str.replace('$','').str.replace('M', '')
df['Gross'] = df['Gross'].astype('float64')

In [19]:
# Re-check the schema after the newline clean-up and float casts
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Year         9355 non-null   object 
 2   Genres       9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        460 non-null    float64
dtypes: float64(4), object(5)
memory usage: 781.2+ KB


In [20]:
# cleaning up
# NOTE: this cell renames 'Year' to 'Year_Range', so re-running it without
# re-loading `df` raises KeyError on df['Year'].

# Main_Genre: the first entry of the comma-separated Genres string
df['Main_Genre'] = df['Genres'].str.split(',', expand=True)[0]

# Main_Year: the first run of four digits found in Year, as a nullable integer.
# expand=False makes extract return a Series (one capture group) instead of a
# one-column DataFrame; Int64 keeps missing years as <NA>.
df['Main_Year'] = df['Year'].str.extract(r'(\d{4})', expand=False).astype('Int64')

# A Gross of 0 is treated as missing so it does not distort aggregates
df['Gross'] = df['Gross'].replace(0, np.nan)

# Reorder the columns and rename Year -> Year_Range in one chained step, so no
# in-place rename is performed on a frame derived from a column selection.
df = df[[
    'Movie_Title', 'Main_Year', 'Main_Genre', 'Ratings', 'Tagline', 'Stars',
    'Votes', 'Runtime', 'Gross', 'Genres', 'Year',
]].rename(columns={'Year': 'Year_Range'})

In [21]:
# Schema after adding Main_Genre / Main_Year and renaming Year to Year_Range
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Main_Year    9251 non-null   Int64  
 2   Main_Genre   9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        445 non-null    float64
 9   Genres       9919 non-null   object 
 10  Year_Range   9355 non-null   object 
dtypes: Int64(1), float64(4), object(6)
memory usage: 947.2+ KB


## Minor Data Searching

In [22]:
# Example filter: up to 100 rows whose title matches the regex 'Avatar.*Airbend'
# and whose Tagline does NOT contain 'Live' (~ negates the mask) — i.e. the
# animated series rather than a live-action one.

df.loc[
    lambda frame: frame['Movie_Title'].str.contains(r'Avatar.*Airbend')
    & ~frame['Tagline'].str.contains('Live')
].head(100)

Unnamed: 0,Movie_Title,Main_Year,Main_Genre,Ratings,Tagline,Stars,Votes,Runtime,Gross,Genres,Year_Range
129,Avatar: The Last Airbender,2005,Animation,9.3,"In a war-torn world of elemental magic, a youn...","Stars:Dee Bradley Baker, Zach ...",265845.0,23.0,,"Animation, Action, Adventure",(2005–2008)
6475,Avatar: The Last Airbender,2005,Animation,7.6,Sokka and Katara have to solve a centuries-old...,Director:Lauren MacMullan| Stars:Zach ...,2871.0,24.0,,"Animation, Action, Adventure",(2005–2008)
6476,Avatar: The Last Airbender,2005,Animation,8.7,"After getting a house to stay in, Aang and com...",Director:Lauren MacMullan| Stars:Zach ...,2857.0,25.0,,"Animation, Action, Adventure",(2005–2008)
6477,Avatar: The Last Airbender,2005,Animation,8.0,"In a nearby Earth Village, the gang meets a te...",Director:Dave Filoni| Stars:Zach Tyler...,3229.0,25.0,,"Animation, Action, Adventure",(2005–2008)
6478,Avatar: The Last Airbender,2005,Animation,7.8,"Aang, Katara, and Sokka have their friendship ...",Director:Dave Filoni| Stars:Zach Tyler...,3164.0,25.0,,"Animation, Action, Adventure",(2005–2008)
...,...,...,...,...,...,...,...,...,...,...,...
8506,Avatar: The Last Airbender,2005,Animation,8.7,"The gang, now hiding at Ember Island, watch a ...",Director:Giancarlo Volpe| Stars:Zach T...,3246.0,25.0,,"Animation, Action, Adventure",(2005–2008)
8507,Avatar: The Last Airbender,2005,Animation,9.2,Sozin's Comet is only three days away. Zuko te...,Director:Ethan Spaulding| Stars:Zach T...,3465.0,92.0,,"Animation, Action, Adventure",(2005–2008)
8508,Avatar: The Last Airbender,2005,Animation,9.5,Aang awakes on a mysterious island and confron...,Director:Giancarlo Volpe| Stars:Zach T...,3845.0,92.0,,"Animation, Action, Adventure",(2005–2008)
8509,Avatar: The Last Airbender,2005,Animation,9.8,Zuko battles his sister with Katara's help for...,Director:Joaquim Dos Santos| Stars:Zac...,5283.0,92.0,,"Animation, Action, Adventure",(2005–2008)


In [23]:
# Distinct Gross values; computed but not displayed because it is not the cell's last expression
df['Gross'].unique()

# (number of missing Gross values, number of present Gross values)
df['Gross'].isna().sum(), df['Gross'].notna().sum()

(9554, 445)

In [24]:
# Median of Gross over the rows that have a value (the distribution is skewed, so median rather than mean)
df['Gross'].dropna().median()

8.55

In [25]:
# Title and Gross only, rows with a missing value dropped (on a temporary copy), lowest Gross first
df.loc[:, ['Movie_Title', 'Gross']].dropna().sort_values('Gross')

Unnamed: 0,Movie_Title,Gross
512,The Clovehitch Killer,0.01
6056,Theo Who Lived,0.01
1216,Honeymoon,0.01
4196,The Sunshine Makers,0.01
1317,Duck Butter,0.01
...,...,...
196,Spider-Man,403.71
144,Jumanji: Welcome to the Jungle,404.52
226,Captain America: Civil War,408.08
578,Finding Dory,486.30


## To CSV

In [26]:
# making csv file "final_movies.csv"
df.to_csv(dest)
! ls ../CSV

ls: CSV: No such file or directory


## Getting data into PostgreSQL

In [5]:
# Postgres connection settings are read from the environment; each is None when the variable is unset.
# Password for the postgres login
SQL_DB_KEY = os.getenv('SQL_DB_KEY')
# OS user name, used as the database name when the engine is created
ENV_USER = os.getenv('USER')

In [9]:
# Create the SQLAlchemy engine used to read from / write to the local PostgreSQL server.
# The URL is assembled with URL.create so the password is escaped correctly; pasting it
# into an f-string URL breaks when it contains characters such as '@', '/' or ':'.
engine = create_engine(
    URL.create(
        drivername='postgresql+psycopg2',
        username='postgres',
        password=SQL_DB_KEY,   # from the SQL_DB_KEY environment variable
        host='localhost',
        database=ENV_USER,     # database is named after the OS user
    )
)
# Displaying the engine shows the URL with the password masked
engine

Engine(postgresql+psycopg2://postgres:***@localhost/carterthurman)

In [36]:
# if_exists accepts 'append', 'replace' or 'fail'; 'replace' recreates the table on every run.
# index=False keeps the DataFrame index out of the table.
df.to_sql(name='movie_table', con=engine, if_exists='replace', index=False)

999

## Putting data into PostgreSQL via chunks 
Alternate way, for bigger datasets

for chunk_number, chunk in enumerate(pd.read_csv(dest, index_col=0, chunksize=1000)):
    # Replace the table on the first chunk only, then append: replacing on every
    # chunk would leave just the last chunk in the table.
    chunk.to_sql('movie_table', engine, if_exists='replace' if chunk_number == 0 else 'append')

## Reading Data from PostgreSQL

In [37]:
# Read the whole movie_table back from PostgreSQL into a DataFrame
sql_df = pd.read_sql_query(sql='SELECT * from public."movie_table"', con=engine)

In [38]:
# Display the table as read back from PostgreSQL.
# NOTE(review): the recorded output shows Main_Year as floats (2021.0) after the round trip — confirm whether it should be cast back to Int64.
sql_df

Unnamed: 0,Movie_Title,Main_Year,Main_Genre,Ratings,Tagline,Stars,Votes,Runtime,Gross,Genres,Year_Range
0,Blood Red Sky,2021.0,Action,6.1,A woman with a mysterious illness is forced in...,Director:Peter Thorwarth| Stars:Peri B...,21062.0,121.0,,"Action, Horror, Thriller",(2021)
1,Masters of the Universe: Revelation,2021.0,Animation,5.0,The war for Eternia begins again in what may b...,"Stars:Chris Wood, Sarah Michel...",17870.0,25.0,,"Animation, Action, Adventure",(2021– )
2,The Walking Dead,2010.0,Drama,8.2,Sheriff Deputy Rick Grimes wakes up from a com...,"Stars:Andrew Lincoln, Norman R...",885805.0,44.0,,"Drama, Horror, Thriller",(2010–2022)
3,Rick and Morty,2013.0,Animation,9.2,An animated series that follows the exploits o...,"Stars:Justin Roiland, Chris Pa...",414849.0,23.0,,"Animation, Adventure, Comedy",(2013– )
4,Army of Thieves,2021.0,Action,,"A prequel, set before the events of Army of th...",Director:Matthias Schweighöfer| Stars:...,,,,"Action, Crime, Horror",(2021)
...,...,...,...,...,...,...,...,...,...,...,...
9994,The Imperfects,2021.0,Adventure,,Add a Plot,"Stars:Morgan Taylor Campbell, ...",,,,"Adventure, Drama, Fantasy",(2021– )
9995,Arcane,2021.0,Animation,,Add a Plot,,,,,"Animation, Action, Adventure",(2021– )
9996,Heart of Invictus,2022.0,Documentary,,Add a Plot,Director:Orlando von Einsiedel| Star:P...,,,,"Documentary, Sport",(2022– )
9997,The Imperfects,2021.0,Adventure,,Add a Plot,Director:Jovanka Vuckovic| Stars:Morga...,,,,"Adventure, Drama, Fantasy",(2021– )


# API

## movie db
https://www.themoviedb.org/settings/api/stats <BR>
https://developer.themoviedb.org/reference/trending-all

<b>must run</b>

In [2]:
# API credentials are read from the environment; each is None when the variable is unset.
MDB_KEY = os.getenv('MOVIE_DB_KEY')
# RA_KEY is the value sent as the Bearer token in the API request headers
RA_KEY = os.getenv('READ_ACCESS_KEY')
ACCOUNT_KEY = os.getenv('ACCOUNT_ID_KEY')

In [3]:
# Postgres connection settings, read again here so the API section can run on its own.
# Password for the postgres login
SQL_DB_KEY = os.getenv('SQL_DB_KEY')
# OS user name, used as the database name when the engine is created
ENV_USER = os.getenv('USER')

In [4]:
# Create the SQLAlchemy engine for the local PostgreSQL server (API section).
# The URL is assembled with URL.create so the password is escaped correctly; pasting it
# into an f-string URL breaks when it contains characters such as '@', '/' or ':'.
engine = create_engine(
    URL.create(
        drivername='postgresql+psycopg2',
        username='postgres',
        password=SQL_DB_KEY,   # from the SQL_DB_KEY environment variable
        host='localhost',
        database=ENV_USER,     # database is named after the OS user
    )
)
# Displaying the engine shows the URL with the password masked
engine

Engine(postgresql+psycopg2://postgres:***@localhost/carterthurman)

In [5]:
# Testing connection: call the API's authentication endpoint with the read-access
# token and print the raw response body for inspection.

url = "https://api.themoviedb.org/3/authentication"

headers = {
    "accept": "application/json",
    # RA_KEY comes from the READ_ACCESS_KEY environment variable
    "Authorization": f"Bearer {RA_KEY}",
}

# requests has no default timeout; without one this cell can hang indefinitely
# when the API is unreachable.
response = requests.get(url, headers=headers, timeout=10)

print(response.text)

{"success":true,"status_code":1,"status_message":"Success."}


### Trending Movies

To iterate through pages:

url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"


In [32]:
%%time

# this section is to create the dataframe and gather the first 20 items

# gathering Trending Movies

# specified page number
url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY)
}

response_trnd = requests.get(url, headers=headers)

print(response_trnd)

# decodes response into json object
json_data_trnd = response_trnd.content.decode()
# converts json object/string into python object
data_trnd = json.loads(json_data_trnd)

# gathering columns into list
trnd_columns = [i for i in data_trnd['results'][0].keys()]
# unpacking data from json object into the data argument and setting columns, making a dataframe
trnd_df = pd.DataFrame([*data_trnd['results']], columns=trnd_columns)
trnd_df

<Response [200]>
CPU times: user 53.2 ms, sys: 5.25 ms, total: 58.5 ms
Wall time: 322 ms


Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count
0,False,/f1AQhx6ZfGhPZFTVKgxG91PhEYc.jpg,753342,Napoleon,en,Napoleon,An epic that details the checkered rise and fa...,/jE5o7y9K6pZtWNNMEw3IdpHuncR.jpg,movie,"[36, 10752, 18]",1092.626,2023-11-22,False,6.445,1005
1,False,/qgFrFXtiGfWN1zkk4sPd0nwFaBF.jpg,848187,Role Play,en,Role Play,Emma has a wonderful husband and two kids in t...,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,movie,"[28, 35, 10749]",26.68,2023-12-14,False,6.222,9
2,False,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg,955916,Lift,en,Lift,"An international heist crew, led by Cyrus Whit...",/paXvbaaT5kCZbCqiWrnf44tGAyn.jpg,movie,"[28, 35]",147.906,2024-01-10,False,7.2,5
3,False,/uKP0B8HUJ08fas7NF77Xwu0bolJ.jpg,1214314,One More Shot,en,One More Shot,Following the attack on the black site in Pola...,/gdF3Q1Mcr2XvxLPStQSoQIO2cIj.jpg,movie,"[28, 53]",38.525,2024-01-12,False,7.306,18
4,False,/rVJfabCz1ViynQCEz54MRqdZig1.jpg,1155089,Justice League: Crisis on Infinite Earths Part...,en,Justice League: Crisis on Infinite Earths Part...,Death is coming. Worse than death: oblivion. N...,/zR6C66EDklgTPLHRSmmMt5878MR.jpg,movie,"[16, 878, 28]",104.182,2024-01-09,False,7.9,53
5,False,/vdpE5pjJVql5aD6pnzRqlFmgxXf.jpg,906126,Society of the Snow,es,La sociedad de la nieve,"On October 13, 1972, Uruguayan Air Force Fligh...",/2e853FDVSIso600RqAMunPxiZjq.jpg,movie,"[18, 36]",1412.999,2023-12-13,False,8.085,595
6,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,866398,The Beekeeper,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,movie,"[28, 53]",453.101,2024-01-10,False,7.52,25
7,False,/rLb2cwF3Pazuxaj0sRXQ037tGI1.jpg,872585,Oppenheimer,en,Oppenheimer,The story of J. Robert Oppenheimer's role in t...,/8Gxv8gSFCU0XGDykEGv7zR1n2ua.jpg,movie,"[18, 36]",674.156,2023-07-19,False,8.115,6049
8,False,/nHf61UzkfFno5X1ofIhugCPus2R.jpg,346698,Barbie,en,Barbie,Barbie and Ken are having the time of their li...,/iuFNMS8U5cb6xfzi51Dbkovj7vM.jpg,movie,"[35, 12, 14]",399.454,2023-07-19,False,7.146,6866
9,False,/plLqCMIYgvh3dqnIm16da7lDtmb.jpg,1072876,Self Reliance,en,Self Reliance,When a man is offered a million dollars to pla...,/4AH3S0xMEYW20KGI6CSPO1W70bo.jpg,movie,[35],16.292,2024-01-03,False,4.3,3


In [33]:
%%time

# gathers the total pages of response
match = re.search(r'"total_pages":(\d{4})', response_trnd.text)
print(match)
# if the response gave a correct variable and not an error, it will grab the page number by the first group
# else, it will print "Number not found"
if match:
    page_number = match.group(1)
    print(f"Number: {page_number}")
else:
    print("Number not found")

# converting page_nunber into 'int' variable
page_number = int(page_number)
    
# gathering Trending Movies one page at a time through iteration
# this whole block of code with try to do it and until it fails, it will continue. 
# once it fails, it will print "Out of Pages"

# this block of code takes around 30 seconds to run.
try:
    # for each page starting at page 2 (because we already have page 1), iterate through and gather the response
    # including the url which the page_number is being formatted into, the headers with the environmental keys
    # and then putting it into a response variable which is then being decoded into a json object
    # and then loading into a data_trnd json object that can be searched through
    
    for page in range(2, page_number):
        url = f"https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page={page}"
        headers = {
            "accept": "application/json",
            "Authorization": "Bearer {}".format(RA_KEY)}
        response_trnd = requests.get(url, headers=headers)
        # decodes response into json object
        json_data_trnd = response_trnd.content.decode()
        # converts json object/string into python object
        data_trnd = json.loads(json_data_trnd)
        # if data_trnd exists, then add it to a temporary dataframe with correct columns, and then 
        # join it (concat it) into the already created dataframe while ignoring the index column.
        if data_trnd:
            trnd_df_temp = pd.DataFrame([*data_trnd['results']], columns=trnd_columns)
            trnd_df = pd.concat([trnd_df, trnd_df_temp], ignore_index=True)
        else:
            break
except:
    print("Out of Pages")
    
    
# gathering current date
trnd_df['curr_date'] = datetime.date.today().strftime("%m-%d-%Y")
# converting to pandas datetime type 
trnd_df['curr_date'] = pd.to_datetime(trnd_df['curr_date'])
# printing out our results
trnd_df

<re.Match object; span=(12739, 12757), match='"total_pages":1000'>
Number: 1000


Exception ignored in: <finalize object at 0x1728c9ca0; dead>
Traceback (most recent call last):
  File "/Users/carterthurman/anaconda3/envs/Data/lib/python3.9/weakref.py", line 591, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/Users/carterthurman/anaconda3/envs/Data/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1181, in _close_pool_connections
    conn.close()
  File "/Users/carterthurman/anaconda3/envs/Data/lib/python3.9/site-packages/urllib3/connection.py", line 272, in close
    super().close()
KeyboardInterrupt: 


Out of Pages
CPU times: user 2.2 s, sys: 127 ms, total: 2.33 s
Wall time: 11 s


Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count,curr_date
0,False,/f1AQhx6ZfGhPZFTVKgxG91PhEYc.jpg,753342,Napoleon,en,Napoleon,An epic that details the checkered rise and fa...,/jE5o7y9K6pZtWNNMEw3IdpHuncR.jpg,movie,"[36, 10752, 18]",1092.626,2023-11-22,False,6.445,1005,2024-01-12
1,False,/qgFrFXtiGfWN1zkk4sPd0nwFaBF.jpg,848187,Role Play,en,Role Play,Emma has a wonderful husband and two kids in t...,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,movie,"[28, 35, 10749]",26.680,2023-12-14,False,6.222,9,2024-01-12
2,False,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg,955916,Lift,en,Lift,"An international heist crew, led by Cyrus Whit...",/paXvbaaT5kCZbCqiWrnf44tGAyn.jpg,movie,"[28, 35]",147.906,2024-01-10,False,7.200,5,2024-01-12
3,False,/uKP0B8HUJ08fas7NF77Xwu0bolJ.jpg,1214314,One More Shot,en,One More Shot,Following the attack on the black site in Pola...,/gdF3Q1Mcr2XvxLPStQSoQIO2cIj.jpg,movie,"[28, 53]",38.525,2024-01-12,False,7.306,18,2024-01-12
4,False,/rVJfabCz1ViynQCEz54MRqdZig1.jpg,1155089,Justice League: Crisis on Infinite Earths Part...,en,Justice League: Crisis on Infinite Earths Part...,Death is coming. Worse than death: oblivion. N...,/zR6C66EDklgTPLHRSmmMt5878MR.jpg,movie,"[16, 878, 28]",104.182,2024-01-09,False,7.900,53,2024-01-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
954,False,/A1Larywbw79kZQqkvCEiPHJqdLN.jpg,10494,Perfect Blue,ja,PERFECT BLUE,Pop singer Mima Kirigoe looks forward to a bri...,/6WTiOCfDPP8XV4jqfloiVWf7KHq.jpg,movie,"[16, 53]",62.429,1998-02-28,False,8.291,2283,2024-01-12
955,False,/aPvIX46VtxZDwu5bB2UcJ7xdpxs.jpg,977223,Polite Society,en,Polite Society,Martial artist-in-training Ria Khan believes s...,/lv1WqAo2ulQy9aSOG7ikR44p8RR.jpg,movie,"[35, 28, 18]",37.994,2023-04-27,False,6.380,118,2024-01-12
956,False,/3Nn5BOM1EVw1IYrv6MsbOS6N1Ol.jpg,162,Edward Scissorhands,en,Edward Scissorhands,A small suburban town receives a visit from a ...,/1RFIbuW9Z3eN9Oxw2KaQG5DfLmD.jpg,movie,"[14, 18, 10749]",57.406,1990-12-07,False,7.715,12284,2024-01-12
957,False,/1TecFmK3v6VJzsaGQgvrBiaypxW.jpg,1001500,The Starling Girl,en,The Starling Girl,17-year-old Jem Starling struggles with her pl...,/ysgnrkXwfFCxaBwviG2yNvzpfw5.jpg,movie,[18],13.415,2023-05-12,False,6.900,20,2024-01-12


In [21]:
# making trnd_movies.csv and showing schema
trnd_df.to_csv(trnd_dest)
! ls ../CSV
trnd_df.info()

final_movies.csv movies.csv       trnd_movies.csv  up_movies.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9995 entries, 0 to 9994
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   adult              9995 non-null   bool          
 1   backdrop_path      9626 non-null   object        
 2   id                 9995 non-null   int64         
 3   title              9995 non-null   object        
 4   original_language  9995 non-null   object        
 5   original_title     9995 non-null   object        
 6   overview           9995 non-null   object        
 7   poster_path        9937 non-null   object        
 8   media_type         9995 non-null   object        
 9   genre_ids          9995 non-null   object        
 10  popularity         9995 non-null   float64       
 11  release_date       9995 non-null   object        
 12  video              9995 non-null   bool          
 13

In [22]:
# if_exists accepts 'append', 'replace' or 'fail'; 'append' adds these rows to any
# existing trnd_movie_table, so every run of this cell inserts the frame again.
# index=False keeps the DataFrame index out of the table.
trnd_df.to_sql(name='trnd_movie_table', con=engine, if_exists='append', index=False)

995

### Upcoming Movies

In [40]:
# Request page 1 of the upcoming-movies endpoint and show the HTTP status.

# specified page number
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"

headers = {
    "accept": "application/json",
    # RA_KEY comes from the READ_ACCESS_KEY environment variable
    "Authorization": "Bearer {}".format(RA_KEY),
}

# requests has no default timeout; set one so the cell cannot hang forever
response_up = requests.get(url, headers=headers, timeout=10)

print(response_up)

<Response [200]>


#### API testing

In [10]:
# Parse the upcoming-movies response so the exploration cells have `data` to inspect.
# No visible cell defines `data`, so on a fresh kernel this cell raised NameError before.
data = json.loads(response_up.content.decode())

# we want 'results'
# 'dates' is the day we got this info
# results are our results
# total pages is how many we can iterate through
print("Keys in Object: ", *data.keys(), "\nTotal Pages: ", str(data['total_pages']))

Keys in Object:  dates page results total_pages total_results 
Total Pages:  49


In [13]:
# Print every (key, value) pair of the first result (index 0) on one line.
# NOTE(review): `data` is not defined in any visible cell — presumably the parsed upcoming-movies response; confirm.
print(*((key, value) for key, value in data['results'][0].items()))

('adult', False) ('backdrop_path', '/jXJxMcVoEuXzym3vFnjqDW4ifo6.jpg') ('genre_ids', [28, 12, 14]) ('id', 572802) ('original_language', 'en') ('original_title', 'Aquaman and the Lost Kingdom') ('overview', "Black Manta, still driven by the need to avenge his father's death and wielding the power of the mythic Black Trident, will stop at nothing to take Aquaman down once and for all. To defeat him, Aquaman must turn to his imprisoned brother Orm, the former King of Atlantis, to forge an unlikely alliance in order to save the world from irreversible destruction.") ('popularity', 1542.493) ('poster_path', '/8xV47NDrjdZDpkVcCFqkdHa3T0C.jpg') ('release_date', '2023-12-20') ('title', 'Aquaman and the Lost Kingdom') ('video', False) ('vote_average', 6.483) ('vote_count', 361)


In [120]:
# Dump every field of every result, one "name value" pair per line
for movie in data['results']:
    for field_name, field_value in movie.items():
        print(field_name, field_value)

adult False
backdrop_path /xgGGinKRL8xeRkaAR9RMbtyk60y.jpg
genre_ids [16, 10751, 10402, 14, 35]
id 901362
original_language en
original_title Trolls Band Together
overview When Branch's brother, Floyd, is kidnapped for his musical talents by a pair of nefarious pop-star villains, Branch and Poppy embark on a harrowing and emotional journey to reunite the other brothers and rescue Floyd from a fate even worse than pop-culture obscurity.
popularity 1620.617
poster_path /qV4fdXXUm5xNlEJ2jw7af3XxuQB.jpg
release_date 2023-10-12
title Trolls Band Together
video False
vote_average 7.1
vote_count 304
adult False
backdrop_path /yOm993lsJyPmBodlYjgpPwBjXP9.jpg
genre_ids [35, 10751, 14]
id 787699
original_language en
original_title Wonka
overview Willy Wonka – chock-full of ideas and determined to change the world one delectable bite at a time – is proof that the best things in life begin with a dream, and if you’re lucky enough to meet Willy Wonka, anything is possible.
popularity 748.278
poster

#### Python - SQL

In [44]:
%%time

# this section is to create the dataframe and gather the first 20 items

# gathering Upcoming Movies

# specified page number
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY)
}

response_up = requests.get(url, headers=headers)

print(response_up)

# decodes response into json object
json_data_up = response_up.content.decode()
# converts json object/string into python object
data_up = json.loads(json_data_up)

# making a list full of the column names
up_columns = [i for i in data_up['results'][0].keys()]
# unpacking data from json object into the data argument and setting columns, making a dataframe
up_df = pd.DataFrame([*data_up['results']], columns=up_columns)
up_df

<Response [200]>
CPU times: user 54.1 ms, sys: 4.93 ms, total: 59 ms
Wall time: 241 ms


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,"[35, 10751, 14]",787699,en,Wonka,Willy Wonka – chock-full of ideas and determin...,654.778,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg,2023-12-06,Wonka,False,7.1,934
1,False,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,"[18, 10749]",1143183,tl,Rewind,Mary loves John for as long as she can remembe...,466.285,/ru1i4ZR11lPPVArk3fOcO1VCOlD.jpg,2023-12-25,Rewind,False,7.059,17
2,False,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,"[10749, 18]",1020006,en,Priscilla,When teenage Priscilla Beaulieu meets Elvis Pr...,611.517,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg,2023-10-27,Priscilla,False,6.876,193
3,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,"[28, 53]",866398,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,453.101,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,2024-01-10,The Beekeeper,False,7.5,20
4,False,/zX9m8h33pHXcES7ttO8v0ThiYj7.jpg,"[18, 28, 53]",1053592,es,Todos los nombres de Dios,"After being implicated in a terrorist attack, ...",373.468,/n15gfcgwV0LVPSobrayZcFHcwN6.jpg,2023-09-15,All the Names of God,False,7.048,42
5,False,/ptz5ETMxDoRRiE69BVuIxJzyTEO.jpg,"[16, 28, 12, 35, 10751]",940551,en,Migration,After a migrating duck family alights on their...,374.851,/ldfCF9RhR40mppkzmftxapaHeTo.jpg,2023-12-06,Migration,False,7.4,98
6,False,/vQpvNDc0AFao8BbWyXDFVVrqiZj.jpg,[27],853387,en,Lord of Misrule,When the daughter of the town's new priest goe...,326.801,/eCNJuGsCNdf2yf4F3UcDg1WZTbo.jpg,2023-10-26,Lord of Misrule,False,5.6,15
7,False,/4HodYYKEIsGOdinkGi2Ucz6X9i0.jpg,"[16, 28, 12, 878]",569094,en,Spider-Man: Across the Spider-Verse,"After reuniting with Gwen Stacy, Brooklyn’s fu...",326.69,/8Vt6mWEReuy4Of61Lnj5Xj704m8.jpg,2023-05-31,Spider-Man: Across the Spider-Verse,False,8.379,5466
8,False,/pA3vdhadJPxF5GA1uo8OPTiNQDT.jpg,"[28, 18]",678512,en,Sound of Freedom,"The story of Tim Ballard, a former US governme...",238.867,/qA5kPYZA7FkVvqcEfJRoOy4kpHg.jpg,2023-07-03,Sound of Freedom,False,8.068,1764
9,False,/mgTeI8UNJzre7YMt24lfQLyXnA8.jpg,[18],1075175,en,How to Have Sex,Three British teenage girls go on a rites-of-p...,196.703,/rafwrzslLb203hQFIU8s0yRk0Qy.jpg,2023-11-02,How to Have Sex,False,6.52,101


In [45]:
%%time

# gathers the total pages of response
match = re.search(r'"total_pages":(\d+)', response_up.text)

# if the response gave a correct variable and not an error, it will grab the page number by the first group
# else, it will print "Number not found"
if match:
    page_number = match.group(1)
    print(f"Number: {page_number}")
else:
    print("Number not found")

# converting page_nunber into 'int' variable
page_number = int(page_number)
    
# gathering Upcoming Movies one page at a time through iteration
# this whole block of code with try to do it and until it fails, it will continue. 
# once it fails, it will print "Out of Pages"

# this block of code takes around 30 seconds to run.
try:
    # for each page starting at page 2 (because we already have page 1), iterate through and gather the response
    # including the url which the page_number is being formatted into, the headers with the environmental keys
    # and then putting it into a response variable which is then being decoded into a json object
    # and then loading into a data_trnd json object that can be searched through
    
    for page in range(2, page_number):
        # specified page number
        url = f"https://api.themoviedb.org/3/movie/upcoming?language=en-US&page={page}"
        headers = {
            "accept": "application/json",
            "Authorization": "Bearer {}".format(RA_KEY)
        }
        response_up = requests.get(url, headers=headers)
        # decodes response into json object
        json_data_up = response_up.content.decode()
        # converts json object/string into python object
        data_up = json.loads(json_data_up)
        # if data_up exists, then add it to a temporary dataframe with correct columns, and then 
        # join it (concat it) into the already created dataframe while ignoring the index column.
        if data_up:
            up_df_temp = pd.DataFrame([*data_up['results']], columns=up_columns)
            up_df = pd.concat([up_df, up_df_temp], ignore_index=True)
        else:
            break
except:
    print("Out of Pages")
    
    
# gathering current date
up_df['curr_date'] = datetime.date.today().strftime("%m-%d-%Y")
# converting to pandas datetime type 
up_df['curr_date'] = pd.to_datetime(up_df['curr_date'])
# printing out our results
up_df

Number: 53
CPU times: user 1.99 s, sys: 106 ms, total: 2.1 s
Wall time: 6.22 s


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,curr_date
0,False,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,"[35, 10751, 14]",787699,en,Wonka,Willy Wonka – chock-full of ideas and determin...,654.778,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg,2023-12-06,Wonka,False,7.100,934,2024-01-12
1,False,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,"[18, 10749]",1143183,tl,Rewind,Mary loves John for as long as she can remembe...,466.285,/ru1i4ZR11lPPVArk3fOcO1VCOlD.jpg,2023-12-25,Rewind,False,7.059,17,2024-01-12
2,False,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,"[10749, 18]",1020006,en,Priscilla,When teenage Priscilla Beaulieu meets Elvis Pr...,611.517,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg,2023-10-27,Priscilla,False,6.876,193,2024-01-12
3,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,"[28, 53]",866398,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,453.101,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,2024-01-10,The Beekeeper,False,7.500,20,2024-01-12
4,False,/zX9m8h33pHXcES7ttO8v0ThiYj7.jpg,"[18, 28, 53]",1053592,es,Todos los nombres de Dios,"After being implicated in a terrorist attack, ...",373.468,/n15gfcgwV0LVPSobrayZcFHcwN6.jpg,2023-09-15,All the Names of God,False,7.048,42,2024-01-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,False,/2HwImzg3k1xYeUSjChpBW7MDBzs.jpg,[99],1229980,fr,"Iznik, les mystères de la basilique engloutie",,0.000,/fsSU6WYMyC1WqCyqenTxajQwZOB.jpg,2024-01-25,"Iznik, les mystères de la basilique engloutie",False,0.000,0,2024-01-12
1036,False,,[35],1229913,es,El Roomie,Vivi is a young writer who is forced to find a...,0.000,/6NtvS6vFjQKpt8hnAk5D9Ig4trg.jpg,2024-01-18,El Roomie,False,0.000,0,2024-01-12
1037,False,,[],1229838,sv,Barnen från Camp Ashraf,In the aftermath of the 1979 Iranian revolutio...,1.043,,2024-01-31,The Children of Camp Ashraf,False,0.000,0,2024-01-12
1038,False,,[],1229835,ku,Demo ke pelê gozan benî zer,"In southeastern Turkey, Kurds who want to live...",1.101,,2024-01-30,When the Walnut Leaves Turn Yellow,False,0.000,0,2024-01-12


In [46]:
# Cleaning Up

# Columns kept for the final table, in display order.
up_keep_columns = ['original_title', 'id', 'overview',
                   'original_language', 'release_date', 'popularity', 'vote_average',
                   'vote_count', 'curr_date', 'backdrop_path', 'poster_path']

# Built as one chain that returns a new frame instead of mutating up_df in place,
# so this cell can be executed more than once without failing.
up_df = (
    up_df
    # Dropping unnecessary columns; errors='ignore' tolerates a re-run where they
    # are already gone (a plain drop raises KeyError on missing labels).
    .drop(columns=['genre_ids', 'video'], errors='ignore')
    # Changing datatypes: release_date is parsed into a pandas datetime column.
    .assign(release_date=lambda frame: pd.to_datetime(frame['release_date']))
    # Reorganizing columns; any column not listed above is left out of the result.
    .loc[:, up_keep_columns]
)
up_df

Unnamed: 0,original_title,id,overview,original_language,release_date,popularity,vote_average,vote_count,curr_date,backdrop_path,poster_path
0,Wonka,787699,Willy Wonka – chock-full of ideas and determin...,en,2023-12-06,654.778,7.100,934,2024-01-12,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg
1,Rewind,1143183,Mary loves John for as long as she can remembe...,tl,2023-12-25,466.285,7.059,17,2024-01-12,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,/ru1i4ZR11lPPVArk3fOcO1VCOlD.jpg
2,Priscilla,1020006,When teenage Priscilla Beaulieu meets Elvis Pr...,en,2023-10-27,611.517,6.876,193,2024-01-12,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg
3,The Beekeeper,866398,One man’s campaign for vengeance takes on nati...,en,2024-01-10,453.101,7.500,20,2024-01-12,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg
4,Todos los nombres de Dios,1053592,"After being implicated in a terrorist attack, ...",es,2023-09-15,373.468,7.048,42,2024-01-12,/zX9m8h33pHXcES7ttO8v0ThiYj7.jpg,/n15gfcgwV0LVPSobrayZcFHcwN6.jpg
...,...,...,...,...,...,...,...,...,...,...,...
1035,"Iznik, les mystères de la basilique engloutie",1229980,,fr,2024-01-25,0.000,0.000,0,2024-01-12,/2HwImzg3k1xYeUSjChpBW7MDBzs.jpg,/fsSU6WYMyC1WqCyqenTxajQwZOB.jpg
1036,El Roomie,1229913,Vivi is a young writer who is forced to find a...,es,2024-01-18,0.000,0.000,0,2024-01-12,,/6NtvS6vFjQKpt8hnAk5D9Ig4trg.jpg
1037,Barnen från Camp Ashraf,1229838,In the aftermath of the 1979 Iranian revolutio...,sv,2024-01-31,1.043,0.000,0,2024-01-12,,
1038,Demo ke pelê gozan benî zer,1229835,"In southeastern Turkey, Kurds who want to live...",ku,2024-01-30,1.101,0.000,0,2024-01-12,,


In [49]:
# Print a summary of up_df: per-column non-null counts, dtypes and memory usage.
up_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1040 entries, 0 to 1039
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   original_title     1040 non-null   object        
 1   id                 1040 non-null   int64         
 2   overview           1040 non-null   object        
 3   original_language  1040 non-null   object        
 4   release_date       1037 non-null   datetime64[ns]
 5   popularity         1040 non-null   float64       
 6   vote_average       1040 non-null   float64       
 7   vote_count         1040 non-null   int64         
 8   curr_date          1040 non-null   datetime64[ns]
 9   backdrop_path      542 non-null    object        
 10  poster_path        787 non-null    object        
dtypes: datetime64[ns](2), float64(2), int64(2), object(5)
memory usage: 89.5+ KB


In [None]:
# Persist the upcoming-movies frame in two places.

# 1) CSV file at up_dest. No index argument is given, so pandas' default applies
#    and the DataFrame index is written as the first column.
up_df.to_csv(path_or_buf=up_dest)

# 2) SQL table 'up_movie_table' through the existing engine.
#    if_exists accepts 'fail', 'replace' or 'append'; 'replace' is used here.
#    index=False leaves the DataFrame index out of the SQL table.
up_df.to_sql(name='up_movie_table', con=engine, if_exists='replace', index=False)