In [2]:
# One-off helper that (re)generates ../requirements.txt. It is kept disabled;
# uncomment the two lines below to run it.
# NOTE(review): the notebook also imports `requests`, which is not pinned here - confirm.
# with open('../requirements.txt', 'w') as f:
#     f.write('pandas == 2.1.3\nnumpy == 1.26.0\npsycopg2 == 2.9.3\nsqlalchemy == 2.0.23')

In [None]:
# if necessary (use %pip so the install targets this kernel's environment)
#%pip install -r ../requirements.txt

# Movie Database
Written and executed in a Jupyter notebook.

<b>Must run</b>

In [3]:
import json # for easier API data search later
import os # for environment variables
import re # for searching text

import numpy as np
import pandas as pd
import psycopg2 as ps # postgresql database connection
import requests # for API connection later
from sqlalchemy import create_engine # for pandas and postgresql connection
from sqlalchemy.engine import URL # builds the connection URL with proper escaping

In [4]:
# show the working directory and the CSV files available to the notebook
! pwd
! ls ../CSV

/Users/carterthurman/Documents/GitHub/Movie_DB/Notebooks
final_movies.csv movies.csv       trnd_movies.csv


## Reading Data

<b>Must run</b>

In [29]:
# Locations of the CSV files read and written by this notebook.
# They all live in the same folder, so the folder is named once here.
CSV_DIR = '../CSV'

path = f'{CSV_DIR}/movies.csv'            # raw input data
dest = f'{CSV_DIR}/final_movies.csv'      # cleaned movie data
trnd_dest = f'{CSV_DIR}/trnd_movies.csv'  # trending movies pulled from the API
up_dest = f'{CSV_DIR}/up_movies.csv'      # upcoming movies pulled from the API

In [16]:
# load the raw movie data; the first CSV column is used as the row index
df = pd.read_csv(path, index_col=0)
df

Unnamed: 0,Movie_Title,Year,Genres,Ratings,Tagline,Stars,Votes,Runtime,Gross
0,Blood Red Sky,(2021),"Action, Horror, Thriller",6.1,A woman with a mysterious illness is forced in...,Director:Peter Thorwarth| Stars:Peri B...,21062.0,121.0,
1,Masters of the Universe: Revelation,(2021– ),"Animation, Action, Adventure",5.0,The war for Eternia begins again in what may b...,"Stars:Chris Wood, Sarah Michel...",17870.0,25.0,
2,The Walking Dead,(2010–2022),"Drama, Horror, Thriller",8.2,Sheriff Deputy Rick Grimes wakes up from a com...,"Stars:Andrew Lincoln, Norman R...",885805.0,44.0,
3,Rick and Morty,(2013– ),"Animation, Adventure, Comedy",9.2,An animated series that follows the exploits o...,"Stars:Justin Roiland, Chris Pa...",414849.0,23.0,
4,Army of Thieves,(2021),"Action, Crime, Horror",,"A prequel, set before the events of Army of th...",Director:Matthias Schweighöfer| Stars:...,,,
...,...,...,...,...,...,...,...,...,...
9994,The Imperfects,(2021– ),"Adventure, Drama, Fantasy",,Add a Plot,"Stars:Morgan Taylor Campbell, ...",,,
9995,Arcane,(2021– ),"Animation, Action, Adventure",,Add a Plot,,,,
9996,Heart of Invictus,(2022– ),"Documentary, Sport",,Add a Plot,Director:Orlando von Einsiedel| Star:P...,,,
9997,The Imperfects,(2021– ),"Adventure, Drama, Fantasy",,Add a Plot,Director:Jovanka Vuckovic| Stars:Morga...,,,


In [17]:
# Map the raw CSV headers onto the column names used in the rest of the notebook.
# Headers that are not present in the frame are simply ignored by rename().
column_names = {
    'MOVIES': 'Movie_Title',
    'YEAR': 'Year',
    'GENRE': 'Genres',
    'RATING': 'Ratings',
    'ONE-LINE': 'Tagline',
    'STARS': 'Stars',
    'VOTES': 'Votes',
    'RunTime': 'Runtime',
}
df = df.rename(columns=column_names)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Year         9355 non-null   object 
 2   Genres       9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        460 non-null    float64
dtypes: float64(4), object(5)
memory usage: 781.2+ KB


## Data Cleaning

In [18]:
# Clean the text columns and make sure Votes/Gross really are numeric.

# Remove embedded newlines from every text (object-dtype, 'O') column.
text_columns = [column for column in df.columns if df[column].dtype == 'O']
for column in text_columns:
    df[column] = df[column].str.replace('\n', '', regex=False)

# Votes and Gross must end up as float64. When they are read as text they carry
# formatting characters (',' in Votes; '$' and 'M' in Gross) that have to be
# stripped before the cast; when they are already numeric only the cast runs.
formatting_chars = {'Votes': [','], 'Gross': ['$', 'M']}
for column, chars in formatting_chars.items():
    if df[column].dtype == 'O':
        for char in chars:
            # regex=False: '$' must be treated as a literal character
            df[column] = df[column].str.replace(char, '', regex=False)
    df[column] = df[column].astype('float64')

In [19]:
# re-check dtypes and non-null counts after the newline / dtype cleanup
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Year         9355 non-null   object 
 2   Genres       9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        460 non-null    float64
dtypes: float64(4), object(5)
memory usage: 781.2+ KB


In [20]:
# Derive the helper columns, tidy Gross, then put the columns in their final order.

# final left-to-right layout; 'Year' is renamed to 'Year_Range' at the very end
column_order = ['Movie_Title', 'Main_Year', 'Main_Genre', 'Ratings', 'Tagline', 'Stars',
                'Votes', 'Runtime', 'Gross', 'Genres', 'Year']

df = (
    df.assign(
        # first entry of the comma-separated genre list
        Main_Genre=df['Genres'].str.split(',', expand=True)[0],
        # first run of four digits in the year text, stored as nullable Int64
        # so rows without a year stay missing instead of forcing floats
        Main_Year=df['Year'].str.extract(r'(\d{4})', expand=True)[0].astype('Int64'),
        # a gross of 0 is treated as "unknown" so it does not distort aggregates
        Gross=df['Gross'].replace(0, np.nan),
    )[column_order]
    .rename(columns={'Year': 'Year_Range'})
)

In [21]:
# confirm the new columns (Main_Year, Main_Genre, Year_Range) and their dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9999 entries, 0 to 9998
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Movie_Title  9999 non-null   object 
 1   Main_Year    9251 non-null   Int64  
 2   Main_Genre   9919 non-null   object 
 3   Ratings      8179 non-null   float64
 4   Tagline      9999 non-null   object 
 5   Stars        9543 non-null   object 
 6   Votes        8179 non-null   float64
 7   Runtime      7041 non-null   float64
 8   Gross        445 non-null    float64
 9   Genres       9919 non-null   object 
 10  Year_Range   9355 non-null   object 
dtypes: Int64(1), float64(4), object(6)
memory usage: 947.2+ KB


## Minor Data Searching

In [22]:
# Example search: up to 100 rows whose title matches a regex while the tagline
# does NOT contain a given word. Here: titles matching "Avatar...Airbend" whose
# tagline lacks 'Live', i.e. the animated series rather than the live-action one.
title_matches = df['Movie_Title'].str.contains(r'Avatar.*Airbend')
tagline_has_live = df['Tagline'].str.contains('Live')

df[title_matches & ~tagline_has_live].head(100)

Unnamed: 0,Movie_Title,Main_Year,Main_Genre,Ratings,Tagline,Stars,Votes,Runtime,Gross,Genres,Year_Range
129,Avatar: The Last Airbender,2005,Animation,9.3,"In a war-torn world of elemental magic, a youn...","Stars:Dee Bradley Baker, Zach ...",265845.0,23.0,,"Animation, Action, Adventure",(2005–2008)
6475,Avatar: The Last Airbender,2005,Animation,7.6,Sokka and Katara have to solve a centuries-old...,Director:Lauren MacMullan| Stars:Zach ...,2871.0,24.0,,"Animation, Action, Adventure",(2005–2008)
6476,Avatar: The Last Airbender,2005,Animation,8.7,"After getting a house to stay in, Aang and com...",Director:Lauren MacMullan| Stars:Zach ...,2857.0,25.0,,"Animation, Action, Adventure",(2005–2008)
6477,Avatar: The Last Airbender,2005,Animation,8.0,"In a nearby Earth Village, the gang meets a te...",Director:Dave Filoni| Stars:Zach Tyler...,3229.0,25.0,,"Animation, Action, Adventure",(2005–2008)
6478,Avatar: The Last Airbender,2005,Animation,7.8,"Aang, Katara, and Sokka have their friendship ...",Director:Dave Filoni| Stars:Zach Tyler...,3164.0,25.0,,"Animation, Action, Adventure",(2005–2008)
...,...,...,...,...,...,...,...,...,...,...,...
8506,Avatar: The Last Airbender,2005,Animation,8.7,"The gang, now hiding at Ember Island, watch a ...",Director:Giancarlo Volpe| Stars:Zach T...,3246.0,25.0,,"Animation, Action, Adventure",(2005–2008)
8507,Avatar: The Last Airbender,2005,Animation,9.2,Sozin's Comet is only three days away. Zuko te...,Director:Ethan Spaulding| Stars:Zach T...,3465.0,92.0,,"Animation, Action, Adventure",(2005–2008)
8508,Avatar: The Last Airbender,2005,Animation,9.5,Aang awakes on a mysterious island and confron...,Director:Giancarlo Volpe| Stars:Zach T...,3845.0,92.0,,"Animation, Action, Adventure",(2005–2008)
8509,Avatar: The Last Airbender,2005,Animation,9.8,Zuko battles his sister with Katara's help for...,Director:Joaquim Dos Santos| Stars:Zac...,5283.0,92.0,,"Animation, Action, Adventure",(2005–2008)


In [23]:
# Quick look at how sparse the Gross column is.
gross = df['Gross']

# distinct values (not displayed: only the last expression of a cell is shown)
gross.unique()

# (number of missing values, number of present values)
gross.isna().sum(), gross.count()

(9554, 445)

In [24]:
# median of the non-missing Gross values (the results are somewhat skewed)
df['Gross'].median(skipna=True)

8.55

In [25]:
# keep only Movie_Title and Gross, drop rows with a missing value in either
# (df itself is not modified), and sort by Gross in ascending order
df[['Movie_Title', 'Gross']].dropna().sort_values(by='Gross', ascending=True)

Unnamed: 0,Movie_Title,Gross
512,The Clovehitch Killer,0.01
6056,Theo Who Lived,0.01
1216,Honeymoon,0.01
4196,The Sunshine Makers,0.01
1317,Duck Butter,0.01
...,...,...
196,Spider-Man,403.71
144,Jumanji: Welcome to the Jungle,404.52
226,Captain America: Civil War,408.08
578,Finding Dory,486.30


## To CSV

In [26]:
# write the cleaned dataframe to "final_movies.csv" (the path held in `dest`),
# then list the CSV folder to confirm the file is there
df.to_csv(dest)
! ls ../CSV

ls: CSV: No such file or directory


## Getting data into PostgreSQL

In [8]:
# PostgreSQL settings come from the environment so they never appear in the notebook.
# Each value is None when its variable is unset.
SQL_DB_KEY = os.getenv('SQL_DB_KEY')  # postgres password
ENV_USER = os.getenv('USER')          # used as the database name in the connection URL

In [9]:
# Create the SQLAlchemy engine pandas uses to talk to PostgreSQL.
# The URL is assembled with URL.create() rather than an f-string so that a
# password containing characters such as '@', '/' or ':' is escaped correctly
# instead of corrupting the connection string.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username='postgres',
    password=SQL_DB_KEY,
    host='localhost',
    database=ENV_USER,
)
engine = create_engine(db_url)
engine

Engine(postgresql+psycopg2://postgres:***@localhost/carterthurman)

In [36]:
# if_exists accepts 'fail', 'replace' or 'append'; 'replace' drops the existing
# table and recreates it from this dataframe.
# index=False keeps the dataframe index out of the SQL table.
df.to_sql('movie_table', engine, if_exists='replace', index=False)

999

## Putting data into PostgreSQL via chunks 
Alternate way, for bigger datasets

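```python
# 'replace' only for the first chunk; later chunks must 'append', otherwise each
# chunk overwrites the previous one and only the last 1000 rows remain in the table.
for i, chunk in enumerate(pd.read_csv(dest, index_col=0, chunksize=1000)):
    chunk.to_sql('movie_table', engine, if_exists='replace' if i == 0 else 'append', index=False)
```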

## Reading Data from PostgreSQL

In [37]:
# read the whole movie_table back from PostgreSQL into a dataframe
sql_df = pd.read_sql_query('SELECT * from public."movie_table"', engine)

In [38]:
# display the rows that were read back from PostgreSQL
sql_df

Unnamed: 0,Movie_Title,Main_Year,Main_Genre,Ratings,Tagline,Stars,Votes,Runtime,Gross,Genres,Year_Range
0,Blood Red Sky,2021.0,Action,6.1,A woman with a mysterious illness is forced in...,Director:Peter Thorwarth| Stars:Peri B...,21062.0,121.0,,"Action, Horror, Thriller",(2021)
1,Masters of the Universe: Revelation,2021.0,Animation,5.0,The war for Eternia begins again in what may b...,"Stars:Chris Wood, Sarah Michel...",17870.0,25.0,,"Animation, Action, Adventure",(2021– )
2,The Walking Dead,2010.0,Drama,8.2,Sheriff Deputy Rick Grimes wakes up from a com...,"Stars:Andrew Lincoln, Norman R...",885805.0,44.0,,"Drama, Horror, Thriller",(2010–2022)
3,Rick and Morty,2013.0,Animation,9.2,An animated series that follows the exploits o...,"Stars:Justin Roiland, Chris Pa...",414849.0,23.0,,"Animation, Adventure, Comedy",(2013– )
4,Army of Thieves,2021.0,Action,,"A prequel, set before the events of Army of th...",Director:Matthias Schweighöfer| Stars:...,,,,"Action, Crime, Horror",(2021)
...,...,...,...,...,...,...,...,...,...,...,...
9994,The Imperfects,2021.0,Adventure,,Add a Plot,"Stars:Morgan Taylor Campbell, ...",,,,"Adventure, Drama, Fantasy",(2021– )
9995,Arcane,2021.0,Animation,,Add a Plot,,,,,"Animation, Action, Adventure",(2021– )
9996,Heart of Invictus,2022.0,Documentary,,Add a Plot,Director:Orlando von Einsiedel| Star:P...,,,,"Documentary, Sport",(2022– )
9997,The Imperfects,2021.0,Adventure,,Add a Plot,Director:Jovanka Vuckovic| Stars:Morga...,,,,"Adventure, Drama, Fantasy",(2021– )


# API

## movie db
https://www.themoviedb.org/settings/api/stats <BR>
https://developer.themoviedb.org/reference/trending-all

<b>Must run</b>

In [5]:
# TMDB credentials, read from the environment (each is None when its variable is unset)
MDB_KEY = os.getenv('MOVIE_DB_KEY')
RA_KEY = os.getenv('READ_ACCESS_KEY')  # sent as the Bearer token in the API requests
ACCOUNT_KEY = os.getenv('ACCOUNT_ID_KEY')

In [5]:
# Smoke test: call the authentication endpoint with the read-access token
# and print the raw reply.
url = "https://api.themoviedb.org/3/authentication"

auth_value = f"Bearer {RA_KEY}"
headers = {"accept": "application/json", "Authorization": auth_value}

response = requests.get(url, headers=headers)
print(response.text)

{"success":true,"status_code":1,"status_message":"Success."}


### Trending Movies

To iterate through pages:

url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"


In [6]:
%%time

# Fetch page 1 of today's trending movies and use it to create the dataframe
# (the recorded run returned 20 rows; the remaining pages are added afterwards).

# page 1 is requested explicitly
url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"

auth_value = f"Bearer {RA_KEY}"
headers = {"accept": "application/json", "Authorization": auth_value}

response_trnd = requests.get(url, headers=headers)
print(response_trnd)

# raw bytes -> JSON text -> Python objects
json_data_trnd = response_trnd.content.decode()
data_trnd = json.loads(json_data_trnd)

first_page_rows = data_trnd['results']

# the keys of the first result define the column order
trnd_columns = list(first_page_rows[0])

# one row per result, columns in the order captured above
trnd_df = pd.DataFrame(list(first_page_rows), columns=trnd_columns)
trnd_df

<Response [200]>
CPU times: user 56.9 ms, sys: 11.7 ms, total: 68.6 ms
Wall time: 265 ms


Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count
0,False,/sRLC052ieEzkQs9dEtPMfFxYkej.jpg,848326,Rebel Moon - Part One: A Child of Fire,en,Rebel Moon - Part One: A Child of Fire,When a peaceful colony on the edge of the gala...,/ui4DrH1cKk2vkHshcUcGt2lKxCm.jpg,movie,[878],1777.262,2023-12-15,False,6.455,884
1,False,/fm6KqXpk3M2HVveHwCrBSSBaO0V.jpg,872585,Oppenheimer,en,Oppenheimer,The story of J. Robert Oppenheimer's role in t...,/8Gxv8gSFCU0XGDykEGv7zR1n2ua.jpg,movie,"[18, 36]",886.535,2023-07-19,False,8.115,5808
2,False,/jXJxMcVoEuXzym3vFnjqDW4ifo6.jpg,572802,Aquaman and the Lost Kingdom,en,Aquaman and the Lost Kingdom,"Black Manta, still driven by the need to aveng...",/8xV47NDrjdZDpkVcCFqkdHa3T0C.jpg,movie,"[28, 12, 14]",1603.978,2023-12-20,False,6.493,345
3,False,/nHf61UzkfFno5X1ofIhugCPus2R.jpg,346698,Barbie,en,Barbie,Barbie and Ken are having the time of their li...,/iuFNMS8U5cb6xfzi51Dbkovj7vM.jpg,movie,"[35, 12, 14]",510.25,2023-07-19,False,7.159,6643
4,False,/1jITxVJhkiFJuQuj8NcPLmDNtJg.jpg,930564,Saltburn,en,Saltburn,Struggling to find his place at Oxford Univers...,/qjhahNLSZ705B5JP92YMEYPocPz.jpg,movie,"[18, 35, 53]",449.099,2023-11-16,False,7.176,615
5,False,/8x0kWa30xdLKBjQWnZNvORnv02v.jpg,664341,Eileen,en,Eileen,"During a bitter 1964 Massachusetts winter, you...",/gdGKBa3UVS5GMZsayqnfupBB5fb.jpg,movie,"[18, 53]",24.716,2023-12-01,False,6.813,16
6,False,/4HodYYKEIsGOdinkGi2Ucz6X9i0.jpg,569094,Spider-Man: Across the Spider-Verse,en,Spider-Man: Across the Spider-Verse,"After reuniting with Gwen Stacy, Brooklyn’s fu...",/8Vt6mWEReuy4Of61Lnj5Xj704m8.jpg,movie,"[16, 28, 12, 878]",431.881,2023-05-31,False,8.382,5374
7,False,/vzO729sITNJ1jB6Y9AJpIhlbvb2.jpg,1213997,Bitconned,en,Bitconned,"In this true-crime documentary, three guys exp...",/6YZmezBcTwL9KUn5kkygAvrJ1fx.jpg,movie,[99],66.434,2024-01-01,False,5.7,3
8,False,/5a4JdoFwll5DRtKMe7JLuGQ9yJm.jpg,695721,The Hunger Games: The Ballad of Songbirds & Sn...,en,The Hunger Games: The Ballad of Songbirds & Sn...,64 years before he becomes the tyrannical pres...,/mBaXZ95R2OxueZhvQbcEWy2DqyO.jpg,movie,"[18, 878, 28]",1403.074,2023-11-15,False,7.237,1253
9,False,/mgTeI8UNJzre7YMt24lfQLyXnA8.jpg,1075175,How to Have Sex,en,How to Have Sex,Three British teenage girls go on a rites-of-p...,/rafwrzslLb203hQFIU8s0yRk0Qy.jpg,movie,[18],74.985,2023-11-02,False,6.382,76


In [7]:
%%time

# Download the remaining pages of today's trending movies and stack them under page 1.
#
# Needs from earlier cells: `response_trnd` (the page-1 response), `trnd_columns`
# (column order taken from page 1) and `RA_KEY` (bearer token).
# The request loop is slow: the recorded run took about two minutes of wall time.
# The cell rebuilds `trnd_df` from scratch and leaves `response_trnd` untouched,
# so it can be re-run without duplicating rows.

# NOTE(review): the recorded run reported 1000 total pages but stopped after roughly
# 10,000 rows (~500 pages of 20), so the API appears to reject pages above 500 -
# confirm against the TMDB documentation.
MAX_PAGE = 500
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the cell

# Take the page count from the parsed JSON instead of a fixed-width regex,
# which only matched page counts of at least four digits.
first_page = response_trnd.json()
if 'total_pages' not in first_page:
    # Without a page count there is nothing to iterate over; fail with a clear message.
    raise RuntimeError(f"Number not found: {response_trnd.text[:200]}")
page_number = int(first_page['total_pages'])
print(f"Number: {page_number}")

# The headers never change, so they are built once outside the loop.
headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY),
}

# One frame per page, starting with page 1 (already downloaded).
frames = [pd.DataFrame(first_page['results'], columns=trnd_columns)]

# range() excludes its stop value, hence the +1 so the last page is fetched too.
for page in range(2, min(page_number, MAX_PAGE) + 1):
    url = f"https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page={page}"
    try:
        page_response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # Network failure: stop here and keep what was downloaded so far.
        print(f"Out of Pages (request for page {page} failed: {err})")
        break
    if not page_response.ok:
        # Error replies carry no 'results'; report the status instead of hiding it.
        print(f"Out of Pages (HTTP {page_response.status_code} on page {page})")
        break
    results = page_response.json().get('results')
    if not results:
        print("Out of Pages")
        break
    frames.append(pd.DataFrame(results, columns=trnd_columns))

# A single concat at the end avoids re-copying the growing frame on every page.
trnd_df = pd.concat(frames, ignore_index=True)

# printing out our results
trnd_df

Number: 1000
Out of Pages
CPU times: user 26.9 s, sys: 1.38 s, total: 28.3 s
Wall time: 1min 55s


Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count
0,False,/sRLC052ieEzkQs9dEtPMfFxYkej.jpg,848326,Rebel Moon - Part One: A Child of Fire,en,Rebel Moon - Part One: A Child of Fire,When a peaceful colony on the edge of the gala...,/ui4DrH1cKk2vkHshcUcGt2lKxCm.jpg,movie,[878],1777.262,2023-12-15,False,6.455,884.0
1,False,/fm6KqXpk3M2HVveHwCrBSSBaO0V.jpg,872585,Oppenheimer,en,Oppenheimer,The story of J. Robert Oppenheimer's role in t...,/8Gxv8gSFCU0XGDykEGv7zR1n2ua.jpg,movie,"[18, 36]",886.535,2023-07-19,False,8.115,5808.0
2,False,/jXJxMcVoEuXzym3vFnjqDW4ifo6.jpg,572802,Aquaman and the Lost Kingdom,en,Aquaman and the Lost Kingdom,"Black Manta, still driven by the need to aveng...",/8xV47NDrjdZDpkVcCFqkdHa3T0C.jpg,movie,"[28, 12, 14]",1603.978,2023-12-20,False,6.493,345.0
3,False,/nHf61UzkfFno5X1ofIhugCPus2R.jpg,346698,Barbie,en,Barbie,Barbie and Ken are having the time of their li...,/iuFNMS8U5cb6xfzi51Dbkovj7vM.jpg,movie,"[35, 12, 14]",510.250,2023-07-19,False,7.159,6643.0
4,False,/1jITxVJhkiFJuQuj8NcPLmDNtJg.jpg,930564,Saltburn,en,Saltburn,Struggling to find his place at Oxford Univers...,/qjhahNLSZ705B5JP92YMEYPocPz.jpg,movie,"[18, 35, 53]",449.099,2023-11-16,False,7.176,615.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9993,False,/4HSgJLWSGzxuf8QJaffm2p9zEt.jpg,13302,King Thrushbeard,de,König Drosselbart,A fairy-tale about a beautiful but very haught...,/s030xxlgT5aMpBoP16E3XMTB9Ir.jpg,movie,"[14, 10751]",3.443,1965-12-01,False,5.778,18.0
9994,False,/z8sNNjEXEpZNQCHCuo3QH8kK00t.jpg,665142,Young Aunt 3,ko,어린 이모 3,Seok-yeong has been living with his father eve...,/qD7kT9LysayisTiCrdFyhZWIuK1.jpg,movie,[10749],14.928,2020-01-02,False,6.100,9.0
9995,False,/dnVigajpWbYFshlP77KRfYPhIz7.jpg,33666,The Glass Bottom Boat,en,The Glass Bottom Boat,"Bruce, the owner of a aerospace company, is in...",/jlj5rCl7jfW7QskJ1vucd7j5n5M.jpg,movie,"[35, 10749]",11.445,1966-06-09,False,6.200,56.0
9996,False,/8JxXANejJvRmp7DVZmbPHuMeIg1.jpg,1100962,One More Time,sv,One More Time,"On her 40th birthday, Amelia makes a fateful w...",/dLWDiTJWKgPuyQuuD0zDRhwU2y4.jpg,movie,"[35, 18]",11.022,2023-04-12,False,5.900,67.0


In [10]:
# write the trending movies to trnd_movies.csv (the path held in `trnd_dest`),
# list the CSV folder, and show the dataframe schema
trnd_df.to_csv(trnd_dest)
! ls ../CSV
trnd_df.info()

final_movies.csv movies.csv       trnd_movies.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9998 entries, 0 to 9997
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   adult              9998 non-null   bool   
 1   backdrop_path      9689 non-null   object 
 2   id                 9998 non-null   int64  
 3   title              9998 non-null   object 
 4   original_language  9998 non-null   object 
 5   original_title     9998 non-null   object 
 6   overview           9998 non-null   object 
 7   poster_path        9950 non-null   object 
 8   media_type         9998 non-null   object 
 9   genre_ids          9995 non-null   object 
 10  popularity         9995 non-null   float64
 11  release_date       9995 non-null   object 
 12  video              9995 non-null   object 
 13  vote_average       9995 non-null   float64
 14  vote_count         9995 non-null   float64
dtypes: bool(1), float64(3)

In [11]:
# PostgreSQL settings come from the environment so they never appear in the notebook.
# Each value is None when its variable is unset.
SQL_DB_KEY = os.getenv('SQL_DB_KEY')  # postgres password
ENV_USER = os.getenv('USER')          # used as the database name in the connection URL

In [12]:
# Create the SQLAlchemy engine pandas uses to talk to PostgreSQL.
# The URL is assembled with URL.create() rather than an f-string so that a
# password containing characters such as '@', '/' or ':' is escaped correctly
# instead of corrupting the connection string.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username='postgres',
    password=SQL_DB_KEY,
    host='localhost',
    database=ENV_USER,
)
engine = create_engine(db_url)
engine

Engine(postgresql+psycopg2://postgres:***@localhost/carterthurman)

In [13]:
# if_exists accepts 'fail', 'replace' or 'append'; 'append' adds the rows to an
# existing table, so re-running this cell inserts the same rows again.
# index=False keeps the dataframe index out of the SQL table.
trnd_df.to_sql('trnd_movie_table', engine, if_exists='append', index=False)

998

### Upcoming Movies

In [6]:
# Request page 1 of the upcoming-movies listing and print the HTTP status.
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"

auth_value = f"Bearer {RA_KEY}"
headers = {"accept": "application/json", "Authorization": auth_value}

response_up = requests.get(url, headers=headers)
print(response_up)

<Response [200]>


In [8]:
# decode the raw response bytes into a JSON-formatted string
json_data = response_up.content.decode()

In [9]:
# parse the JSON string into Python objects (dicts / lists)
data = json.loads(json_data)

In [10]:
# Print the top-level keys of the response and the number of pages available.
# 'results' holds the movie records we want.
# 'dates' - presumably the date information attached to the listing (TODO confirm)
# 'total_pages' is how many pages we can iterate through
print("Keys in Object: ", *data.keys(), "\nTotal Pages: ", str(data['total_pages']))

Keys in Object:  dates page results total_pages total_results 
Total Pages:  49


In [13]:
# keys and values of the first result (index 0), printed as (key, value) pairs
print(*data['results'][0].items())

('adult', False) ('backdrop_path', '/jXJxMcVoEuXzym3vFnjqDW4ifo6.jpg') ('genre_ids', [28, 12, 14]) ('id', 572802) ('original_language', 'en') ('original_title', 'Aquaman and the Lost Kingdom') ('overview', "Black Manta, still driven by the need to avenge his father's death and wielding the power of the mythic Black Trident, will stop at nothing to take Aquaman down once and for all. To defeat him, Aquaman must turn to his imprisoned brother Orm, the former King of Atlantis, to forge an unlikely alliance in order to save the world from irreversible destruction.") ('popularity', 1542.493) ('poster_path', '/8xV47NDrjdZDpkVcCFqkdHa3T0C.jpg') ('release_date', '2023-12-20') ('title', 'Aquaman and the Lost Kingdom') ('video', False) ('vote_average', 6.483) ('vote_count', 361)


In [14]:
# column names = the keys of the first result, in their original order
up_columns = list(data['results'][0])
print(up_columns)

['adult', 'backdrop_path', 'genre_ids', 'id', 'original_language', 'original_title', 'overview', 'popularity', 'poster_path', 'release_date', 'title', 'video', 'vote_average', 'vote_count']


In [120]:
# dump every field of every result, one "name value" pair per line
for movie in data['results']:
    for field, value in movie.items():
        print(field, value)

adult False
backdrop_path /xgGGinKRL8xeRkaAR9RMbtyk60y.jpg
genre_ids [16, 10751, 10402, 14, 35]
id 901362
original_language en
original_title Trolls Band Together
overview When Branch's brother, Floyd, is kidnapped for his musical talents by a pair of nefarious pop-star villains, Branch and Poppy embark on a harrowing and emotional journey to reunite the other brothers and rescue Floyd from a fate even worse than pop-culture obscurity.
popularity 1620.617
poster_path /qV4fdXXUm5xNlEJ2jw7af3XxuQB.jpg
release_date 2023-10-12
title Trolls Band Together
video False
vote_average 7.1
vote_count 304
adult False
backdrop_path /yOm993lsJyPmBodlYjgpPwBjXP9.jpg
genre_ids [35, 10751, 14]
id 787699
original_language en
original_title Wonka
overview Willy Wonka – chock-full of ideas and determined to change the world one delectable bite at a time – is proof that the best things in life begin with a dream, and if you’re lucky enough to meet Willy Wonka, anything is possible.
popularity 748.278
poster

In [17]:
# Build the upcoming-movies dataframe from the API results in one chain.

# final left-to-right column layout
up_column_order = ['original_title', 'id', 'overview',
                   'original_language', 'release_date', 'popularity', 'vote_average',
                   'vote_count', 'backdrop_path', 'poster_path']

up_df = (
    # one row per result, columns in the order captured in up_columns
    pd.DataFrame(list(data['results']), columns=up_columns)
    # columns that are not needed
    .drop(columns=['genre_ids', 'video'])
    # keep and order the remaining columns
    .loc[:, up_column_order]
    # parse the date strings so the column is stored as datetime64
    .assign(release_date=lambda frame: pd.to_datetime(frame['release_date']))
)

up_df

Unnamed: 0,original_title,id,overview,original_language,release_date,popularity,vote_average,vote_count,backdrop_path,poster_path
0,Aquaman and the Lost Kingdom,572802,"Black Manta, still driven by the need to aveng...",en,2023-12-20,1542.493,6.483,361,/jXJxMcVoEuXzym3vFnjqDW4ifo6.jpg,/8xV47NDrjdZDpkVcCFqkdHa3T0C.jpg
1,Oppenheimer,872585,The story of J. Robert Oppenheimer's role in t...,en,2023-07-19,889.087,8.118,5857,/rLb2cwF3Pazuxaj0sRXQ037tGI1.jpg,/8Gxv8gSFCU0XGDykEGv7zR1n2ua.jpg
2,Wonka,787699,Willy Wonka – chock-full of ideas and determin...,en,2023-12-06,931.227,7.119,809,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg
3,Lord of Misrule,853387,When the daughter of the town's new priest goe...,en,2023-10-26,541.598,5.6,12,/vQpvNDc0AFao8BbWyXDFVVrqiZj.jpg,/eCNJuGsCNdf2yf4F3UcDg1WZTbo.jpg
4,Rewind,1143183,Mary loves John for as long as she can remembe...,tl,2023-12-25,589.269,7.4,6,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,/vW80VheMJhHsj0pDVYJL3qAMWMd.jpg
5,ゴジラ-1.0,940721,"In postwar Japan, a new terror rises. Will the...",ja,2023-11-03,440.591,7.967,230,/bWIIWhnaoWx3FTVXv6GkYDv3djL.jpg,/hkxxMIGaiCTmrEArK7J56JTKUlB.jpg
6,Migration,940551,After a migrating duck family alights on their...,en,2023-12-06,468.604,7.225,80,/ptz5ETMxDoRRiE69BVuIxJzyTEO.jpg,/ldfCF9RhR40mppkzmftxapaHeTo.jpg
7,Todos los nombres de Dios,1053592,"After being implicated in a terrorist attack, ...",es,2023-09-15,439.14,6.8,26,/zX9m8h33pHXcES7ttO8v0ThiYj7.jpg,/n15gfcgwV0LVPSobrayZcFHcwN6.jpg
8,Sound of Freedom,678512,"The story of Tim Ballard, a former US governme...",en,2023-07-03,250.503,8.07,1708,/pA3vdhadJPxF5GA1uo8OPTiNQDT.jpg,/qA5kPYZA7FkVvqcEfJRoOy4kpHg.jpg
9,Мавка: Лісова пісня,459003,Forest soul Mavka faces an impossible choice b...,uk,2023-03-02,191.432,7.3,565,/lyHmhoRj3zXSdeCYbs2oOXLCF4K.jpg,/eeJjd9JU2Mdj9d7nWRFLWlrcExi.jpg


In [25]:
# schema check: dtypes and non-null counts of the upcoming-movies dataframe
up_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   original_title     20 non-null     object        
 1   id                 20 non-null     int64         
 2   overview           20 non-null     object        
 3   original_language  20 non-null     object        
 4   release_date       20 non-null     datetime64[ns]
 5   popularity         20 non-null     float64       
 6   vote_average       20 non-null     float64       
 7   vote_count         20 non-null     int64         
 8   backdrop_path      20 non-null     object        
 9   poster_path        20 non-null     object        
dtypes: datetime64[ns](1), float64(2), int64(2), object(5)
memory usage: 1.7+ KB


In [26]:
# PostgreSQL settings come from the environment so they never appear in the notebook.
# Each value is None when its variable is unset.
SQL_DB_KEY = os.getenv('SQL_DB_KEY')  # postgres password
ENV_USER = os.getenv('USER')          # used as the database name in the connection URL

In [27]:
# Create the SQLAlchemy engine pandas uses to talk to PostgreSQL.
# The URL is assembled with URL.create() rather than an f-string so that a
# password containing characters such as '@', '/' or ':' is escaped correctly
# instead of corrupting the connection string.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username='postgres',
    password=SQL_DB_KEY,
    host='localhost',
    database=ENV_USER,
)
engine = create_engine(db_url)
engine

Engine(postgresql+psycopg2://postgres:***@localhost/carterthurman)

In [28]:
# if_exists accepts 'fail', 'replace' or 'append'; 'replace' drops the existing
# table and recreates it from this dataframe.
# index=False keeps the dataframe index out of the SQL table.
up_df.to_sql('up_movie_table', engine, if_exists='replace', index=False)

20

In [30]:
# write the upcoming movies to up_movies.csv (the path held in `up_dest`)
up_df.to_csv(up_dest)