In [None]:
'''with open('../requirements.txt', 'w') as f:
    f.write('pandas == 2.1.3\nnumpy == 1.26.0\npsycopg2 == 2.9.3\nsqlalchemy == 2.0.23')'''

In [None]:
# if necessary
#! pip install -r ../requirements.txt

<b> must run 

In [48]:
import pandas as pd
import psycopg2 as ps # postgresql database connection
import os # for environment variables
from sqlalchemy import create_engine # for pandas and postgresql connection
import sqlalchemy
import requests # for API connection later
import json # for easier API data search later
import re # for searching text
import datetime # for gathering dates and times

In [2]:
! pwd
! ls ../CSV

/Users/carterthurman/Documents/GitHub/Movie_DB/Notebooks
final_movies.csv movies.csv       trnd_movies.csv  up_movies.csv


# CSV Movie Database
Written and executed in a jupyter notebook

## Reading Data

In [None]:
# reading file to dataframe
df = pd.read_csv(path, index_col=0)
df

In [None]:
# change column names
df.rename(columns=
                     {'MOVIES': 'Movie_Title'
                        ,'YEAR': 'Year'
                        ,'GENRE': 'Genres'
                        ,'RATING': 'Ratings'
                        ,'ONE-LINE': 'Tagline'
                        ,'STARS': 'Stars'
                        ,'VOTES': 'Votes'
                        ,'RunTime': 'Runtime'}, inplace=True)
df.info()

## Data Cleaning

In [None]:
# for each column in df_mov, check if the column is an 'object' ('O') datatype, 
# and if so, replace '\n' (newline) with empty string ''

for column in df.columns:
    if df['{}'.format(column)].dtype == 'O':
        # formating column name into the dataframe search, searching for '\n' and replacing with empty string ''
        df['{}'.format(column)] = df['{}'.format(column)].str.replace('\n', '')

# changing columns to correct datatypes after replacing specific characters

#df['Votes'] = df['Votes'].str.replace(',','')
df['Votes'] = df['Votes'].astype('float64')
#df['Gross'] = df['Gross'].str.replace('$','').str.replace('M', '')
df['Gross'] = df['Gross'].astype('float64')

In [None]:
df.info()

In [None]:
# cleaning up

# Grabs the first Genre and puts it into a new column based on ','
df['Main_Genre'] = df['Genres'].str.split(',', expand=True)[0]

# grabs first four digits (the year made) and puts it into a new column
df['Main_Year'] = df['Year'].str.extract(r'(\d{4})', expand=True)

# changing year to Int64 type for better aggregation
df['Main_Year'] = df['Main_Year'].astype('Int64')

# replacing 0 with np.nan to get better results
df['Gross'] = df['Gross'].replace(0, np.nan)

# Reorganizing columns
df = df[['Movie_Title', 'Main_Year', 'Main_Genre', 'Ratings', 'Tagline', 'Stars', 'Votes', 'Runtime', 'Gross', 'Genres', 'Year']]
df.rename(columns={'Year': 'Year_Range'}, inplace=True)

In [None]:
df.info()

## Minor Data Searching

In [None]:
# finding top 100 movie title that contains regex string, and that DOES NOT (~) contain a specific string
# in this case we are finding a movie/TV title that contains Avatar Airbend and NOT a tagline that contains 'Live'
# so finding animated series instead of Live series

df[df['Movie_Title'].str.contains(r'Avatar.*Airbend') & ~df['Tagline'].str.contains('Live')].head(100)

In [None]:
# finding unique values (won't print out, long list)
df['Gross'].unique()

# finding NA values sum, and count of non NA values
df['Gross'].isna().sum(), df['Gross'].count()

In [None]:
# grabbing median for one column (some scewed results) that aren't NA
df['Gross'].median(skipna=True)

In [None]:
# temporary dropping na values, grabbing Movie Title and Gross columns, sorting by Gross and Ascending
df[['Movie_Title', 'Gross']].dropna().sort_values(by='Gross', ascending=True)

## To CSV

In [None]:
# making csv file "final_movies.csv"
df.to_csv(dest)
! ls ../CSV

## Getting data into PostgreSQL

In [None]:
# gathering environmental variable for the postgres db password
SQL_DB_KEY = os.environ.get('SQL_DB_KEY')
# gathering user environmental variable for postgres user id
ENV_USER = os.environ.get('USER')

In [None]:
# creating sqlachemy engine to read data from postgresql
engine = create_engine(f'postgresql+psycopg2://postgres:{SQL_DB_KEY}@localhost/{ENV_USER}')
engine

In [None]:
# 'append, replace, fail' arguments.
# index = False is 
df.to_sql('movie_table', engine, if_exists='replace', index=False)

## Putting data into PostgreSQL via chunks 
Alternate way, for bigger datasets

for chunk in pd.read_csv(dest, index_col=0, chunksize=1000): 
    chunk.to_sql('movie_table', engine, if_exists="replace")

## Reading Data from PostgreSQL

In [None]:
sql_df = pd.read_sql_query('SELECT * from public."movie_table"', engine)

In [None]:
sql_df

# API The Movie Database

### movie db
https://www.themoviedb.org/settings/api/stats <BR>

<b> must run 

In [27]:
# creating paths and dest file paths 
path = '../CSV/movies.csv'
dest = '../CSV/final_movies.csv'
trnd_dest = '../CSV/trnd_movies.csv'
up_dest = '../CSV/up_movies.csv'
p_dest = '../CSV/people_movies.csv'

In [5]:
# gathering my environment keys
RA_KEY = os.environ.get('READ_ACCESS_KEY')
# gathering environmental variable for the postgres db password
SQL_DB_KEY = os.environ.get('SQL_DB_KEY')
# gathering user environmental variable for postgres user id
ENV_USER = os.environ.get('USER')

# These are unnecessary for now.
MDB_KEY = os.environ.get('MOVIE_DB_KEY')
ACCOUNT_KEY = os.environ.get('ACCOUNT_ID_KEY')

In [44]:
# creating sqlachemy engine to read/write data from postgresql
engine = create_engine(f'postgresql+psycopg2://postgres:{SQL_DB_KEY}@localhost/{ENV_USER}')

### functions

In [7]:
def convert_to_list(value):
    if isinstance(value, float):
        return value  # Enclose float in a list
    else:
        return value  # Keep as-is if already a list

# data cleaning
# genre_ids column has values that are lists, 
# we need to separate the first value in the list to make it the main genre
def corr_values(df, corr_dict):
    df['genre_ids'] = df['genre_ids'].apply(convert_to_list)
    df.dropna(inplace=True)
    # this function takes the genres_ids column which has values such as [1, 13, 24] (example numbers)
    # and replaces them with the correlating Genre NAMES. For example, id '28' = 'Action' 
    # so the column will now look like a list of names ['Action', 'Adventure', 'Comedy']
    df["genre_ids"] = df["genre_ids"].apply(lambda x: [corr_dict[i] for i in x] if isinstance(x, list) else x)
    return df
def genre_values(df):
    df['genre_ids'] = df['genre_ids'].apply(convert_to_list)
    # this function finds the first item in the list and puts it into a new separate column
    # so ['Action', 'Adventure', 'Comedy'] will just become 'Action'
    df['main_genre'] = df['genre_ids'].apply(lambda x: x[0] if len(x)>0 else x)
    return df

In [8]:
def df_make(url):
    # multiple try and excepts to make sure response code is correct.
    try:
        headers = {
        "accept": "application/json",
        "Authorization": "Bearer {}".format(RA_KEY)
        }
        response = requests.get(url, headers=headers)
        print(response, "\n\n")
    except:
        print("Failed Response")
        print(response.status_code)


    try:
        # decodes response into json object
        json_data = response.content.decode()
        # converts json object/string into python object
        data = json.loads(json_data)
    except:
        print("Failed JSON Load")
        

    try:
        # gathering columns into list
        columns = [i for i in data['results'][0].keys()]
        # unpacking data from json object into the data argument and setting columns, making a dataframe
        df = pd.DataFrame([*data['results']], columns=columns)
        ###print(df.info())
        # genre_ids column has values that are lists, 
        # we need to separate the first value in the list to make it the 
        # main genre after correlating id values to genre name
    except:
        print("Failed Dataframe.\nResults:\n\n")
        print(data.content) 
    
    # gathers the total pages of response
    match = re.search(r'"total_pages":(\d+)', response.text)
    ###print("Match:\n", match)
    # if the response gave a correct variable and not an error, it will grab the page number by the first group
    # else, it will print "Number not found"
    if match:
        page_number = match.group(1)
        print(f"Number: {page_number}")
        page_number = int(page_number)
    else:
        print("Number not found")
        return df
    
        
        
    # this whole block of code with try to do it and until it fails, it will continue. 
    # once it fails, it will print "Out of Pages"

    try:
        # for each page starting at page 2 (because we already have page 1), iterate through and gather the response
        # including the url which the page_number is being formatted into, the headers with the environmental keys
        # and then putting it into a response variable which is then being decoded into a json object
        # and then loading into a data json object that can be searched through

        # Split the URL into parts before the "page=" and after
        # useful for iterating through the pages
        base_url, page_part = url.rsplit("page=", 1)
        
        ###print("BASE URL AND PAGE PART\n", base_url, page_part)
        
        for page in range(2, page_number):
            
            # Construct the full URL with the current page number
            url = base_url + "page=" + str(page)
            ###print("URL:\n", url)
            headers = {
                "accept": "application/json",
                "Authorization": "Bearer {}".format(RA_KEY)}
            
            response = requests.get(url, headers=headers)
            ###print(response.status_code)
            
            # decodes response into json object
            json_data = response.content.decode()
            # converts json object/string into python object
            data = json.loads(json_data)
            # if data exists, then add it to a temporary dataframe with correct columns, and then 
            # join it (concat it) into the already created dataframe while ignoring the index column.
            ###print("Data Keys:\n", data.keys())
            ###print("Results IN Data?: ", 'results' in data.keys())
            if 'results' in data.keys():
                df_temp = pd.DataFrame([*data['results']], columns=columns)
                
                df = pd.concat([df, df_temp], ignore_index=True)
            else:
                print("Dataframe Done.")
                break
        # genre_ids column has values that are lists, 
        # we need to separate the first value in the list to make it the main genre
        # to check if a certain column is in df, and if so, it will change the results properly through
        # the different functions
        if 'genre_ids' in df.columns:
            df = corr_values(df, genre_result)
            df = genre_values(df)
        # gathering current date
        df['curr_date'] = datetime.date.today().strftime("%Y-%m-%d")
        # converting to pandas datetime type 
        df['curr_date'] = pd.to_datetime(df['curr_date'])
        return df
    except:
        print("Out of Pages")

In [9]:
# Testing connection

url = "https://api.themoviedb.org/3/authentication"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {0}".format(RA_KEY)
}

response = requests.get(url, headers=headers, params={'api_key':MDB_KEY})

print(response.text)

{"success":true,"status_code":1,"status_message":"Success."}


### testing functions

In [341]:
test_df = trnd_df.copy()

In [344]:
test_df = corr_values(test_df, genre_result)
test_df = genre_values(test_df)

In [345]:
counts = test_df['genre_ids'].apply(type).value_counts()
counts

genre_ids
<class 'list'>    9673
Name: count, dtype: int64

In [346]:
test_df

Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count,main_genre
0,False,/feSiISwgEpVzR1v3zv2n2AU4ANJ.jpg,609681,The Marvels,en,The Marvels,"Carol Danvers, aka Captain Marvel, has reclaim...",/Ag3D9qXjhJ2FUkrlJ0Cv1pgxqYQ.jpg,movie,"[Science Fiction, Adventure, Action]",569.145,2023-11-08,False,6.371,896.0,Science Fiction
1,False,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg,955916,Lift,en,Lift,"An international heist crew, led by Cyrus Whit...",/46sp1Z9b2PPTgCMyA87g9aTLUXi.jpg,movie,"[Action, Comedy, Crime]",795.472,2024-01-10,False,6.212,226.0,Action
2,False,/mHpoCdCSebNDIIHbqaAxbD57o7H.jpg,823452,The Boys in the Boat,en,The Boys in the Boat,The triumphant underdog story of the Universit...,/ncJMztHprw3gLRAnDjNnnT23CIt.jpg,movie,"[Drama, History]",27.831,2023-12-25,False,8.500,21.0,Drama
3,False,/yl2GfeCaPoxChcGyM5p7vYp1CKS.jpg,848187,Role Play,en,Role Play,Emma has a wonderful husband and two kids in t...,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,movie,"[Action, Comedy, Romance]",143.492,2023-12-14,False,5.932,110.0,Action
4,False,/iP3fL9jwcYeGMyfZKAT6KqW1qc.jpg,558915,The Color Purple,en,The Color Purple,A decades-spanning tale of love and resilience...,/3Jc93sCl0DqkePYjw47zHpqj7YS.jpg,movie,[Drama],78.462,2023-12-25,False,6.917,36.0,Drama
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9986,False,/yeQZTan6q2W9Lo69QjHULCJAwbJ.jpg,45380,Crazy Stone,zh,疯狂的石头,Three thieves try to steal a valuable jade tha...,/nGtj7pyxZQ8hK6fJBekU27bVtmZ.jpg,movie,"[Action, Comedy]",6.994,2006-06-30,False,7.300,100.0,Action
9987,False,/8TIRO8lxg3yGRqQNIAE11FFfWZS.jpg,2346,Still Life,zh,三峡好人,A town in Fengjie county is gradually being de...,/pJYBqUk9zZ3lotYo4711HqWh8QE.jpg,movie,[Drama],12.409,2006-11-16,False,7.200,127.0,Drama
9988,False,/xhHsbpLpPO5AvKx3E6jLXtYAqXV.jpg,530154,Lust Stories,hi,Lust Stories,An anthology of four stories that sheds light ...,/mP5HehSpCaRGd6ej66jxkhdl60B.jpg,movie,"[Drama, Romance]",12.439,2018-06-15,False,6.300,116.0,Drama
9990,False,/lUK6pWQeamOVKeNGJJ6Q4Y9HzOU.jpg,83902,Na Tum Jaano Na Hum,hi,ना तुम जानो ना हम,Esha and Rahul start a letter based friendship...,/cOIrm9WvMGvbliNZ88bK8hVYUc1.jpg,movie,"[Romance, Drama]",10.774,2002-05-10,False,4.600,13.0,Romance


### Genre Lists

In [10]:
## Gathering Movie and TV show genre list

url = "https://api.themoviedb.org/3/genre/movie/list?language=en"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {0}".format(RA_KEY)
}

response_mlist = requests.get(url, headers=headers)

url = "https://api.themoviedb.org/3/genre/tv/list?language=en"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {0}".format(RA_KEY)
}

response_tvlist = requests.get(url, headers=headers)

In [11]:
# decodes response into json object
json_data_mlist = response_mlist.content.decode()
# converts json object/string into python object
temp_data_mlist = json.loads(json_data_mlist)
# decodes response into json object
json_data_tvlist = response_tvlist.content.decode()
# converts json object/string into python object
temp_data_tvlist = json.loads(json_data_tvlist)

In [12]:
data_mlist = (temp_data_mlist['genres'])
data_tvlist = (temp_data_tvlist['genres'])

In [13]:
movie_genres = list()
tv_genres = list()

In [14]:
for i, v in enumerate(data_mlist):
    movie_genres.append([*data_mlist[i].values()])
for i, v in enumerate(data_tvlist):
    tv_genres.append([*data_tvlist[i].values()])

In [15]:
# combining the two lists
genre_list = movie_genres + tv_genres
# length of list with duplicate genres, should be 35
print(len(genre_list))
# making sure there is no duplicates via "set"
genre_list = list(set(tuple(sublist) for sublist in genre_list))
# lenght of genre list now - should be 27
print(len(genre_list))

35
27


In [16]:
# making it a dictionary
genre_result = {id_val[0]: id_val[1] for id_val in genre_list}

#### Genre Searching

In [65]:
movie_genres[0], movie_genres[0][0]

([28, 'Action'], 28)

In [66]:
tv_genres[0], tv_genres[0][0]

([10759, 'Action & Adventure'], 10759)

In [None]:
# to iterate through list 
for i, v in (genre_list):
    print(i, v)

In [None]:
# another way to iterate through the list
for i in genre_list:
    print(i[0], i[1])

### Trending Movies

<b> TO DO </b>
<br> - MAKE INTO FUNCTION

To iterate through pages:

url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"


In [17]:
%%time
url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"
trnd_df = df_make(url)

<Response [200]> 


Number: 1000
Dataframe Done.
CPU times: user 25.6 s, sys: 1.39 s, total: 27 s
Wall time: 1min 56s


In [18]:
trnd_df

Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count,main_genre,curr_date
0,False,/feSiISwgEpVzR1v3zv2n2AU4ANJ.jpg,609681,The Marvels,en,The Marvels,"Carol Danvers, aka Captain Marvel, has reclaim...",/Ag3D9qXjhJ2FUkrlJ0Cv1pgxqYQ.jpg,movie,"[Science Fiction, Adventure, Action]",569.145,2023-11-08,False,6.370,903.0,Science Fiction,2024-01-17
1,False,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg,955916,Lift,en,Lift,"An international heist crew, led by Cyrus Whit...",/46sp1Z9b2PPTgCMyA87g9aTLUXi.jpg,movie,"[Action, Comedy, Crime]",795.472,2024-01-10,False,6.239,230.0,Action,2024-01-17
2,False,/yl2GfeCaPoxChcGyM5p7vYp1CKS.jpg,848187,Role Play,en,Role Play,Emma has a wonderful husband and two kids in t...,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,movie,"[Action, Comedy, Romance]",143.492,2023-12-14,False,5.950,111.0,Action,2024-01-17
3,False,/mHpoCdCSebNDIIHbqaAxbD57o7H.jpg,823452,The Boys in the Boat,en,The Boys in the Boat,The triumphant underdog story of the Universit...,/ncJMztHprw3gLRAnDjNnnT23CIt.jpg,movie,"[Drama, History]",27.831,2023-12-25,False,8.500,21.0,Drama,2024-01-17
4,False,/iP3fL9jwcYeGMyfZKAT6KqW1qc.jpg,558915,The Color Purple,en,The Color Purple,A decades-spanning tale of love and resilience...,/3Jc93sCl0DqkePYjw47zHpqj7YS.jpg,movie,[Drama],78.462,2023-12-25,False,6.919,37.0,Drama,2024-01-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9987,False,/8Z0gPcBeImWCxmgZIChcDEFjqJK.jpg,401065,The Axe Murders of Villisca,en,The Axe Murders of Villisca,Three ghost-hunting high-schoolers visit the M...,/69BB2Bp4i0LdOqtAtH5hIZiUFb3.jpg,movie,[Horror],5.534,2017-01-20,False,4.877,81.0,Horror,2024-01-17
9988,False,/8eRYHoGRCh5SSnkbEttgBrOvwcH.jpg,54860,The Bodyguard,ja,ボディガード牙,Karate master and anti-drug vigilante Chiba re...,/bECtwslT7z8uS0Y7ahFfidIXO0m.jpg,movie,"[Action, Thriller]",2.961,1973-06-30,False,4.188,16.0,Action,2024-01-17
9989,False,/20ZansYII7VciPyNXWegcipOl5a.jpg,1066205,My Sister's Serial Killer Boyfriend,en,My Sister's Serial Killer Boyfriend,"Maddie, a local reporter investigating a serie...",/9CMMTffmuzaK8Lx8TBYk6qTTSyZ.jpg,movie,"[Thriller, TV Movie]",3.752,2023-01-08,False,8.500,2.0,Thriller,2024-01-17
9990,False,/wCLbSw3hSQBB0iwruUzuDvMq37D.jpg,542787,Frenzy,en,Frenzy,A group of friends run a popular travel vlog t...,/iQxV2OLz1Y8NiC8HWeD5uYu14iH.jpg,movie,"[Horror, Thriller, TV Movie]",13.021,2018-08-15,False,3.750,54.0,Horror,2024-01-17


In [19]:
# making trnd_movies.csv and showing schema
trnd_df.to_csv(trnd_dest)
! ls ../CSV
trnd_df.info()

final_movies.csv movies.csv       trnd_movies.csv  up_movies.csv
<class 'pandas.core.frame.DataFrame'>
Index: 9676 entries, 0 to 9991
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   adult              9676 non-null   bool          
 1   backdrop_path      9676 non-null   object        
 2   id                 9676 non-null   int64         
 3   title              9676 non-null   object        
 4   original_language  9676 non-null   object        
 5   original_title     9676 non-null   object        
 6   overview           9676 non-null   object        
 7   poster_path        9676 non-null   object        
 8   media_type         9676 non-null   object        
 9   genre_ids          9676 non-null   object        
 10  popularity         9676 non-null   float64       
 11  release_date       9676 non-null   object        
 12  video              9676 non-null   object        
 13  vot

In [20]:
# 'append, replace, fail' arguments.
# index = False makes CSV not include index column
trnd_df.to_sql('trnd_movie_table', engine, if_exists='replace', index=False)

676

#### Old TRND DF Code

In [89]:
%%time

# this section is to create the dataframe and gather the first 20 items

# gathering Trending Movies

# specified page number
url = "https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page=1"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY)
}

response_trnd = requests.get(url, headers=headers)

print(response_trnd)

# decodes response into json object
json_data_trnd = response_trnd.content.decode()
# converts json object/string into python object
data_trnd = json.loads(json_data_trnd)

# gathering columns into list
trnd_columns = [i for i in data_trnd['results'][0].keys()]
# unpacking data from json object into the data argument and setting columns, making a dataframe
trnd_df = pd.DataFrame([*data_trnd['results']], columns=trnd_columns)
# genre_ids column has values that are lists, 
# we need to separate the first value in the list to make it the 
# main genre after correlating id values to genre name
trnd_df = corr_values(trnd_df, genre_result)
trnd_df = genre_values(trnd_df)
trnd_df

<Response [200]>
CPU times: user 45.7 ms, sys: 4.41 ms, total: 50.1 ms
Wall time: 202 ms


Unnamed: 0,adult,backdrop_path,id,title,original_language,original_title,overview,poster_path,media_type,genre_ids,popularity,release_date,video,vote_average,vote_count,main_genre
0,False,/qgFrFXtiGfWN1zkk4sPd0nwFaBF.jpg,848187,Role Play,en,Role Play,Emma has a wonderful husband and two kids in t...,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,movie,"[Action, Comedy, Romance]",26.68,2023-12-14,False,6.563,16,Action
1,False,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg,955916,Lift,en,Lift,"An international heist crew, led by Cyrus Whit...",/gma8o1jWa6m0K1iJ9TzHIiFyTtI.jpg,movie,"[Action, Comedy]",147.906,2024-01-10,False,6.4,19,Action
2,False,/f1AQhx6ZfGhPZFTVKgxG91PhEYc.jpg,753342,Napoleon,en,Napoleon,An epic that details the checkered rise and fa...,/jE5o7y9K6pZtWNNMEw3IdpHuncR.jpg,movie,"[History, War, Drama]",1092.626,2023-11-22,False,6.456,1014,History
3,False,/uKP0B8HUJ08fas7NF77Xwu0bolJ.jpg,1214314,One More Shot,en,One More Shot,Following the attack on the black site in Pola...,/gdF3Q1Mcr2XvxLPStQSoQIO2cIj.jpg,movie,"[Action, Thriller]",38.525,2024-01-12,False,7.289,19,Action
4,False,/rVJfabCz1ViynQCEz54MRqdZig1.jpg,1155089,Justice League: Crisis on Infinite Earths Part...,en,Justice League: Crisis on Infinite Earths Part...,Death is coming. Worse than death: oblivion. N...,/zR6C66EDklgTPLHRSmmMt5878MR.jpg,movie,"[Animation, Science Fiction, Action]",104.182,2024-01-09,False,7.821,56,Animation
5,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,866398,The Beekeeper,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,movie,"[Action, Thriller]",453.101,2024-01-10,False,7.464,28,Action
6,False,/vdpE5pjJVql5aD6pnzRqlFmgxXf.jpg,906126,Society of the Snow,es,La sociedad de la nieve,"On October 13, 1972, Uruguayan Air Force Fligh...",/2e853FDVSIso600RqAMunPxiZjq.jpg,movie,"[Drama, History]",1412.999,2023-12-13,False,8.074,611,Drama
7,False,/rLb2cwF3Pazuxaj0sRXQ037tGI1.jpg,872585,Oppenheimer,en,Oppenheimer,The story of J. Robert Oppenheimer's role in t...,/8Gxv8gSFCU0XGDykEGv7zR1n2ua.jpg,movie,"[Drama, History]",674.156,2023-07-19,False,8.116,6052,Drama
8,False,/plLqCMIYgvh3dqnIm16da7lDtmb.jpg,1072876,Self Reliance,en,Self Reliance,When a man is offered a million dollars to pla...,/4AH3S0xMEYW20KGI6CSPO1W70bo.jpg,movie,[Comedy],16.292,2024-01-03,False,4.3,3,Comedy
9,False,/m3s0jyPGtluJ48kD0fUiPjXrRhr.jpg,673593,Mean Girls,en,Mean Girls,New student Cady Heron is welcomed into the to...,/fbbj3viSUDEGT1fFFMNpHP1iUjw.jpg,movie,[Comedy],202.39,2024-01-10,False,6.9,25,Comedy


In [90]:
%%time

# gathers the total pages of response
match = re.search(r'"total_pages":(\d+)', response_trnd.text)
# if the response gave a correct variable and not an error, it will grab the page number by the first group
# else, it will print "Number not found"
if match:
    page_number = match.group(1)
    print(f"Number: {page_number}")
else:
    print("Number not found")

# converting page_nunber into 'int' variable
page_number = int(page_number)
    
# gathering Trending Movies one page at a time through iteration
# this whole block of code with try to do it and until it fails, it will continue. 
# once it fails, it will print "Out of Pages"

# this block of code takes around 30 seconds to run.
try:
    # for each page starting at page 2 (because we already have page 1), iterate through and gather the response
    # including the url which the page_number is being formatted into, the headers with the environmental keys
    # and then putting it into a response variable which is then being decoded into a json object
    # and then loading into a data_trnd json object that can be searched through
    
    for page in range(2, page_number):
        url = f"https://api.themoviedb.org/3/trending/movie/day?api_key=THE_KEY&page={page}"
        headers = {
            "accept": "application/json",
            "Authorization": "Bearer {}".format(RA_KEY)}
        response_trnd = requests.get(url, headers=headers)
        # decodes response into json object
        json_data_trnd = response_trnd.content.decode()
        # converts json object/string into python object
        data_trnd = json.loads(json_data_trnd)
        # if data_trnd exists, then add it to a temporary dataframe with correct columns, and then 
        # join it (concat it) into the already created dataframe while ignoring the index column.
        if data_trnd:
            trnd_df_temp = pd.DataFrame([*data_trnd['results']], columns=trnd_columns)
            # genre_ids column has values that are lists, 
            # we need to separate the first value in the list to make it the main genre
            trnd_df_temp = corr_values(trnd_df_temp, genre_result)
            trnd_df_temp = genre_values(trnd_df_temp)
            trnd_df = pd.concat([trnd_df, trnd_df_temp], ignore_index=True)
        else:
            break
except:
    print("Out of Pages")
    
    
# gathering current date
trnd_df['curr_date'] = datetime.date.today().strftime("%Y-%m-%d")
# converting to pandas datetime type 
trnd_df['curr_date'] = pd.to_datetime(trnd_df['curr_date'])
# Reorganizing columns
trnd_df = trnd_df[['original_title', 'id', 'original_language', 'main_genre', 
                   'overview', 'release_date', 'vote_average', 'vote_count', 'popularity', 
                   'curr_date', 'media_type', 'video', 'title', 'genre_ids', 
                   'adult', 'poster_path', 'backdrop_path']]

Number: 1000
Out of Pages
CPU times: user 28 s, sys: 1.21 s, total: 29.2 s
Wall time: 2min 4s


In [114]:
# printing final results
trnd_df

Unnamed: 0,original_title,id,original_language,main_genre,overview,release_date,vote_average,vote_count,popularity,curr_date,media_type,video,title,genre_ids,adult,poster_path,backdrop_path
0,Role Play,848187,en,Action,Emma has a wonderful husband and two kids in t...,2023-12-14,6.563,16,26.680,2024-01-12,movie,False,Role Play,"[Action, Comedy, Romance]",False,/7MhXiTmTl16LwXNPbWCmqxj7UxH.jpg,/qgFrFXtiGfWN1zkk4sPd0nwFaBF.jpg
1,Lift,955916,en,Action,"An international heist crew, led by Cyrus Whit...",2024-01-10,6.400,19,147.906,2024-01-12,movie,False,Lift,"[Action, Comedy]",False,/gma8o1jWa6m0K1iJ9TzHIiFyTtI.jpg,/rz8GGX5Id2hCW1KzAIY4xwbQw1w.jpg
2,Napoleon,753342,en,History,An epic that details the checkered rise and fa...,2023-11-22,6.456,1014,1092.626,2024-01-12,movie,False,Napoleon,"[History, War, Drama]",False,/jE5o7y9K6pZtWNNMEw3IdpHuncR.jpg,/f1AQhx6ZfGhPZFTVKgxG91PhEYc.jpg
3,One More Shot,1214314,en,Action,Following the attack on the black site in Pola...,2024-01-12,7.289,19,38.525,2024-01-12,movie,False,One More Shot,"[Action, Thriller]",False,/gdF3Q1Mcr2XvxLPStQSoQIO2cIj.jpg,/uKP0B8HUJ08fas7NF77Xwu0bolJ.jpg
4,Justice League: Crisis on Infinite Earths Part...,1155089,en,Animation,Death is coming. Worse than death: oblivion. N...,2024-01-09,7.821,56,104.182,2024-01-12,movie,False,Justice League: Crisis on Infinite Earths Part...,"[Animation, Science Fiction, Action]",False,/zR6C66EDklgTPLHRSmmMt5878MR.jpg,/rVJfabCz1ViynQCEz54MRqdZig1.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9990,The Medusa Touch,11623,en,Horror,A French detective in London reconstructs the ...,1978-04-13,6.800,160,8.921,2024-01-12,movie,False,The Medusa Touch,"[Horror, Thriller, Science Fiction, Mystery]",False,/sk523UMvmmiFMsGKxerGiwSQWdZ.jpg,/7aY1ji2fVHVTAW9DjkpFvPMyLi9.jpg
9991,The Dead,39507,en,Drama,"After a convivial holiday dinner party, things...",1987-09-03,6.932,139,9.891,2024-01-12,movie,False,The Dead,[Drama],False,/hFLUqiwvvtcUqA5NZNejaaNpTv0.jpg,/ypySbENCsnlpeG49txjxcITYSRX.jpg
9992,The Roaring Twenties,37698,en,Crime,"After World War I, Armistice Lloyd Hart goes b...",1939-10-28,7.500,211,10.625,2024-01-12,movie,False,The Roaring Twenties,"[Crime, Drama, Thriller]",False,/nkymdElqwJBm2bl3F6hJp4xJggD.jpg,/f4UnGG6Pyf6ZLrYdNj2LjcXmWZI.jpg
9993,Happiest Season,520172,en,Romance,A young woman's plans to propose to her girlfr...,2020-11-26,7.299,1091,30.989,2024-01-12,movie,False,Happiest Season,"[Romance, Comedy]",False,/vzec9kkOSE93tygyfOktedkeOQ.jpg,/jOWgFmD1H5t6nL7ChbmX8oDFUZl.jpg


In [112]:
# making trnd_movies.csv and showing schema
trnd_df.to_csv(trnd_dest)
! ls ../CSV
trnd_df.info()

final_movies.csv movies.csv       trnd_movies.csv  up_movies.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9995 entries, 0 to 9994
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   original_title     9995 non-null   object        
 1   id                 9995 non-null   int64         
 2   original_language  9995 non-null   object        
 3   main_genre         8915 non-null   object        
 4   overview           9995 non-null   object        
 5   release_date       9995 non-null   object        
 6   vote_average       9995 non-null   float64       
 7   vote_count         9995 non-null   int64         
 8   popularity         9995 non-null   float64       
 9   curr_date          9995 non-null   datetime64[ns]
 10  media_type         9995 non-null   object        
 11  video              9995 non-null   bool          
 12  title              9995 non-null   object        
 13

In [113]:
# 'append, replace, fail' arguments.
# index = False makes CSV not include index column
trnd_df.to_sql('trnd_movie_table', engine, if_exists='replace', index=False)

995

### Upcoming Movies

In [21]:
%%time
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"
up_df = df_make(url)

<Response [200]> 


Number: 46
CPU times: user 1.93 s, sys: 121 ms, total: 2.05 s
Wall time: 6.44 s


In [25]:
up_df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,main_genre,curr_date
0,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,"[Action, Thriller]",866398,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,723.738,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,2024-01-10,The Beekeeper,False,7.474,97,Action,2024-01-17
1,False,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,"[Comedy, Family, Fantasy]",787699,en,Wonka,Willy Wonka – chock-full of ideas and determin...,673.002,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg,2023-12-06,Wonka,False,7.088,1013,Comedy,2024-01-17
2,False,/t5zCBSB5xMDKcDqe91qahCOUYVV.jpg,"[Horror, Mystery]",507089,en,Five Nights at Freddy's,"Recently fired and desperate for work, a troub...",349.652,/7BpNtNfxuocYEVREzVMO75hso1l.jpg,2023-10-25,Five Nights at Freddy's,False,7.726,3184,Horror,2024-01-17
3,False,/ptz5ETMxDoRRiE69BVuIxJzyTEO.jpg,"[Animation, Action, Adventure, Comedy, Family]",940551,en,Migration,After a migrating duck family alights on their...,342.720,/ldfCF9RhR40mppkzmftxapaHeTo.jpg,2023-12-06,Migration,False,7.400,111,Animation,2024-01-17
4,False,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,"[Romance, Drama]",1020006,en,Priscilla,When teenage Priscilla Beaulieu meets Elvis Pr...,301.519,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg,2023-10-27,Priscilla,False,6.897,239,Romance,2024-01-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
891,False,/cEYX8clX0uZIE4xXIZ9jycfypoK.jpg,[],1202309,fr,Hiver,"It's winter and like every year, the little Se...",0.696,/8KKUtW1LtlAw8gMoTYqzpgXOAu2.jpg,2023-12-12,Hiver,False,0.000,0,[],2024-01-17
892,False,/5FDzPQP24hNENa0bd4xorPF09qt.jpg,[Drama],1114899,ru,Блажь,"In Russia, an introverted father and his teena...",0.651,/oOixEuylGEwcjG7ZKpjXAIa3ob9.jpg,2023-10-14,Grace,False,6.500,2,Drama,2024-01-17
894,False,/wlFMyzqhZmx4K9KNBRdTCmhimRD.jpg,"[Drama, Animation]",1023636,xx,From the Main Square,"Encircling its central square, a new town emer...",0.613,/7QXWgLuTGNBbapjulOukulgTfuw.jpg,2021-11-01,From the Main Square,False,0.000,0,Drama,2024-01-17
898,False,/dISZ4MIcwKrugqsmC5r53hpXNAw.jpg,[Animation],1077966,sv,Historien om Bodri,My name is Hédi and I'm going to tell you abou...,0.600,/20iUBAbP7XMmENVbD3UR2EOJXcC.jpg,2022-06-06,Historien om Bodri,False,0.000,0,Animation,2024-01-17


In [26]:
up_df.info()
# converting to csv file
up_df.to_csv(up_dest)
# 'append, replace, fail' arguments.
# index = False makes CSV not include index column
up_df.to_sql('up_movie_table', engine, if_exists='replace', index=False)

<class 'pandas.core.frame.DataFrame'>
Index: 453 entries, 0 to 899
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   adult              453 non-null    bool          
 1   backdrop_path      453 non-null    object        
 2   genre_ids          453 non-null    object        
 3   id                 453 non-null    int64         
 4   original_language  453 non-null    object        
 5   original_title     453 non-null    object        
 6   overview           453 non-null    object        
 7   popularity         453 non-null    float64       
 8   poster_path        453 non-null    object        
 9   release_date       453 non-null    object        
 10  title              453 non-null    object        
 11  video              453 non-null    bool          
 12  vote_average       453 non-null    float64       
 13  vote_count         453 non-null    int64         
 14  main_genre     

453

#### Old UP DF Code

In [115]:
# gathering Upcoming Movies

# specified page number
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY)
}

response_up = requests.get(url, headers=headers)

print(response_up)

<Response [200]>


In [116]:
%%time

# this section is to create the dataframe and gather the first 20 items

# gathering Upcoming Movies

# specified page number
url = "https://api.themoviedb.org/3/movie/upcoming?language=en-US&page=1"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer {}".format(RA_KEY)
}

response_up = requests.get(url, headers=headers)

print(response_up)

# decodes response into json object
json_data_up = response_up.content.decode()
# converts json object/string into python object
data_up = json.loads(json_data_up)

# making a list full of the column names
up_columns = [i for i in data_up['results'][0].keys()]
# unpacking data from json object into the data argument and setting columns, making a dataframe
up_df = pd.DataFrame([*data_up['results']], columns=up_columns)
# genre_ids column has values that are lists, 
# we need to separate the first value in the list to make it the main genre
up_df = corr_values(up_df, genre_result)
up_df = genre_values(up_df)
up_df

<Response [200]>
CPU times: user 47.6 ms, sys: 5.58 ms, total: 53.2 ms
Wall time: 128 ms


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,main_genre
0,False,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,"[Comedy, Family, Fantasy]",787699,en,Wonka,Willy Wonka – chock-full of ideas and determin...,654.778,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg,2023-12-06,Wonka,False,7.102,938,Comedy
1,False,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,"[Romance, Drama]",1020006,en,Priscilla,When teenage Priscilla Beaulieu meets Elvis Pr...,611.517,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg,2023-10-27,Priscilla,False,6.884,194,Romance
2,False,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,"[Drama, Romance]",1143183,tl,Rewind,Mary loves John for as long as she can remembe...,466.285,/ru1i4ZR11lPPVArk3fOcO1VCOlD.jpg,2023-12-25,Rewind,False,6.806,18,Drama
3,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,"[Action, Thriller]",866398,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,453.101,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,2024-01-10,The Beekeeper,False,7.52,25,Action
4,False,/ptz5ETMxDoRRiE69BVuIxJzyTEO.jpg,"[Animation, Action, Adventure, Comedy, Family]",940551,en,Migration,After a migrating duck family alights on their...,374.851,/ldfCF9RhR40mppkzmftxapaHeTo.jpg,2023-12-06,Migration,False,7.406,101,Animation
5,False,/zX9m8h33pHXcES7ttO8v0ThiYj7.jpg,"[Drama, Action, Thriller]",1053592,es,Todos los nombres de Dios,"After being implicated in a terrorist attack, ...",373.468,/n15gfcgwV0LVPSobrayZcFHcwN6.jpg,2023-09-15,All the Names of God,False,7.048,42,Drama
6,False,/vQpvNDc0AFao8BbWyXDFVVrqiZj.jpg,[Horror],853387,en,Lord of Misrule,When the daughter of the town's new priest goe...,326.801,/eCNJuGsCNdf2yf4F3UcDg1WZTbo.jpg,2023-10-26,Lord of Misrule,False,5.719,16,Horror
7,False,/4HodYYKEIsGOdinkGi2Ucz6X9i0.jpg,"[Animation, Action, Adventure, Science Fiction]",569094,en,Spider-Man: Across the Spider-Verse,"After reuniting with Gwen Stacy, Brooklyn’s fu...",326.69,/8Vt6mWEReuy4Of61Lnj5Xj704m8.jpg,2023-05-31,Spider-Man: Across the Spider-Verse,False,8.379,5467,Animation
8,False,/9ZlGiEKmcYrrxmiQEJDhjeT2kEW.jpg,"[Action, Adventure, War]",1061181,ja,キングダム 運命の炎,To defend their kingdom against a sudden invas...,260.254,/60eYZkkksgKeeAV8fAAKQslZZnH.jpg,2023-07-28,Kingdom III: The Flame of Destiny,False,7.61,86,Action
9,False,/pA3vdhadJPxF5GA1uo8OPTiNQDT.jpg,"[Action, Drama]",678512,en,Sound of Freedom,"The story of Tim Ballard, a former US governme...",238.867,/qA5kPYZA7FkVvqcEfJRoOy4kpHg.jpg,2023-07-03,Sound of Freedom,False,8.068,1765,Action


In [117]:
%%time

# gathers the total pages of response
match = re.search(r'"total_pages":(\d+)', response_up.text)

# if the response gave a correct variable and not an error, it will grab the page number by the first group
# else, it will print "Number not found"
if match:
    page_number = match.group(1)
    print(f"Number: {page_number}")
else:
    print("Number not found")

# converting page_nunber into 'int' variable
page_number = int(page_number)
    
# gathering Upcoming Movies one page at a time through iteration
# this whole block of code with try to do it and until it fails, it will continue. 
# once it fails, it will print "Out of Pages"

# this block of code takes around 30 seconds to run.
try:
    # for each page starting at page 2 (because we already have page 1), iterate through and gather the response
    # including the url which the page_number is being formatted into, the headers with the environmental keys
    # and then putting it into a response variable which is then being decoded into a json object
    # and then loading into a data_trnd json object that can be searched through
    
    for page in range(2, page_number):
        # specified page number
        url = f"https://api.themoviedb.org/3/movie/upcoming?language=en-US&page={page}"
        headers = {
            "accept": "application/json",
            "Authorization": "Bearer {}".format(RA_KEY)
        }
        response_up = requests.get(url, headers=headers)
        # decodes response into json object
        json_data_up = response_up.content.decode()
        # converts json object/string into python object
        data_up = json.loads(json_data_up)
        # if data_up exists, then add it to a temporary dataframe with correct columns, and then 
        # join it (concat it) into the already created dataframe while ignoring the index column.
        if data_up:
            up_df_temp = pd.DataFrame([*data_up['results']], columns=up_columns)
            # genre_ids column has values that are lists, 
            # we need to separate the first value in the list to make it the main genre
            up_df_temp = corr_values(up_df_temp, genre_result)
            up_df_temp = genre_values(up_df_temp)
            up_df = pd.concat([up_df, up_df_temp], ignore_index=True)
        else:
            break
except:
    print("Out of Pages")
    
    
# gathering current date
up_df['curr_date'] = datetime.date.today().strftime("%Y-%m-%d")
# converting to pandas datetime type 
up_df['curr_date'] = pd.to_datetime(up_df['curr_date'])
# Cleaning Up

# Dropping unncessary columns
up_df.drop(columns=['video'], inplace=True)

# Changing datatypes
up_df['release_date'] = pd.to_datetime(up_df['release_date'])

# Reorganizing Columns
up_df = up_df[['original_title', 'id', 'overview', 
               'original_language', 'release_date', 'popularity', 'vote_average', 
               'vote_count', 'curr_date', 'genre_ids', 'backdrop_path', 'poster_path']]
# printing out our results
up_df

Number: 53
CPU times: user 2.72 s, sys: 128 ms, total: 2.85 s
Wall time: 10.9 s


Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,main_genre,curr_date
0,False,/yOm993lsJyPmBodlYjgpPwBjXP9.jpg,"[Comedy, Family, Fantasy]",787699,en,Wonka,Willy Wonka – chock-full of ideas and determin...,654.778,/qhb1qOilapbapxWQn9jtRCMwXJF.jpg,2023-12-06,Wonka,False,7.102,938,Comedy,2024-01-12
1,False,/50stq3Jlny6oEgJjsXbQvbajCNw.jpg,"[Romance, Drama]",1020006,en,Priscilla,When teenage Priscilla Beaulieu meets Elvis Pr...,611.517,/uDCeELWWpsNq7ErM61Yuq70WAE9.jpg,2023-10-27,Priscilla,False,6.884,194,Romance,2024-01-12
2,False,/X8yF6STUk5Zr5nAuLBJiio8Sxh.jpg,"[Drama, Romance]",1143183,tl,Rewind,Mary loves John for as long as she can remembe...,466.285,/ru1i4ZR11lPPVArk3fOcO1VCOlD.jpg,2023-12-25,Rewind,False,6.806,18,Drama,2024-01-12
3,False,/4MCKNAc6AbWjEsM2h9Xc29owo4z.jpg,"[Action, Thriller]",866398,en,The Beekeeper,One man’s campaign for vengeance takes on nati...,453.101,/A7EByudX0eOzlkQ2FIbogzyazm2.jpg,2024-01-10,The Beekeeper,False,7.520,25,Action,2024-01-12
4,False,/ptz5ETMxDoRRiE69BVuIxJzyTEO.jpg,"[Animation, Action, Adventure, Comedy, Family]",940551,en,Migration,After a migrating duck family alights on their...,374.851,/ldfCF9RhR40mppkzmftxapaHeTo.jpg,2023-12-06,Migration,False,7.406,101,Animation,2024-01-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,False,/iWpL5TEdJeX0ArGDBXvZEtBImHi.jpg,"[Adventure, Comedy]",1229581,en,The Art of Blending In,A student burdened with the curse of a constan...,0.600,/sXugmYmFazawCwGwIbJzcYSMjdz.jpg,2024-02-01,The Art of Blending In,False,0.000,0,,2024-01-12
1036,False,,[],1225608,en,Bedlam,Bedlam involves an imaginary meeting between t...,0.600,,1991-01-01,Bedlam,False,0.000,0,,2024-01-12
1037,False,,[],1220315,en,Embrace the Moon,The Moon Goddess falls in love with a human fa...,0.600,/o9ZaonCufRWDurMttnLs7wb3sTI.jpg,2024-02-02,Embrace the Moon,False,0.000,0,,2024-01-12
1038,False,,[Documentary],1216585,en,Six Inches of Soil,The inspiring story of British farmers standin...,0.600,,2024-01-19,Six Inches of Soil,False,0.000,0,,2024-01-12


In [None]:
up_df.info()

In [118]:
# converting to csv file
up_df.to_csv(up_dest)
# 'append, replace, fail' arguments.
# index = False makes CSV not include index column
up_df.to_sql('up_movie_table', engine, if_exists='replace', index=False)

40

#### API testing

In [None]:
# we want 'results'
# 'dates' is the day we got this info
# results are our results
# total pages is how many we can iterate through
print("Keys in Object: ", *data.keys(), "\nTotal Pages: ", str(data['total_pages']))

In [None]:
# data of index one, keys and value
print(*data['results'][0].items())

In [None]:
for i in (data['results']):
    for clm, dta in i.items():
        print(clm, dta)

### People List

In [28]:
%%time
url = "https://api.themoviedb.org/3/person/popular?language=en-US&page=1"
p_df = df_make(url)

<Response [200]> 


Number: 160590
Dataframe Done.
CPU times: user 24 s, sys: 1.59 s, total: 25.6 s
Wall time: 1min 45s


In [37]:
p_df.info()
p_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   adult                 10000 non-null  bool          
 1   gender                10000 non-null  int64         
 2   id                    10000 non-null  int64         
 3   known_for_department  10000 non-null  object        
 4   name                  10000 non-null  object        
 5   original_name         10000 non-null  object        
 6   popularity            10000 non-null  float64       
 7   profile_path          9906 non-null   object        
 8   known_for             10000 non-null  object        
 9   curr_date             10000 non-null  datetime64[ns]
dtypes: bool(1), datetime64[ns](1), float64(1), int64(2), object(5)
memory usage: 713.0+ KB


Unnamed: 0,adult,gender,id,known_for_department,name,original_name,popularity,profile_path,known_for,curr_date
0,False,2,12799,Acting,Jeremy Piven,Jeremy Piven,288.000,/dHBHZRHEVBAdozGC4SWyIIP5NLh.jpg,"[{'adult': False, 'backdrop_path': '/sd4xN5xi8...",2024-01-17
1,False,2,1951129,Acting,Justin Chien,Justin Chien,243.166,/2Hwf697i22eBUUDKJPiaGfkJXM.jpg,"[{'adult': False, 'backdrop_path': '/9h15FWJFZ...",2024-01-17
2,False,1,54693,Acting,Emma Stone,Emma Stone,211.140,/3UaYw9KF4fEXRMRWhf25aGJpAW2.jpg,"[{'adult': False, 'backdrop_path': '/kmuSGNlF9...",2024-01-17
3,False,2,64295,Acting,Alan Ritchson,Alan Ritchson,210.368,/wdmLUSPEC7dXuqnjTM4NgbjvTKk.jpg,"[{'adult': False, 'backdrop_path': '/wrhLyiY7k...",2024-01-17
4,False,2,73968,Acting,Henry Cavill,Henry Cavill,197.341,/iWdKjMry5Pt7vmxU7bmOQsIUyHa.jpg,"[{'adult': False, 'backdrop_path': '/69EFgWWPF...",2024-01-17
...,...,...,...,...,...,...,...,...,...,...
9995,False,2,5811,Writing,Agenore Incrocci,Agenore Incrocci,16.325,/nVIsX7ZvqMwk3I8iGysm2P5SMy6.jpg,"[{'adult': False, 'backdrop_path': '/eoCSp75lx...",2024-01-17
9996,False,1,4460,Acting,Trine Dyrholm,Trine Dyrholm,16.321,/ntUn5eKFlQxbjPci5A99qJOjulY.jpg,"[{'adult': False, 'backdrop_path': '/dzm6sNpz9...",2024-01-17
9997,False,2,5695,Acting,Sid Haig,Sid Haig,16.321,/jhYDQvigNX3OzeIsX2KsOUPTi1y.jpg,"[{'adult': False, 'backdrop_path': '/fGTQXeEbU...",2024-01-17
9998,False,2,228466,Acting,Goro Inagaki,Goro Inagaki,16.319,/7XDSC53xpV8Pr1L2mncYQJtxymD.jpg,"[{'adult': False, 'backdrop_path': '/wi8pmuxWN...",2024-01-17


In [34]:
# converting to csv file
p_df.to_csv(p_dest)

In [50]:
# 'append, replace, fail' arguments.
# index = False makes CSV not include index column
# making "known_for" column a jsondump because it was giving error since the value data types were 'dict'
p_df.to_sql('people_movie_table', engine, if_exists='replace', index=False, dtype={"known_for": sqlalchemy.types.JSON})

1000