# Movies - Part 3 -- MySQL Database
- Amber Kutscher
- Data Enrichment

In [23]:
# Imports
import pandas as pd
import json
import mysql.connector
from sqlalchemy import create_engine, String, Text, Float, Integer, text
from sqlalchemy_utils import create_database, database_exists
import pymysql
pymysql.install_as_MySQLdb()
from urllib.parse import quote_plus as urlquote

In [24]:
with open (r"\Users\amber\.secret\mysql.json") as f:
    login = json.load(f)
    
# Display the keys of the loaded dict
login.keys()

dict_keys(['username', 'password'])

In [25]:
# Connect to the MySQL server and create the "movies" database
connection = mysql.connector.connect(
    host='localhost',
    user=login['username'],  # Use the username from mysql.json
    password=login['password'],  # Use the password from mysql.json
)

cursor = connection.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS movies")
cursor.close()
connection.close()

In [26]:
# Create an engine to connect to the "movies" database in MySQL
engine = create_engine(f'mysql+mysqlconnector://{login["username"]}:{login["password"]}@localhost/movies', echo=True)

In [27]:
# Load cleaned datasets
df_basics = pd.read_csv('Data/title_basics.csv.gz', encoding='utf-8', low_memory=False)
df_ratings = pd.read_csv('Data/title_ratings.csv.gz', encoding='utf-8', low_memory=False)

In [28]:
# Load the JSON files into a Pandas dataframe
df_api_2000 = pd.read_json('Data/tmdb_api_results_2000.json')
df_api_2001 = pd.read_json('Data/tmdb_api_results_2001.json')

In [29]:
# Combine tmdb files into one singular dataframe
tmdb_data = pd.concat([df_api_2000, df_api_2001], ignore_index=True)

## Normalize Genres

In [30]:
# Create a new column with the single-string genres as a list of strings
df_basics['genres_split'] = df_basics['genres'].str.split(',')
df_basics

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,genres_split
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance","[Comedy, Fantasy, Romance]"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020.0,,70,Drama,[Drama]
2,tt0068865,movie,Lives of Performers,Lives of Performers,0,2016.0,,90,Drama,[Drama]
3,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018.0,,122,Drama,[Drama]
4,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005.0,,100,"Comedy,Horror,Sci-Fi","[Comedy, Horror, Sci-Fi]"
...,...,...,...,...,...,...,...,...,...,...
114443,tt9915436,movie,Vida em Movimento,Vida em Movimento,0,2019.0,,70,Documentary,[Documentary]
114444,tt9915872,movie,The Last White Witch,Boku no kanojo wa mahoutsukai,0,2019.0,,97,"Comedy,Drama,Fantasy","[Comedy, Drama, Fantasy]"
114445,tt9916170,movie,The Rehearsal,O Ensaio,0,2019.0,,51,Drama,[Drama]
114446,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller","[Action, Adventure, Thriller]"


In [31]:
# Use .explode() to separate the list of genres into new rows
exploded_genres = df_basics.explode('genres_split')
exploded_genres

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres,genres_split
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance",Comedy
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance",Fantasy
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001.0,,118,"Comedy,Fantasy,Romance",Romance
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020.0,,70,Drama,Drama
2,tt0068865,movie,Lives of Performers,Lives of Performers,0,2016.0,,90,Drama,Drama
...,...,...,...,...,...,...,...,...,...,...
114446,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller",Action
114446,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller",Adventure
114446,tt9916190,movie,Safeguard,Safeguard,0,2020.0,,95,"Action,Adventure,Thriller",Thriller
114447,tt9916362,movie,Coven,Akelarre,0,2020.0,,92,"Drama,History",Drama


In [32]:
# Use .unique() to get the unique genres from the genres_split column
unique_genres = sorted(exploded_genres['genres_split'].unique())
unique_genres

['Action',
 'Adult',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'Game-Show',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'News',
 'Reality-TV',
 'Romance',
 'Sci-Fi',
 'Sport',
 'Talk-Show',
 'Thriller',
 'War',
 'Western']

In [33]:
# Save just tconst and genres_split as a new dataframe
title_genres = exploded_genres[['tconst', 'genres_split']].copy()
title_genres.head()

Unnamed: 0,tconst,genres_split
0,tt0035423,Comedy
0,tt0035423,Fantasy
0,tt0035423,Romance
1,tt0062336,Drama
2,tt0068865,Drama


In [34]:
# Make a dictionary with list of unique genres as the key and the new integer id as values
genre_ints = range(len(unique_genres))
genre_map = dict(zip(unique_genres, genre_ints))
genre_map

{'Action': 0,
 'Adult': 1,
 'Adventure': 2,
 'Animation': 3,
 'Biography': 4,
 'Comedy': 5,
 'Crime': 6,
 'Documentary': 7,
 'Drama': 8,
 'Family': 9,
 'Fantasy': 10,
 'Game-Show': 11,
 'History': 12,
 'Horror': 13,
 'Music': 14,
 'Musical': 15,
 'Mystery': 16,
 'News': 17,
 'Reality-TV': 18,
 'Romance': 19,
 'Sci-Fi': 20,
 'Sport': 21,
 'Talk-Show': 22,
 'Thriller': 23,
 'War': 24,
 'Western': 25}

In [35]:
# Replace the string genres in title_genres with the new integer ids
title_genres['genre_id'] = title_genres['genres_split'].map(genre_map)
title_genres = title_genres.drop(columns='genres_split')

In [36]:
# Verifying that the above code was applied correctly
title_genres.head()

Unnamed: 0,tconst,genre_id
0,tt0035423,5
0,tt0035423,10
0,tt0035423,19
1,tt0062336,8
2,tt0068865,8


In [37]:
# Create a genres dataframe
genres = pd.DataFrame({'Genre_Name': genre_map.keys(), 
                       'Genre_ID': genre_map.values()})
genres.head()

Unnamed: 0,Genre_Name,Genre_ID
0,Action,0
1,Adult,1
2,Adventure,2
3,Animation,3
4,Biography,4


## Load Dataframes Into MySQL

In [38]:
# Check the dtypes of the df_basics dataframe
df_basics.dtypes

tconst             object
titleType          object
primaryTitle       object
originalTitle      object
isAdult             int64
startYear         float64
endYear           float64
runtimeMinutes      int64
genres             object
genres_split       object
dtype: object

In [39]:
# Get the max string length for each column
max_str_len = {}

for column in df_basics.columns:
    max_len = df_basics[column].fillna('').astype(str).apply(len).max()
    max_str_len[column] = max_len

print(max_str_len)

{'tconst': 10, 'titleType': 5, 'primaryTitle': 242, 'originalTitle': 242, 'isAdult': 1, 'startYear': 6, 'endYear': 0, 'runtimeMinutes': 5, 'genres': 32, 'genres_split': 42}


In [40]:
# Calculate max string lengths for object columns
key_len = df_basics['tconst'].fillna('').map(len).max()
title_len = df_basics['primaryTitle'].fillna('').map(len).max()

In [41]:
from sqlalchemy.types import *

In [42]:
# Create a schema dictionary using SQLAlchemy datatype objects
df_schema = {
    "tconst": String(key_len+1),
    "primaryTitle": Text(title_len+1),
    'startYear': Float(),
    'endYear': Float(),
    'runtimeMinutes': Integer()}

In [43]:
# Drop unnecessary columns from df_basics
df_basics.drop(columns=['originalTitle', 'isAdult', 'titleType', 'genres', 'genres_split'], inplace=True)

In [44]:
# Save to sql with dtype and index=False
df_basics.to_sql('title_basics', engine, if_exists='replace', index=False, dtype=df_schema)

2023-10-03 15:08:52,084 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2023-10-03 15:08:52,084 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:08:52,086 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2023-10-03 15:08:52,087 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:08:52,089 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2023-10-03 15:08:52,090 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:08:52,094 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:08:52,095 INFO sqlalchemy.engine.Engine [generated in 0.00115s] {'table_schema': 'movies', 'table_name': 'title_basics'}
2023-10-03 15:08:52,104 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:08:52,106 INFO sqlalchemy.engine.Engine 
CREATE TABLE title_basics (
	tconst VARCHAR(11), 
	`primaryTitle` TEXT(243), 
	`startYear` FLOAT, 
	`endYear` FLOAT, 
	`runtimeMinutes` I

114448

In [49]:
# Create title_ratings table with tconst as primary key
create_table_sql_ratings = """
CREATE TABLE title_ratings (
    tconst VARCHAR(255) PRIMARY KEY,
    averageRating FLOAT,
    numVotes INTEGER
);
"""

# Execute the SQL statement to create the "title_ratings" table
engine.execute(create_table_sql_ratings)

2023-10-03 15:14:38,286 INFO sqlalchemy.engine.Engine 
CREATE TABLE title_ratings (
    tconst VARCHAR(255) PRIMARY KEY,
    averageRating FLOAT,
    numVotes INTEGER
);

2023-10-03 15:14:38,287 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:14:38,303 INFO sqlalchemy.engine.Engine COMMIT


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x1a1673d1840>

In [50]:
# Modify the title_ratings dataframe
title_ratings = df_ratings[['tconst', 'averageRating', 'numVotes']].copy()

In [52]:
# Add title_ratings dataframe to MySQL
title_ratings.to_sql('title_ratings', engine, if_exists='replace', index=False)

2023-10-03 15:15:09,935 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:15:09,936 INFO sqlalchemy.engine.Engine [cached since 377.8s ago] {'table_schema': 'movies', 'table_name': 'title_ratings'}
2023-10-03 15:15:09,940 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:15:09,941 INFO sqlalchemy.engine.Engine [cached since 377.8s ago] {'table_schema': 'movies', 'table_name': 'title_ratings'}
2023-10-03 15:15:09,945 INFO sqlalchemy.engine.Engine SHOW FULL TABLES FROM `movies`
2023-10-03 15:15:09,945 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:15:09,966 INFO sqlalchemy.engine.Engine SHOW CREATE TABLE `title_ratings`
2023-10-03 15:15:09,969 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-10-03 15:15:09,976 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023

87571

In [53]:
# Add title_genres dataframe to MySQL
title_genres.to_sql('title_genres', engine, if_exists='replace', index=False)

2023-10-03 15:15:17,983 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:15:17,984 INFO sqlalchemy.engine.Engine [cached since 385.9s ago] {'table_schema': 'movies', 'table_name': 'title_genres'}
2023-10-03 15:15:17,988 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:15:17,990 INFO sqlalchemy.engine.Engine 
CREATE TABLE title_genres (
	tconst TEXT, 
	genre_id BIGINT
)


2023-10-03 15:15:17,992 INFO sqlalchemy.engine.Engine [no key 0.00177s] {}
2023-10-03 15:15:18,004 INFO sqlalchemy.engine.Engine COMMIT
2023-10-03 15:15:18,018 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:15:19,012 INFO sqlalchemy.engine.Engine INSERT INTO title_genres (tconst, genre_id) VALUES (%(tconst)s, %(genre_id)s)
2023-10-03 15:15:19,014 INFO sqlalchemy.engine.Engine [generated in 0.88295s] ({'tconst': 'tt0035423', 'genre_id': 5}, {'tconst': 'tt0035423', 'genre_id': 10}, {'t

204754

In [54]:
# Add genres dataframe to MySQL
genres.to_sql('genres', engine, if_exists='replace', index=False)

2023-10-03 15:15:25,918 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:15:25,920 INFO sqlalchemy.engine.Engine [cached since 393.8s ago] {'table_schema': 'movies', 'table_name': 'genres'}
2023-10-03 15:15:25,922 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:15:25,924 INFO sqlalchemy.engine.Engine 
CREATE TABLE genres (
	`Genre_Name` TEXT, 
	`Genre_ID` BIGINT
)


2023-10-03 15:15:25,925 INFO sqlalchemy.engine.Engine [no key 0.00165s] {}
2023-10-03 15:15:25,937 INFO sqlalchemy.engine.Engine COMMIT
2023-10-03 15:15:25,939 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:15:25,941 INFO sqlalchemy.engine.Engine INSERT INTO genres (`Genre_Name`, `Genre_ID`) VALUES (%(Genre_Name)s, %(Genre_ID)s)
2023-10-03 15:15:25,942 INFO sqlalchemy.engine.Engine [generated in 0.00148s] ({'Genre_Name': 'Action', 'Genre_ID': 0}, {'Genre_Name': 'Adult', 'Genre_ID': 1}, {

26

In [None]:
# Modify the tmdb_data dataframe
tmdb_data = tmdb_data[['id', 'revenue', 'budget', 'certification', 'title']].copy()

In [57]:
# Create schema for tmdb_data table
tmdb_data_schema = {
    "id": Integer(),
    "revenue": Integer(),
    "budget": Integer(),
    "certification": String(length=255),
    "title": Text()}

In [58]:
# Create tmdb_data table
tmdb_data.to_sql('tmdb_data', engine, if_exists='replace', index=False, dtype=tmdb_data_schema)

2023-10-03 15:17:41,846 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2023-10-03 15:17:41,847 INFO sqlalchemy.engine.Engine [cached since 529.8s ago] {'table_schema': 'movies', 'table_name': 'tmdb_data'}
2023-10-03 15:17:41,853 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-10-03 15:17:41,856 INFO sqlalchemy.engine.Engine 
CREATE TABLE tmdb_data (
	imdb_id TEXT, 
	adult FLOAT(53), 
	backdrop_path TEXT, 
	belongs_to_collection TEXT, 
	budget INTEGER, 
	genres TEXT, 
	homepage TEXT, 
	id INTEGER, 
	original_language TEXT, 
	original_title TEXT, 
	overview TEXT, 
	popularity FLOAT(53), 
	poster_path TEXT, 
	production_companies TEXT, 
	production_countries TEXT, 
	release_date TEXT, 
	revenue INTEGER, 
	runtime FLOAT(53), 
	spoken_languages TEXT, 
	status TEXT, 
	tagline TEXT, 
	title TEXT, 
	video FLOAT(53), 
	vote_average FLOAT(53), 
	vote_count FLOAT(53), 
	certification VARCHAR(25

InterfaceError: (mysql.connector.errors.InterfaceError) Failed executing the operation; Python type list cannot be converted
[SQL: INSERT INTO tmdb_data (imdb_id, adult, backdrop_path, belongs_to_collection, budget, genres, homepage, id, original_language, original_title, overview, popularity, poster_path, production_companies, production_countries, release_date, revenue, runtime, spoken_languages, status, tagline, title, video, vote_average, vote_count, certification) VALUES (%(imdb_id)s, %(adult)s, %(backdrop_path)s, %(belongs_to_collection)s, %(budget)s, %(genres)s, %(homepage)s, %(id)s, %(original_language)s, %(original_title)s, %(overview)s, %(popularity)s, %(poster_path)s, %(production_companies)s, %(production_countries)s, %(release_date)s, %(revenue)s, %(runtime)s, %(spoken_languages)s, %(status)s, %(tagline)s, %(title)s, %(video)s, %(vote_average)s, %(vote_count)s, %(certification)s)]
[parameters: ({'imdb_id': 0, 'adult': None, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': None, 'genres': None, 'homepage': None, 'id': None, 'original_language': None, 'original_title': None, 'overview': None, 'popularity': None, 'poster_path': None, 'production_companies': None, 'production_countries': None, 'release_date': None, 'revenue': None, 'runtime': None, 'spoken_languages': None, 'status': None, 'tagline': None, 'title': None, 'video': None, 'vote_average': None, 'vote_count': None, 'certification': None}, {'imdb_id': 'tt0113026', 'adult': 0.0, 'backdrop_path': '/vMFs7nw6P0bIV1jDsQpxAieAVnH.jpg', 'belongs_to_collection': None, 'budget': 10000000.0, 'genres': [{'id': 35, 'name': 'Comedy'}, {'id': 10402, 'name': 'Music'}, {'id': 10749, 'name': 'Romance'}], 'homepage': '', 'id': 62127.0, 'original_language': 'en', 'original_title': 'The Fantasticks', 'overview': 'Two rural teens sing and dance their way through a forbidden romance and a dangerous travelling carnival.', 'popularity': 3.1390000000000002, 'poster_path': '/hfO64mXz3DgUxkBVU7no2UWRP7x.jpg', 'production_companies': [{'id': 51207, 'logo_path': None, 'name': 'Sullivan Street Productions', 'origin_country': ''}, {'id': 51208, 'logo_path': None, 'name': 'Michael Ritc ... (88 characters truncated) ... n Company, The', 'origin_country': ''}, {'id': 60, 'logo_path': '/1SEj4nyG3JPBSKBbFhtdcHRaIF9.png', 'name': 'United Artists', 'origin_country': 'US'}], 'production_countries': [{'iso_3166_1': 'US', 'name': 'United States of America'}], 'release_date': '2000-09-22', 'revenue': 0.0, 'runtime': 86.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': 'Try to remember the first time magic happened', 'title': 'The Fantasticks', 'video': 0.0, 'vote_average': 5.5, 'vote_count': 22.0, 'certification': ''}, {'imdb_id': 'tt0113092', 'adult': 0.0, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': 0.0, 'genres': [{'id': 878, 'name': 'Science Fiction'}], 'homepage': '', 'id': 110977.0, 'original_language': 'en', 'original_title': 'For the Cause', 'overview': 'Earth is in a state of constant war and two colonies of humans have been sent to a far away world. The colonists create a new civilization but have i ... (22 characters truncated) ... its of their forebears and war breaks out leaving the new cities in ruins. It falls to a bunch of teens to bring peace to the colony - but will they?', 'popularity': 1.6800000000000002, 'poster_path': '/h9bWO13nWRGZJo4XVPiElXyrRMU.jpg', 'production_companies': [{'id': 7405, 'logo_path': '/rfnws0uY8rsNAsrLbx3gEIcYXx3.png', 'name': 'Dimension Films', 'origin_country': 'US'}, {'id': 12816, 'logo_path': None, 'n ... (103 characters truncated) ... tien Holdings A.V.V.', 'origin_country': ''}, {'id': 925, 'logo_path': '/dIb9hjXNOkgxu4kBWdIdK8nM4w.png', 'name': 'Nu Image', 'origin_country': 'US'}], 'production_countries': [{'iso_3166_1': 'US', 'name': 'United States of America'}], 'release_date': '2000-11-15', 'revenue': 0.0, 'runtime': 100.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': 'The ultimate showdown on a forbidden planet.', 'title': 'For the Cause', 'video': 0.0, 'vote_average': 5.45, 'vote_count': 10.0, 'certification': ''}, {'imdb_id': 'tt0116391', 'adult': 0.0, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': 0.0, 'genres': [{'id': 18, 'name': 'Drama'}, {'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}], 'homepage': '', 'id': 442869.0, 'original_language': 'hi', 'original_title': 'Gang', 'overview': 'After falling prey to underworld, four friends jeopardise their personal lives with point of no return.', 'popularity': 2.107, 'poster_path': '/yB5wRu4uyXXwZA3PEj8cITu0xt3.jpg', 'production_companies': [], 'production_countries': [{'iso_3166_1': 'IN', 'name': 'India'}], 'release_date': '2000-04-14', 'revenue': 0.0, 'runtime': 152.0, 'spoken_languages': [{'english_name': 'Hindi', 'iso_639_1': 'hi', 'name': 'हिन्दी'}], 'status': 'Released', 'tagline': '', 'title': 'Gang', 'video': 0.0, 'vote_average': 4.0, 'vote_count': 1.0, 'certification': None}, {'imdb_id': 'tt0118694', 'adult': 0.0, 'backdrop_path': '/n4GJFGzsc7NinI1VeGDXIcQjtU2.jpg', 'belongs_to_collection': None, 'budget': 150000.0, 'genres': [{'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}], 'homepage': '', 'id': 843.0, 'original_language': 'cn', 'original_title': '花樣年華', 'overview': 'Two neighbors become intimate after discovering that their spouses are having an affair with one another.', 'popularity': 30.687, 'poster_path': '/iYypPT4bhqXfq1b6EnmxvRt6b2Y.jpg', 'production_companies': [{'id': 539, 'logo_path': '/iPLtePguIzOPNtAWfTxWLXb2O11.png', 'name': 'Block 2 Pictures', 'origin_country': 'HK'}, {'id': 148854, 'logo_path': None, ' ... (143 characters truncated) ... go_path': None, 'name': 'Paradis Films', 'origin_country': 'FR'}, {'id': 1615, 'logo_path': None, 'name': 'Fortissimo Films', 'origin_country': 'NL'}], 'production_countries': [{'iso_3166_1': 'FR', 'name': 'France'}, {'iso_3166_1': 'HK', 'name': 'Hong Kong'}, {'iso_3166_1': 'NL', 'name': 'Netherlands'}], 'release_date': '2000-09-29', 'revenue': 14204632.0, 'runtime': 99.0, 'spoken_languages': [{'english_name': 'Cantonese', 'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}, {'english_name': 'French', 'iso_639_1': 'fr', 'name': 'Français'}, {'english_name': 'Spanish', 'iso_639_1': 'es', 'name': 'Español'}], 'status': 'Released', 'tagline': 'Feel the heat, keep the feeling burning, let the sensation explode.', 'title': 'In the Mood for Love', 'video': 0.0, 'vote_average': 8.109, 'vote_count': 2331.0, 'certification': 'PG'}, {'imdb_id': 'tt0118852', 'adult': 0.0, 'backdrop_path': '/vceiGZ3uavAEHlTA7v0GjQsGVKe.jpg', 'belongs_to_collection': None, 'budget': 0.0, 'genres': [{'id': 18, 'name': 'Drama'}], 'homepage': '', 'id': 49511.0, 'original_language': 'en', 'original_title': 'Chinese Coffee', 'overview': 'When Harry Levine, an aging, unsuccessful Greenwich Village writer is fired from his job as restaurant doorman, he calls on friend and mentor Jake, ostensibly to collect a long-standing debt.', 'popularity': 3.933, 'poster_path': '/nZGWnSuf1FIuzyEuMRZHHZWViAp.jpg', 'production_companies': [{'id': 67930, 'logo_path': None, 'name': 'Chal Productions', 'origin_country': ''}, {'id': 1596, 'logo_path': None, 'name': 'Shooting Gallery', 'origin_country': ''}], 'production_countries': [{'iso_3166_1': 'US', 'name': 'United States of America'}], 'release_date': '2000-09-02', 'revenue': 0.0, 'runtime': 99.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': "There's a fine line between friendship and betrayal.", 'title': 'Chinese Coffee', 'video': 0.0, 'vote_average': 6.618, 'vote_count': 55.0, 'certification': 'R'}, {'imdb_id': 'tt0119273', 'adult': 0.0, 'backdrop_path': '/f5C03doOWiauu37bToKXtpgP5bS.jpg', 'belongs_to_collection': {'id': 141086, 'name': 'Heavy Metal Collection', 'poster_path': '/tgPpYcsjSo1DK0wublqYItYDwSW.jpg', 'backdrop_path': '/iao9hIahX41T1Lxpa5h62J28rQF.jpg'}, 'budget': 15000000.0, 'genres': [{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 16, 'name': 'Animation'}, {'id': 14, 'name': 'Fantasy'}, {'id': 878, 'name': 'Science Fiction'}], 'homepage': '', 'id': 16225.0, 'original_language': 'en', 'original_title': 'Heavy Metal 2000', 'overview': "Upon discovery of a shard of what could be the Loc-Nar, a miner named Tyler becomes possessed with an insatiable hunger for power and a thirst for im ... (86 characters truncated) ... nd kidnaps a beautiful young woman. His only mistake is that he doesn't kill her sister, Julie, who then sets out on a mission of rescue and revenge.", 'popularity': 12.393, 'poster_path': '/mzOgKYOXiuwE5DIfagFs34bOb3J.jpg', 'production_companies': [{'id': 2216, 'logo_path': None, 'name': 'Helkon Media', 'origin_country': 'DE'}, {'id': 91006, 'logo_path': None, 'name': 'Das Werk Productions', 'or ... (91 characters truncated) ... umbia Pictures', 'origin_country': 'US'}, {'id': 4172, 'logo_path': '/vb2gE5Ao9wIfj3XlY8CwwwbnAlh.png', 'name': 'CinéGroupe', 'origin_country': 'CA'}], 'production_countries': [{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso_3166_1': 'DE', 'name': 'Germany'}], 'release_date': '2000-04-19', 'revenue': 0.0, 'runtime': 88.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': 'not to survive the fight in the external world without inner strength', 'title': 'Heavy Metal 2000', 'video': 0.0, 'vote_average': 6.076, 'vote_count': 191.0, 'certification': 'R'}, {'imdb_id': 'tt0119495', 'adult': 0.0, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': 0.0, 'genres': [{'id': 35, 'name': 'Comedy'}, {'id': 10749, 'name': 'Romance'}], 'homepage': '', 'id': 51181.0, 'original_language': 'en', 'original_title': 'Love 101', 'overview': 'A group of raucous college buds living the Dorm life, decide to have a blowout while most of their friends are away for the Thanksgiving holiday. With LOVE 101 being the toughest course this semester, will they pass the final exam?', 'popularity': 0.6000000000000001, 'poster_path': '/42dUuv3lVaJe7wubwJhorWul3HI.jpg', 'production_companies': [], 'production_countries': [], 'release_date': '2000-08-24', 'revenue': 0.0, 'runtime': 86.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': '', 'title': 'Love 101', 'video': 0.0, 'vote_average': 0.0, 'vote_count': 0.0, 'certification': 'R'}  ... displaying 10 of 2985 total bound parameter sets ...  {'imdb_id': 'tt9668554', 'adult': 0.0, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': 0.0, 'genres': [{'id': 99, 'name': 'Documentary'}], 'homepage': '', 'id': 274300.0, 'original_language': 'en', 'original_title': 'Armageddon: Bible Prophecies and the Predictions of Nostradamus', 'overview': 'Documentary, Historical Documentaries, Faith & Spirituality Documentaries, Faith & Spirituality, Spiritual Mysteries - Are the predictions of Nostrad ... (264 characters truncated) ...  and warnings by 12th century Irish priest Saint Malachy and American psychic Edgar Cayce, and chronicles events accurately predicted by the two men.', 'popularity': 0.6000000000000001, 'poster_path': '/9FjjaKPmC4ACsHquYjS8VR0J9wv.jpg', 'production_companies': [{'id': 72120, 'logo_path': None, 'name': 'Questar Video', 'origin_country': ''}], 'production_countries': [{'iso_3166_1': 'US', 'name': 'United States of America'}], 'release_date': '2003-01-01', 'revenue': 0.0, 'runtime': 0.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': '', 'title': 'Armageddon: Bible Prophecies and the Predictions of Nostradamus', 'video': 0.0, 'vote_average': 0.0, 'vote_count': 0.0, 'certification': ''}, {'imdb_id': 'tt9789230', 'adult': 0.0, 'backdrop_path': None, 'belongs_to_collection': None, 'budget': 0.0, 'genres': [], 'homepage': '', 'id': 986785.0, 'original_language': 'en', 'original_title': 'Girls Who Like Girls', 'overview': 'Presents a series of clips, love scenes taken from softcore films made in the 60s and 70s, and articulates the history and evolution of lesbian portrayals as they contrast to gay and straight cultures of the times.', 'popularity': 0.6000000000000001, 'poster_path': '/ApOAj3YwprhNBuDrxp7e7gUKIcj.jpg', 'production_companies': [], 'production_countries': [], 'release_date': '2001-06-10', 'revenue': 0.0, 'runtime': 89.0, 'spoken_languages': [{'english_name': 'English', 'iso_639_1': 'en', 'name': 'English'}], 'status': 'Released', 'tagline': '', 'title': 'Girls Who Like Girls', 'video': 0.0, 'vote_average': 0.0, 'vote_count': 0.0, 'certification': ''})]
(Background on this error at: https://sqlalche.me/e/14/rvf5)

## Queries

In [60]:
# Show the first 5 rows of title_basics
query_show_title_basics = """
SELECT *
FROM title_basics
LIMIT 5;"""

result_title_basics = engine.execute(query_show_title_basics)
for row in result_title_basics:
    print(row)

2023-10-03 15:18:11,237 INFO sqlalchemy.engine.Engine 
SELECT *
FROM title_basics
LIMIT 5;
2023-10-03 15:18:11,238 INFO sqlalchemy.engine.Engine [raw sql] {}
('tt0035423', 'Kate & Leopold', 2001.0, None, 118)
('tt0062336', 'The Tango of the Widower and Its Distorting Mirror', 2020.0, None, 70)
('tt0068865', 'Lives of Performers', 2016.0, None, 90)
('tt0069049', 'The Other Side of the Wind', 2018.0, None, 122)
('tt0088751', 'The Naked Monster', 2005.0, None, 100)


In [61]:
# Show the first 5 rows of title_ratings
query_show_title_ratings = """
SELECT *
FROM title_ratings
LIMIT 5;
"""

result_title_ratings = engine.execute(query_show_title_ratings)
for row in result_title_ratings:
    print(row)

2023-10-03 15:18:30,032 INFO sqlalchemy.engine.Engine 
SELECT *
FROM title_ratings
LIMIT 5;

2023-10-03 15:18:30,033 INFO sqlalchemy.engine.Engine [raw sql] {}
('tt0035423', 6.4, 87623)
('tt0062336', 6.4, 180)
('tt0068865', 5.4, 74)
('tt0069049', 6.7, 7834)
('tt0088751', 5.3, 341)


In [62]:
# Show the first 5 rows of title_genres
query_show_title_genres = """
SELECT *
FROM title_genres
LIMIT 5;
"""

result_title_genres = engine.execute(query_show_title_genres)
for row in result_title_genres:
    print(row)

2023-10-03 15:18:30,767 INFO sqlalchemy.engine.Engine 
SELECT *
FROM title_genres
LIMIT 5;

2023-10-03 15:18:30,769 INFO sqlalchemy.engine.Engine [raw sql] {}
('tt0035423', 5)
('tt0035423', 10)
('tt0035423', 19)
('tt0062336', 8)
('tt0068865', 8)


In [63]:
# Show first 5 rows of genres table
query_show_genres = """
SELECT *
FROM genres
LIMIT 5;
"""

result_genres = engine.execute(query_show_genres)
for row in result_genres:
    print(row)

2023-10-03 15:18:31,390 INFO sqlalchemy.engine.Engine 
SELECT *
FROM genres
LIMIT 5;

2023-10-03 15:18:31,391 INFO sqlalchemy.engine.Engine [raw sql] {}
('Action', 0)
('Adult', 1)
('Adventure', 2)
('Animation', 3)
('Biography', 4)


In [64]:
# Show first 5 rows of tmdb_data table
query_show_tmdb_data = """
SELECT *
FROM tmdb_data
LIMIT 5;
"""

result_tmdb_data = engine.execute(query_show_tmdb_data)
for row in result_tmdb_data:
    print(row)

2023-10-03 15:18:31,896 INFO sqlalchemy.engine.Engine 
SELECT *
FROM tmdb_data
LIMIT 5;

2023-10-03 15:18:31,897 INFO sqlalchemy.engine.Engine [raw sql] {}


In [70]:
# Execute the SQL query to list all tables
result = engine.execute("SHOW TABLES;")

# Fetch and print the table names
table_names = [row[0] for row in result.fetchall()]
print(table_names)

# Close the connection
connection.close()

2023-10-03 15:21:18,846 INFO sqlalchemy.engine.Engine SHOW TABLES;
2023-10-03 15:21:18,846 INFO sqlalchemy.engine.Engine [raw sql] {}
['genres', 'title_basics', 'title_genres', 'title_ratings', 'tmdb_data']
