In [1]:
from sqlalchemy import create_engine
import pandas as pd
from dotenv import load_dotenv
import os

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Fail early with a readable message when the connection string is absent,
# rather than handing None to create_engine().
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file."
    )

# Engine shared by every query cell below.
engine = create_engine(database_url)

In [2]:

# Sample of the title_basics table used for the exploration below.
# NOTE(review): LIMIT without ORDER BY does not guarantee which rows are
# returned — confirm whether a stable sample is needed.
query = "SELECT * FROM sebastien.title_basics LIMIT 10000;"

# Run the query on the connection opened by the context manager so it is
# the one actually used (and closed) — passing `engine` here would make
# pandas open a second connection and leave `conn` idle.
with engine.connect() as conn, conn.begin():
    df = pd.read_sql_query(query, conn)

df.head()

Unnamed: 0,tconst,titletype,primarytitle,originaltitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes
0,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,
1,tt32349433,tvEpisode,Episode #1.7,Episode #1.7,False,2024.0,,,"Drama,Romance",,
2,tt32349436,tvEpisode,Episode #1.8,Episode #1.8,False,2024.0,,,"Drama,Romance",,
3,tt32349437,tvEpisode,The Uber Rush,The Uber Rush,False,2024.0,,,Comedy,,
4,tt32349438,tvEpisode,Episode #1.9,Episode #1.9,False,2024.0,,,"Drama,Romance",,


In [4]:
# Remove the originaltitle column. Reassigning (instead of inplace=True)
# together with errors="ignore" keeps this cell idempotent: running it a
# second time no longer raises KeyError for the already-dropped column.
df = df.drop(columns=["originaltitle"], errors="ignore")
df

Unnamed: 0,tconst,titletype,primarytitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes
0,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,
1,tt32349433,tvEpisode,Episode #1.7,False,2024.0,,,"Drama,Romance",,
2,tt32349436,tvEpisode,Episode #1.8,False,2024.0,,,"Drama,Romance",,
3,tt32349437,tvEpisode,The Uber Rush,False,2024.0,,,Comedy,,
4,tt32349438,tvEpisode,Episode #1.9,False,2024.0,,,"Drama,Romance",,
...,...,...,...,...,...,...,...,...,...,...
9995,tt32378907,tvEpisode,Faszination Wasser: Flussdeltas - Paradiese zw...,False,2024.0,,44.0,"Documentary,History",,
9996,tt32378912,tvEpisode,Faszination Wasser: Wasserfälle - Die Urgewalt...,False,2024.0,,44.0,"Documentary,History",,
9997,tt32378919,tvEpisode,Episode #5.71,False,1998.0,,,Game-Show,,
9998,tt3237892,tvEpisode,Episode #1.6689,False,2013.0,,18.0,"Drama,Romance",,


In [10]:
# Full title_episode table (no LIMIT): one row per episode with its parent
# title, season number and episode number.
query = "SELECT * FROM sebastien.title_episode;"

# Use the managed connection for the read; passing `engine` would open a
# separate connection and leave `conn` unused.
with engine.connect() as conn, conn.begin():
    df_episode = pd.read_sql_query(query, conn)

df_episode.head()

Unnamed: 0,tconst,parenttconst,seasonnumber,episodenumber
0,tt12155488,tt12145510,1.0,7.0
1,tt1215549,tt0990536,1.0,3.0
2,tt12155490,tt12092480,1.0,211.0
3,tt12155492,tt12092480,1.0,212.0
4,tt12155494,tt12145510,1.0,8.0


In [11]:
# Attach each title's own episode record (parenttconst, seasonnumber,
# episodenumber) by matching on tconst. A left join keeps every row of df,
# including titles that have no entry in df_episode.
df_merge = pd.merge(
    df,
    df_episode,
    how="left",
    on="tconst",
)
df_merge

Unnamed: 0,tconst,titletype,primarytitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes,parenttconst,seasonnumber,episodenumber
0,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,,tt8690468,,
1,tt32349433,tvEpisode,Episode #1.7,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,7.0
2,tt32349436,tvEpisode,Episode #1.8,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,8.0
3,tt32349437,tvEpisode,The Uber Rush,False,2024.0,,,Comedy,,,tt30946404,1.0,25.0
4,tt32349438,tvEpisode,Episode #1.9,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,tt32378907,tvEpisode,Faszination Wasser: Flussdeltas - Paradiese zw...,False,2024.0,,44.0,"Documentary,History",,,tt0382491,,
9996,tt32378912,tvEpisode,Faszination Wasser: Wasserfälle - Die Urgewalt...,False,2024.0,,44.0,"Documentary,History",,,tt0382491,,
9997,tt32378919,tvEpisode,Episode #5.71,False,1998.0,,,Game-Show,,,tt0233098,5.0,71.0
9998,tt3237892,tvEpisode,Episode #1.6689,False,2013.0,,18.0,"Drama,Romance",,,tt0092325,1.0,6689.0


In [12]:
# Treat each title as a potential parent: match df.tconst against
# df_episode.parenttconst. The episode-side tconst collides with the
# title-side one and is therefore renamed to tconst_parent by the suffixes.
# A left join keeps titles that are not the parent of any episode.
df_merge_parent = pd.merge(
    df,
    df_episode,
    how="left",
    left_on="tconst",
    right_on="parenttconst",
    suffixes=("", "_parent"),
)
df_merge_parent

Unnamed: 0,tconst,titletype,primarytitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes,tconst_parent,parenttconst,seasonnumber,episodenumber
0,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,,,,,
1,tt32349433,tvEpisode,Episode #1.7,False,2024.0,,,"Drama,Romance",,,,,,
2,tt32349436,tvEpisode,Episode #1.8,False,2024.0,,,"Drama,Romance",,,,,,
3,tt32349437,tvEpisode,The Uber Rush,False,2024.0,,,Comedy,,,,,,
4,tt32349438,tvEpisode,Episode #1.9,False,2024.0,,,"Drama,Romance",,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12146,tt32378907,tvEpisode,Faszination Wasser: Flussdeltas - Paradiese zw...,False,2024.0,,44.0,"Documentary,History",,,,,,
12147,tt32378912,tvEpisode,Faszination Wasser: Wasserfälle - Die Urgewalt...,False,2024.0,,44.0,"Documentary,History",,,,,,
12148,tt32378919,tvEpisode,Episode #5.71,False,1998.0,,,Game-Show,,,,,,
12149,tt3237892,tvEpisode,Episode #1.6689,False,2013.0,,18.0,"Drama,Romance",,,,,,


In [13]:
# Stack the two join results row-wise and renumber the index from 0.
# Both inputs are left joins from df, so every title appears at least once
# in each half of the stacked frame.
combined = pd.concat(
    (df_merge, df_merge_parent),
    axis=0,
    ignore_index=True,
)
combined

Unnamed: 0,tconst,titletype,primarytitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes,parenttconst,seasonnumber,episodenumber,tconst_parent
0,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,,tt8690468,,,
1,tt32349433,tvEpisode,Episode #1.7,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,7.0,
2,tt32349436,tvEpisode,Episode #1.8,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,8.0,
3,tt32349437,tvEpisode,The Uber Rush,False,2024.0,,,Comedy,,,tt30946404,1.0,25.0,
4,tt32349438,tvEpisode,Episode #1.9,False,2024.0,,,"Drama,Romance",,,tt32325609,1.0,9.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22146,tt32378907,tvEpisode,Faszination Wasser: Flussdeltas - Paradiese zw...,False,2024.0,,44.0,"Documentary,History",,,,,,
22147,tt32378912,tvEpisode,Faszination Wasser: Wasserfälle - Die Urgewalt...,False,2024.0,,44.0,"Documentary,History",,,,,,
22148,tt32378919,tvEpisode,Episode #5.71,False,1998.0,,,Game-Show,,,,,,
22149,tt3237892,tvEpisode,Episode #1.6689,False,2013.0,,18.0,"Drama,Romance",,,,,,


In [21]:
# Show the rows whose tconst has already occurred earlier in the frame
# (the first occurrence of each tconst is not flagged).
combined.loc[combined.duplicated(subset=["tconst"], keep="first")]

Unnamed: 0,tconst,titletype,primarytitle,isadult,startyear,endyear,runtimeminutes,genres,averagerating,numvotes,parenttconst,seasonnumber,episodenumber,tconst_parent
10000,tt32349431,tvEpisode,Comment distinguer la critique du harcèlement?...,False,2024.0,,,Documentary,,,,,,
10001,tt32349433,tvEpisode,Episode #1.7,False,2024.0,,,"Drama,Romance",,,,,,
10002,tt32349436,tvEpisode,Episode #1.8,False,2024.0,,,"Drama,Romance",,,,,,
10003,tt32349437,tvEpisode,The Uber Rush,False,2024.0,,,Comedy,,,,,,
10004,tt32349438,tvEpisode,Episode #1.9,False,2024.0,,,"Drama,Romance",,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22146,tt32378907,tvEpisode,Faszination Wasser: Flussdeltas - Paradiese zw...,False,2024.0,,44.0,"Documentary,History",,,,,,
22147,tt32378912,tvEpisode,Faszination Wasser: Wasserfälle - Die Urgewalt...,False,2024.0,,44.0,"Documentary,History",,,,,,
22148,tt32378919,tvEpisode,Episode #5.71,False,1998.0,,,Game-Show,,,,,,
22149,tt3237892,tvEpisode,Episode #1.6689,False,2013.0,,18.0,"Drama,Romance",,,,,,
