In [42]:
import sqlite3
import pandas as pd 

# Open the SQLite database file through sqlite3.connect(), the documented
# factory for Connection objects (rather than instantiating the class directly).
# NOTE(review): no visible cell closes this connection — call conn.close()
# once the last query has run.
conn = sqlite3.connect("im.db")

To find out which (living) director would maximize popularity, we'll first build a pandas DataFrame of directors and their movies from the IMDB database.

In [43]:
# One row per (director, movie) pairing: the person's name and death year
# alongside the movie's titles and genres. The `directors` table links
# persons to movies via person_id / movie_id.
director_query = """
        SELECT p.primary_name AS director_name, p.death_year, mb.primary_title, mb.original_title, mb.genres
        FROM persons AS p
            JOIN directors AS d
                USING(person_id)
            JOIN movie_basics AS mb
                USING(movie_id)
    """
director_df = pd.read_sql(director_query, con=conn)

In [44]:
# Remove fully identical rows and renumber the index 0..n-1.
# Reassigning (instead of inplace=True) keeps the cell's data lineage explicit
# and avoids mutating a frame that an earlier cell produced.
director_df = director_df.drop_duplicates(ignore_index=True)
director_df

Unnamed: 0,director_name,death_year,primary_title,original_title,genres
0,Ruel S. Bayani,,Paano na kaya,Paano na kaya,"Drama,Romance"
1,Ruel S. Bayani,,No Other Woman,No Other Woman,"Drama,Romance,Thriller"
2,Ruel S. Bayani,,One More Try,One More Try,Drama
3,Ruel S. Bayani,,Kasal,Kasal,
4,Bryan Beasley,,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
...,...,...,...,...,...
163126,Zheng Wei,,The Old Road,The Old Road,Family
163127,Rama Narayanan,,Chain Jayapal,Chain Jayapal,
163128,Rama Narayanan,,Arya Suriya,Arya Suriya,Drama
163129,Samir Eshra,,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [45]:
# Print the column dtypes and non-null counts of the de-duplicated director table.
director_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163131 entries, 0 to 163130
Data columns (total 5 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   director_name   163131 non-null  object 
 1   death_year      979 non-null     float64
 2   primary_title   163131 non-null  object 
 3   original_title  163128 non-null  object 
 4   genres          159392 non-null  object 
dtypes: float64(1), object(4)
memory usage: 6.2+ MB


In [46]:
# Keep only rows whose director has no recorded death year; a missing
# death_year is treated here as "still alive".
director_is_living = director_df["death_year"].isna()
director_df = director_df.loc[director_is_living]

In [47]:
# Renumber the rows 0..n-1 after the filter above; drop=True discards the old
# index instead of keeping it as a column.
director_df = director_df.reset_index(drop=True)
director_df

Unnamed: 0,director_name,death_year,primary_title,original_title,genres
0,Ruel S. Bayani,,Paano na kaya,Paano na kaya,"Drama,Romance"
1,Ruel S. Bayani,,No Other Woman,No Other Woman,"Drama,Romance,Thriller"
2,Ruel S. Bayani,,One More Try,One More Try,Drama
3,Ruel S. Bayani,,Kasal,Kasal,
4,Bryan Beasley,,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
...,...,...,...,...,...
162147,Zheng Wei,,The Old Road,The Old Road,Family
162148,Rama Narayanan,,Chain Jayapal,Chain Jayapal,
162149,Rama Narayanan,,Arya Suriya,Arya Suriya,Drama
162150,Samir Eshra,,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [56]:
# death_year is entirely null after the living-only filter, so remove the column.
director_df = director_df.drop(columns="death_year")
director_df

Unnamed: 0,director_name,primary_title,original_title,genres
0,Ruel S. Bayani,Paano na kaya,Paano na kaya,"Drama,Romance"
1,Ruel S. Bayani,No Other Woman,No Other Woman,"Drama,Romance,Thriller"
2,Ruel S. Bayani,One More Try,One More Try,Drama
3,Ruel S. Bayani,Kasal,Kasal,
4,Bryan Beasley,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
...,...,...,...,...
162147,Zheng Wei,The Old Road,The Old Road,Family
162148,Rama Narayanan,Chain Jayapal,Chain Jayapal,
162149,Rama Narayanan,Arya Suriya,Arya Suriya,Drama
162150,Samir Eshra,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [58]:
# Write the director table to directors.csv; index=False leaves the row index out of the file.
director_df.to_csv('directors.csv', index=False)

We'll do the same for writers.

In [48]:
# One row per (writer, movie) pairing: the person's name and death year
# alongside the movie's titles and genres. The `writers` table links
# persons to movies via person_id / movie_id.
writer_query = """
        SELECT p.primary_name AS writer_name, p.death_year, mb.primary_title, mb.original_title, mb.genres
        FROM persons AS p
            JOIN writers AS w
                USING(person_id)
            JOIN movie_basics AS mb
                USING(movie_id)
    """
writer_df = pd.read_sql(writer_query, con=conn)

In [49]:
# Remove fully identical rows and renumber the index 0..n-1.
# Reassigning (instead of inplace=True) keeps the cell's data lineage explicit
# and avoids mutating a frame that an earlier cell produced.
writer_df = writer_df.drop_duplicates(ignore_index=True)
writer_df

Unnamed: 0,writer_name,death_year,primary_title,original_title,genres
0,Bryan Beasley,,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
1,Michael Frost Beckner,,Sniper: Ultimate Kill,Sniper: Ultimate Kill,"Action,Drama,Thriller"
2,Hava Kohav Beller,,In the Land of Pomegranates,In the Land of Pomegranates,Documentary
3,Joel Bender,,The True Adventures of Raoul Walsh,The True Adventures of Raoul Walsh,"Biography,Documentary"
4,Doug Benson,,The Greatest Movie Ever Rolled,The Greatest Movie Ever Rolled,Documentary
...,...,...,...,...,...
177971,Amenkai Pilgrim-Owens,,Broken Stories,Broken Stories,Documentary
177972,Elina Gakou Gomba,,Le choc du futur,Le choc du futur,Drama
177973,Rama Narayanan,,Chain Jayapal,Chain Jayapal,
177974,Samir Eshra,,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [50]:
# Print the column dtypes and non-null counts of the de-duplicated writer table.
writer_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 177976 entries, 0 to 177975
Data columns (total 5 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   writer_name     177976 non-null  object 
 1   death_year      3202 non-null    float64
 2   primary_title   177976 non-null  object 
 3   original_title  177975 non-null  object 
 4   genres          174911 non-null  object 
dtypes: float64(1), object(4)
memory usage: 6.8+ MB


In [51]:
# Keep only rows whose writer has no recorded death year; a missing
# death_year is treated here as "still alive".
writer_is_living = writer_df["death_year"].isna()
writer_df = writer_df.loc[writer_is_living]

In [52]:
# Renumber the rows 0..n-1 after the filter above; drop=True discards the old
# index instead of keeping it as a column.
writer_df = writer_df.reset_index(drop=True)
writer_df

Unnamed: 0,writer_name,death_year,primary_title,original_title,genres
0,Bryan Beasley,,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
1,Michael Frost Beckner,,Sniper: Ultimate Kill,Sniper: Ultimate Kill,"Action,Drama,Thriller"
2,Hava Kohav Beller,,In the Land of Pomegranates,In the Land of Pomegranates,Documentary
3,Joel Bender,,The True Adventures of Raoul Walsh,The True Adventures of Raoul Walsh,"Biography,Documentary"
4,Doug Benson,,The Greatest Movie Ever Rolled,The Greatest Movie Ever Rolled,Documentary
...,...,...,...,...,...
174769,Amenkai Pilgrim-Owens,,Broken Stories,Broken Stories,Documentary
174770,Elina Gakou Gomba,,Le choc du futur,Le choc du futur,Drama
174771,Rama Narayanan,,Chain Jayapal,Chain Jayapal,
174772,Samir Eshra,,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [57]:
# death_year is entirely null after the living-only filter, so remove the column.
writer_df = writer_df.drop(columns="death_year")
writer_df

Unnamed: 0,writer_name,primary_title,original_title,genres
0,Bryan Beasley,The Quiet Philanthropist: The Edith Gaylord Story,The Quiet Philanthropist: The Edith Gaylord Story,"Documentary,History"
1,Michael Frost Beckner,Sniper: Ultimate Kill,Sniper: Ultimate Kill,"Action,Drama,Thriller"
2,Hava Kohav Beller,In the Land of Pomegranates,In the Land of Pomegranates,Documentary
3,Joel Bender,The True Adventures of Raoul Walsh,The True Adventures of Raoul Walsh,"Biography,Documentary"
4,Doug Benson,The Greatest Movie Ever Rolled,The Greatest Movie Ever Rolled,Documentary
...,...,...,...,...
174769,Amenkai Pilgrim-Owens,Broken Stories,Broken Stories,Documentary
174770,Elina Gakou Gomba,Le choc du futur,Le choc du futur,Drama
174771,Rama Narayanan,Chain Jayapal,Chain Jayapal,
174772,Samir Eshra,The Shadow Lawyers,The Shadow Lawyers,Documentary


In [59]:
# Write the writer table to writers.csv; index=False leaves the row index out of the file.
writer_df.to_csv('writers.csv', index=False)