In [1]:
import pandas as pd

In [41]:
# Load the stored tables used by this notebook.
# Open the store read-only: the default mode ('a') opens the file for
# reading and writing and creates it when it does not exist, which would
# hide a wrong path behind a later KeyError.
with pd.HDFStore('film_database.h5', mode='r') as store:
    films = store['films']
    production_co = store['production_company']
    production_co_link = store['production_co_link']
    producers = store['producers']
    producers_link = store['producers_link']

In [42]:
from datetime import datetime, timedelta
import pandas as pd

# Convert dates and filter recent films
def filter_recent_films(films_df, days=730, now=None):
    """Return the films whose ``updated`` date lies within the last ``days`` days.

    Parameters
    ----------
    films_df : pandas.DataFrame
        Must have an ``updated`` column holding strings such as
        ``"Jun 11, 2024"`` (format ``%b %d, %Y``). The frame is not modified.
    days : int, default 730
        Size of the look-back window in days. The default keeps the original
        "roughly two years" behaviour.
    now : datetime-like, optional
        Reference point the window is measured back from. Defaults to
        ``datetime.now()``. Pass a fixed, timezone-naive value to get
        reproducible results.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with an extra ``updated_date`` column
        containing the parsed dates. Rows dated exactly at the cutoff are kept.

    Raises
    ------
    ValueError
        If an ``updated`` value cannot be parsed with ``%b %d, %Y``.
    """
    if now is None:
        now = datetime.now()
    cutoff = pd.Timestamp(now) - timedelta(days=days)

    # Work on a copy so the caller's frame does not gain the helper column.
    df = films_df.copy()
    df['updated_date'] = pd.to_datetime(df['updated'], format='%b %d, %Y')
    return df[df['updated_date'] >= cutoff]

# Get the final combined table
def get_combined_producers_films(films_df, producers_df, producers_link_df):
    """Build one row per (producer, recently updated film) link.

    ``films_df`` is first reduced with ``filter_recent_films``. Link rows whose
    ``film_id`` is among the remaining film index labels are kept, and each one
    is expanded with the producer row selected by ``producer_id`` and the
    film's ``name`` / ``updated`` values.

    Returns a DataFrame with the columns ``name``, ``url`` (both taken from
    ``producers_df``), ``film_name`` and ``film_updated``. Its index holds the
    ``producers_df`` labels looked up through ``producer_id``, so a producer
    linked to several films appears several times.
    """
    recent = filter_recent_films(films_df)

    # Link rows that point at a film which survived the recency filter.
    is_recent_link = producers_link_df['film_id'].isin(recent.index)
    links = producers_link_df[is_recent_link]

    # Both lookups follow the order of `links`, so row i of each describes the
    # same link; the film values are attached by position, not by label.
    producer_rows = producers_df.loc[links['producer_id']]
    film_rows = recent.loc[links['film_id'], ['name', 'updated']]

    combined = producer_rows.assign(
        film_name=film_rows['name'].values,
        film_updated=film_rows['updated'].values,
    )
    return combined[['name', 'url', 'film_name', 'film_updated']]

def get_producer_film_counts(combined_df):
    """Count the distinct film names per producer, highest count first.

    Producers are identified by the pair (``name``, ``url``). The result has
    the columns ``name``, ``url`` and ``film_count``; its index is the row
    position each producer had before sorting.
    """
    distinct_films = combined_df.groupby(['name', 'url'])['film_name'].nunique()
    # Naming the Series first makes reset_index() emit the 'film_count' column.
    counts = distinct_films.rename('film_count').reset_index()
    return counts.sort_values(by='film_count', ascending=False)

def filter_films_by_producer_url(films_df, producers_df, producers_link_df, producer_url):
    """Return the recently updated films linked to the producer at ``producer_url``.

    Producers are matched by exact equality on the ``url`` column of
    ``producers_df``. Their index labels are looked up in the ``producer_id``
    column of ``producers_link_df`` to collect ``film_id`` values, and the
    films whose index label is among those ids are passed through
    ``filter_recent_films``. An unknown URL yields an empty result.
    """
    # Index labels of every producer row carrying this URL.
    url_matches = producers_df['url'] == producer_url
    matching_producers = producers_df.loc[url_matches].index

    # film_id values of the link rows that point at those producers.
    link_mask = producers_link_df['producer_id'].isin(matching_producers)
    linked_film_ids = producers_link_df.loc[link_mask, 'film_id']

    # Restrict to the linked films, then keep only the recent ones.
    producer_films = films_df.loc[films_df.index.isin(linked_film_ids)].copy()
    return filter_recent_films(producer_films)

In [43]:
# Build the producer/film table, restricted to recently updated films.
tabel = get_combined_producers_films(films, producers, producers_link)

In [44]:
# Display the combined table (the rendered output is truncated by pandas).
tabel

Unnamed: 0,name,url,film_name,film_updated
2,Michael Clear,https://pro.imdb.com/name/nm2752795,Arachnophobia,"Jun 11, 2024"
3,Frank Marshall,https://pro.imdb.com/name/nm0550881,Arachnophobia,"Jun 11, 2024"
4,Judson Scott,https://pro.imdb.com/name/nm6625349,Arachnophobia,"Jun 11, 2024"
5,James Wan,https://pro.imdb.com/name/nm1490123,Arachnophobia,"Jun 11, 2024"
16,Gerard Butler,https://pro.imdb.com/name/nm0124930,Night Has Fallen,"Jan 20, 2025"
...,...,...,...,...
3275,Molly Milstein,https://pro.imdb.com/name/nm13328464,Untitled Ryan Reynolds Netflix F,"Aug 22, 2024"
95,Ryan Reynolds,https://pro.imdb.com/name/nm0005351,Untitled Ryan Reynolds Netflix F,"Aug 22, 2024"
2204,Claude Dal Farra,https://pro.imdb.com/name/nm3894387,The Language of FlowersThe,"Jan 6, 2025"
3283,Peter Hutchings,https://pro.imdb.com/name/nm4050491,The Language of FlowersThe,"Jan 6, 2025"


In [45]:
# Number of distinct film names in the combined table.
tabel['film_name'].nunique()

722

In [72]:
# Rows at positions 20-29 of the producer ranking (sorted by film count, descending).
get_producer_film_counts(tabel).iloc[20:30]

Unnamed: 0,name,url,film_count
1115,Kelly McCormick,https://pro.imdb.com/name/nm0566555,6
1482,Neal H. Moritz,https://pro.imdb.com/name/nm0605775,5
184,Basil Iwanyk,https://pro.imdb.com/name/nm0412588,5
2021,Will Ferrell,https://pro.imdb.com/name/nm0002071,5
1637,Reese Witherspoon,https://pro.imdb.com/name/nm0000702,5
2033,Wyck Godfrey,https://pro.imdb.com/name/nm0324041,5
1960,Tom Hardy,https://pro.imdb.com/name/nm0362766,5
480,David Leitch,https://pro.imdb.com/name/nm0500610,5
1952,Tom Ackerley,https://pro.imdb.com/name/nm3943537,5
854,Jason Bateman,https://pro.imdb.com/name/nm0000867,5


In [70]:
# Recently updated films linked to the producer with this URL.
filter_films_by_producer_url(films, producers, producers_link, "https://pro.imdb.com/name/nm1696098")

Unnamed: 0,imdb_id,name,URL,description,updated,checked_date,updated_date
26,tt26735127,Jumanji 3,https://pro.imdb.com/title/tt26735127,The Final Level of the next Jumanji series.,"Dec 7, 2024",2025-02-05 14:07:13.497531,2024-12-07
42,tt17017830,Red Notice 2,https://pro.imdb.com/title/tt17017830,,"Dec 20, 2024",2025-02-05 14:19:12.677452,2024-12-20
58,tt5478534,San Andreas 2,https://pro.imdb.com/title/tt5478534,"A sequel to the 2015 action film 'San Andreas,...","Aug 12, 2024",2025-02-05 18:19:44.741189,2024-08-12
95,tt8917520,The King,https://pro.imdb.com/title/tt8917520,King Kamehameha fulfills his life-long prophec...,"Sep 27, 2023",2025-02-05 18:48:13.933409,2023-09-27
117,tt18183072,It Takes Two,https://pro.imdb.com/title/tt18183072,May and Cody are about to get divorced when th...,"Nov 8, 2024",2025-02-05 19:03:35.951849,2024-11-08
824,tt15218952,Untitled Kate Warne Biopic,https://pro.imdb.com/title/tt15218952,"Story of Kate Warne, the first female detectiv...","May 28, 2024",2025-02-06 13:30:24.549027,2024-05-28
