In [1]:
import pandas as pd
from ipywidgets import interact, widgets

# Show all rows and columns: lift pandas' truncation limits for displayed frames
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Load the dataset that display_movies ranks
# (it reads the numVotes, averageRating and originalTitle columns)
df = pd.read_csv('oscars_merged.csv')

# Rank the movies in `df` by rating or vote count and show the top or bottom of the list
def display_movies(metric, selection, min_votes, num_movies):
    """Display the best or worst movies in ``df`` as a table numbered from 1.

    Parameters
    ----------
    metric : str
        'rating' ranks by ``averageRating``; 'number of votes' ranks by
        ``numVotes``.
    selection : str
        'best' shows the largest values (index titled ``Top <num_movies>``),
        'worst' the smallest (index titled ``Worst <num_movies>``). Any other
        value displays nothing.
    min_votes : int
        Movies whose ``numVotes`` is below this threshold are excluded before
        ranking.
    num_movies : int
        Maximum number of rows to show.

    Raises
    ------
    ValueError
        If ``selection`` is valid but ``metric`` is not a supported value.
    """
    index_labels = {'best': 'Top', 'worst': 'Worst'}
    if selection not in index_labels:
        # Unknown selections have always been a silent no-op; keep that contract.
        return

    sort_columns = {'rating': 'averageRating', 'number of votes': 'numVotes'}
    if metric not in sort_columns:
        # Without this check the ranking variable was never assigned and the
        # function died with an UnboundLocalError further down.
        raise ValueError(
            f"Unknown metric {metric!r}; expected one of {sorted(sort_columns)}"
        )

    # Filter movies based on minimum number of votes
    eligible = df[df['numVotes'] >= min_votes]
    pick = eligible.nlargest if selection == 'best' else eligible.nsmallest

    # Keep only title, rating and votes; .assign builds a new frame instead of
    # writing columns into a slice of the filtered data.
    ranked = pick(num_movies, sort_columns[metric])[
        ['originalTitle', 'averageRating', 'numVotes']
    ].assign(
        # One decimal place for the rating, whole numbers for the vote count
        averageRating=lambda table: table['averageRating'].round(1),
        numVotes=lambda table: table['numVotes'].astype(int),
    )

    # Enumerate rows from 1 and title the index column, e.g. "Top 10"
    ranked.index = pd.RangeIndex(
        1, len(ranked) + 1, name=f'{index_labels[selection]} {num_movies}'
    )

    display(ranked)

# Widgets that feed display_movies. Keyword arguments that merely restated the
# widget defaults (disabled, orientation, readout, readout_format) are omitted.
# 'initial' description width lets the longer labels render in full instead of
# being cut off at the default label width.
label_style = {'description_width': 'initial'}

# Dropdowns for the ranking metric and for best/worst
metric_dropdown = widgets.Dropdown(
    options=['rating', 'number of votes'],
    value='rating',
    description='Select metric:',
    style=label_style,
)

selection_dropdown = widgets.Dropdown(
    options=['best', 'worst'],
    value='best',
    description='Select movies:',
    style=label_style,
)

# Sliders only fire on release (continuous_update=False) so the table is not
# recomputed for every intermediate position while dragging.
min_votes_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=10000,
    step=100,
    description='Minimum votes:',
    continuous_update=False,
    style=label_style,
)

num_movies_slider = widgets.IntSlider(
    value=10,
    min=1,
    max=200,
    step=1,
    description='Number of movies:',
    continuous_update=False,
    style=label_style,
)

# Re-run display_movies whenever a widget changes. The trailing semicolon keeps
# the notebook from echoing the returned function's repr below the widgets.
interact(
    display_movies,
    metric=metric_dropdown,
    selection=selection_dropdown,
    min_votes=min_votes_slider,
    num_movies=num_movies_slider,
);



interactive(children=(Dropdown(description='Select metric:', options=('rating', 'number of votes'), value='rat…

<function __main__.display_movies(metric, selection, min_votes, num_movies)>

For Directors:

In [2]:
import pandas as pd
from ipywidgets import interact, widgets

# Load the movie table and keep only rows released in 1915 or later
df = pd.read_csv("oscars_merged.csv").loc[lambda movies: movies['releaseYear'] >= 1915]
# Per-director table; display_directors reads its directors, primaryName
# and knownForTitles columns
df_names = pd.read_csv("directors_merged.csv")

# Turn a comma-separated string into a list of its parts
def split_words(string_with_commas):
    """Split ``string_with_commas`` on ',' and return the parts as a list.

    Non-string input yields an empty list. pandas reads an empty CSV field as
    the float NaN, which has no ``.split`` method, so applying this function
    to a column with missing values used to raise AttributeError.
    """
    if not isinstance(string_with_commas, str):
        return []
    return string_with_commas.split(',')
    
# Replace each comma-separated 'directors' string with a list of its parts
df['directors'] = df['directors'].apply(split_words)

# One row per (movie, director) pair
directors_exploded = df.explode('directors')

# Per director: mean of averageRating and number of rated movies
director_rating = (
    directors_exploded
    .groupby("directors")["averageRating"]
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'averageRating', 'count': 'movieCount'})
    .reset_index()
)

# Interactive ranking of directors by the mean rating of their movies.
# 'initial' description width keeps the long slider labels from being cut off.
@interact(min_movies=widgets.IntSlider(min=1, max=100, step=1, value=10, description='Mindestanzahl Filme',
                                       style={'description_width': 'initial'}),
          num_directors=widgets.IntSlider(min=1, max=200, step=1, value=10, description='Anzahl der Regisseure',
                                          style={'description_width': 'initial'}),
          selection=['best', 'worst'])
def display_directors(min_movies, num_directors, selection):
    """Display the best- or worst-rated directors as a table numbered from 1.

    Parameters
    ----------
    min_movies : int
        Only directors whose ``movieCount`` in ``director_rating`` is at least
        this value are considered.
    num_directors : int
        Maximum number of directors to show.
    selection : str
        'best' ranks by highest ``averageRating``; any other value ranks by
        lowest.
    """
    def known_for_titles(title_ids):
        """Return the originalTitle of every row in ``df`` whose titleID is listed in ``title_ids``."""
        # The left merge below leaves NaN (a float, not a string) for
        # directors without a row in df_names; show an empty list for them
        # instead of raising AttributeError on .split.
        if not isinstance(title_ids, str):
            return []
        return df.loc[df['titleID'].isin(title_ids.split(',')), 'originalTitle'].tolist()

    # Filter directors by minimum number of films
    eligible = director_rating[director_rating["movieCount"] >= min_movies]
    # Select the top/lowest n directors based on average rating
    if selection == 'best':
        selected_directors = eligible.nlargest(num_directors, 'averageRating')
    else:
        selected_directors = eligible.nsmallest(num_directors, 'averageRating')
    # Attach names and known-for title ids; how='left' keeps directors that
    # are missing from df_names
    selected_directors = selected_directors.merge(
        df_names[['directors', 'primaryName', 'knownForTitles']],
        on='directors',
        how='left',
    )
    # Show results with index starting from 1
    selected_directors.index = range(1, len(selected_directors) + 1)
    # Replace the title ids with original titles
    selected_directors['knownForTitles'] = selected_directors['knownForTitles'].map(known_for_titles)
    # Show the full title lists; option_context restores the previous
    # max_colwidth afterwards, even if display raises
    with pd.option_context('display.max_colwidth', None):
        display(selected_directors[['primaryName', 'averageRating', 'movieCount', 'knownForTitles']])


interactive(children=(IntSlider(value=10, description='Mindestanzahl Filme', min=1), IntSlider(value=10, descr…

In [None]:
For Writers:

In [1]:
import pandas as pd
from ipywidgets import interact, widgets

# Load the movie table and keep only rows released in 1915 or later
df = pd.read_csv("oscars_merged.csv").loc[lambda movies: movies['releaseYear'] >= 1915]
# Per-writer table; display_writers reads its writers, primaryName
# and knownForTitles columns
df_names = pd.read_csv("writers_merged.csv")

# Turn a comma-separated string into a list of its parts
def split_words(string_with_commas):
    """Split ``string_with_commas`` on ',' and return the parts as a list.

    Non-string input yields an empty list. pandas reads an empty CSV field as
    the float NaN, which has no ``.split`` method, so applying this function
    to a column with missing values used to raise AttributeError.
    """
    if not isinstance(string_with_commas, str):
        return []
    return string_with_commas.split(',')
    
# Replace each comma-separated 'writers' string with a list of its parts
df['writers'] = df['writers'].apply(split_words)

# One row per (movie, writer) pair
writers_exploded = df.explode('writers')

# Per writer: mean of averageRating and number of rated movies
writer_rating = (
    writers_exploded
    .groupby("writers")["averageRating"]
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'averageRating', 'count': 'movieCount'})
    .reset_index()
)

# Interactive ranking of writers by the mean rating of their movies.
# 'initial' description width keeps the long slider labels from being cut off.
@interact(min_movies=widgets.IntSlider(min=1, max=100, step=1, value=10, description='Mindestanzahl Filme',
                                       style={'description_width': 'initial'}),
          num_writers=widgets.IntSlider(min=1, max=200, step=1, value=10, description='Anzahl der Schriftsteller',
                                        style={'description_width': 'initial'}),
          selection=['best', 'worst'])
def display_writers(min_movies, num_writers, selection):
    """Display the best- or worst-rated writers as a table numbered from 1.

    Parameters
    ----------
    min_movies : int
        Only writers whose ``movieCount`` in ``writer_rating`` is at least
        this value are considered.
    num_writers : int
        Maximum number of writers to show.
    selection : str
        'best' ranks by highest ``averageRating``; any other value ranks by
        lowest.
    """
    def known_for_titles(title_ids):
        """Return the originalTitle of every row in ``df`` whose titleID is listed in ``title_ids``."""
        # The left merge below leaves NaN (a float, not a string) for writers
        # without a row in df_names; show an empty list for them instead of
        # raising AttributeError on .split.
        if not isinstance(title_ids, str):
            return []
        return df.loc[df['titleID'].isin(title_ids.split(',')), 'originalTitle'].tolist()

    # Filter writers by minimum number of films
    eligible = writer_rating[writer_rating["movieCount"] >= min_movies]
    # Select the best/lowest n writers based on average rating
    if selection == 'best':
        selected_writers = eligible.nlargest(num_writers, 'averageRating')
    else:
        selected_writers = eligible.nsmallest(num_writers, 'averageRating')
    # Attach names and known-for title ids; how='left' keeps writers that are
    # missing from df_names
    selected_writers = selected_writers.merge(
        df_names[['writers', 'primaryName', 'knownForTitles']],
        on='writers',
        how='left',
    )
    # Show results with index starting from 1
    selected_writers.index = range(1, len(selected_writers) + 1)
    # Replace the title ids with original titles
    selected_writers['knownForTitles'] = selected_writers['knownForTitles'].map(known_for_titles)
    # Show the full title lists; option_context restores the previous
    # max_colwidth afterwards, even if display raises
    with pd.option_context('display.max_colwidth', None):
        display(selected_writers[['primaryName', 'averageRating', 'movieCount', 'knownForTitles']])


interactive(children=(IntSlider(value=10, description='Mindestanzahl Filme', min=1), IntSlider(value=10, descr…