In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import ipywidgets as widgets
import seaborn as sns
from data_gen import gen_df

## Defining a function which plots the distribution of the IMDB rating conditioned on award wins
def fig1_J():
    """Show an interactive plot of IMDB rating densities by award-win category.

    Loads 'imdb.csv' through ``gen_df``, assigns every row to one of six
    award-win categories (0, 1-10, 11-20, 21-30, 31-40, +40) and registers a
    plotting callback with ``widgets.interact`` that exposes one checkbox per
    category.

    Args:
        No arguments are needed.

    Returns:
        None. The interactive plot is displayed as a side effect.

    Notice:
        It will raise an error if the function is given an argument!
    """

    ## Importing data and making new dataframe
    # NOTE(review): gen_df is presumably returning a DataFrame with numeric
    # 'nrOfWins' and 'imdbRating' columns -- confirm against data_gen.
    filename = 'imdb.csv'
    df = gen_df(filename)
    imdb = df.copy()  # work on a copy so the helper column is not added to df

    ## Creating variable that groups number of wins and a list with the intervals
    nrwins = ['0', '1-10', '11-20', '21-30', '31-40', '+40']

    # Right-closed bin edges so the categories match the labels exactly:
    # (-inf, 0] -> 0, (0, 10] -> 1, (10, 20] -> 2, ..., (40, inf) -> 5.
    # The previous half-open comparisons put 10, 20, 30 and 40 wins one
    # category too high (e.g. 10 wins was shown under '11-20').
    edges = [-np.inf, 0, 10, 20, 30, 40, np.inf]
    imdb['nrw'] = (
        pd.cut(imdb['nrOfWins'], bins=edges, labels=False)
        .fillna(0)  # missing win counts stay in category 0, as before
        .astype(int)
    )

    ## Defining a function to plot the distribution of the IMDB rating condition on award wins
    def fig(win_0, win_110, win_1120, win_2130, win_3140, win_40):
        """Plot the distribution of the IMDB rating conditioned on award wins.

        Args:
            win_0: Specifies whether to plot the distribution of ratings for movies with 0 wins
            win_110: Specifies whether to plot the distribution of ratings for movies with 1-10 wins
            win_1120: Specifies whether to plot the distribution of ratings for movies with 11-20 wins
            win_2130: Specifies whether to plot the distribution of ratings for movies with 21-30 wins
            win_3140: Specifies whether to plot the distribution of ratings for movies with 31-40 wins
            win_40: Specifies whether to plot the distribution of ratings for movies with +40 wins

        Returns:
            None. The curves are drawn on the current matplotlib axes.

        Notice:
            The function is generated so that it can be called using widgets.interact.
            Thus, it is not intended to be used on its own.
        """

        # Make lists to plot the win categories and give them colors
        wins = [win_0, win_110, win_1120, win_2130, win_3140, win_40]
        color = ['red', 'orange', 'yellow', 'lime', 'blue', 'purple']

        # Loop over win category to plot the distribution of their ratings
        plotted = False
        for i, (label, show, line_color) in enumerate(zip(nrwins, wins, color)):
            if not show:  # Only plot distribution if box is 'checked'
                continue

            ratings = imdb.loc[imdb['nrw'] == i, 'imdbRating'].dropna()
            # A density estimate needs at least two distinct observations;
            # skip degenerate categories instead of failing the whole figure.
            if ratings.nunique() < 2:
                continue

            # kdeplot replaces the deprecated distplot(hist=False, kde=True)
            sns.kdeplot(ratings, linewidth=2, label=label, color=line_color)
            plotted = True

        # Legends and labels
        if plotted:  # avoid the "no handles" warning when nothing was drawn
            plt.legend(loc='upper left')
        plt.title('IMDB rating conditioned on numbers of award wins')
        plt.xlabel('IMDB rating')
        plt.ylabel('Density')

    # Making the figure interactive so it is optional which win categories to plot
    widgets.interact(
        fig,
        win_0=widgets.Checkbox(description=nrwins[0], value=True, disabled=False),
        win_110=widgets.Checkbox(description=nrwins[1], value=True, disabled=False),
        win_1120=widgets.Checkbox(description=nrwins[2], value=True, disabled=False),
        win_2130=widgets.Checkbox(description=nrwins[3], value=True, disabled=False),
        win_3140=widgets.Checkbox(description=nrwins[4], value=True, disabled=False),
        win_40=widgets.Checkbox(description=nrwins[5], value=True, disabled=False),
    )

# Build the checkbox widgets and display the interactive figure in this cell
fig1_J()

interactive(children=(Checkbox(value=True, description='0'), Checkbox(value=True, description='1-10'), Checkbo…