**Author:** Priyadharsshini Sakrapani

In [238]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import numpy as np
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import display, HTML
from ipywidgets import ToggleButtons
from ipywidgets import ToggleButtons, Layout
from ipywidgets import Dropdown
import warnings
from ipywidgets import Text
%matplotlib inline


# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas warnings such as SettingWithCopyWarning.
warnings.filterwarnings("ignore")
# Load the two datasets used by the cells below; both paths are relative to
# the current working directory.
diamonds_df = pd.read_csv('diamonds.csv')
names_df = pd.read_csv('names.csv')

##### The code creates a heatmap using the seaborn library to visualize the average carat weight of diamonds based on their color and clarity. It also uses an interactive widget to allow users to choose to view data for all diamonds, natural diamonds, or lab diamonds. The clarity order is defined and used in the heatmap visualization.

In [239]:
# Category order applied to the 'clarity' column in plot_heatmap, which fixes
# the left-to-right order of the heatmap columns.
# NOTE(review): presumably lowest to highest clarity grade -- confirm.
clarity_order = ['SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']

def plot_heatmap(Origin):
    """Draw a heatmap of mean carat weight by colour (rows) and clarity (columns).

    Parameters
    ----------
    Origin : str
        'Natural' keeps only rows whose ``type`` is 'natural', 'Lab' keeps
        only rows whose ``type`` is 'lab'; any other value (e.g. 'All') uses
        every row of ``diamonds_df``.

    The heatmap is drawn on the current matplotlib axes; nothing is returned.
    """
    # Filtering the data based on the selected origin
    if Origin == 'Natural':
        data = diamonds_df[diamonds_df['type'] == 'natural']
    elif Origin == 'Lab':
        data = diamonds_df[diamonds_df['type'] == 'lab']
    else:
        data = diamonds_df

    # assign() builds a new frame, so neither the global diamonds_df (the
    # 'All' case) nor a filtered slice of it is modified in place.
    data = data.assign(
        clarity=pd.Categorical(data['clarity'], categories=clarity_order, ordered=True)
    )

    # Calculating the average carat weight
    table = pd.pivot_table(data, values='carat', index='colour', columns='clarity', aggfunc='mean')
    # NOTE(review): fmt only formats cell annotations, and annot is not
    # enabled here, so no numbers are printed in the cells.
    sns.heatmap(table, cmap='mako', fmt=".2f")

    plt.xlabel('clarity')
    plt.ylabel('color')
    

# Creating the interactive widget: the list of options is rendered as a
# dropdown, and plot_heatmap is called with the selected value as Origin.
interact(plot_heatmap, Origin=['All', 'Natural', 'Lab'])

# NOTE(review): this runs once when the cell executes, not on each widget change.
plt.show()


interactive(children=(Dropdown(description='Origin', options=('All', 'Natural', 'Lab'), value='All'), Output()…

The `initialPlot()` function creates a plot that shows the popularity rankings of the top 100 names for baby boys and girls between 1996 and 2020. It filters the baby names data to 2020, sorts it by the count of each name, and takes the first 100 names. It then looks up the rankings of these names for each year between 1996 and 2020 and plots each name as a faded grey line. Finally, it sets the axes, ticks, and grid lines for the plot, and returns the `ax` object.
<br>

The `update_plot(gender, name)` function redraws the plot for the selected gender and name(s). It first calls `initialPlot()` to create a new plot, and then filters the DataFrame by the selected gender and by each entered name (at most six names are used). For every name that is found, it gets the rankings between 1996 and 2020 and plots them as a line with markers, using a color cycle to distinguish between multiple names plotted on the same graph. If a name is not found in the DataFrame, no colored line is drawn for it and only the faded grey lines of the top 100 names are shown. The function does not return a value; the plot is displayed through the interactive widget.

In [240]:
def initialPlot():
    """Create the background plot: rank histories of the 2020 top-100 names.

    The 100 rows of ``names_df`` with the highest ``count`` in 2020 supply
    the names. For each of those names the rank in every year from 1996 to
    2020 is drawn as a faded grey line on a new figure.

    NOTE(review): rows are matched by name only, so a name that appears for
    both sexes contributes the rows of both; when a year occurs more than
    once the last row wins. Confirm this is intended.

    Returns
    -------
    matplotlib Axes
        The axes holding the grey lines, with title, ticks and grid set up.
    """
    _fig, ax = plt.subplots(figsize=(10,6), dpi=100)
    years = range(1996, 2021)  # 1996..2020 inclusive

    # Filter the DataFrame to only include rows from 2020 and sort by count
    df_2020 = names_df[names_df["year"] == 2020].sort_values(by="count", ascending=False)

    # Map each of the top 100 names to its {year: rank} history. Using a
    # dict means a name listed twice is only drawn once.
    top_100_names = df_2020["name"].head(100)
    top_100_rankings = {}
    for name in top_100_names:
        name_data = names_df[names_df["name"] == name]
        top_100_rankings[name] = name_data.set_index("year")["rank"].to_dict()

    # Plot every history as a faded grey line; years without a rank are
    # passed as None so the line has a gap there.
    for rankings_dict in top_100_rankings.values():
        ranks = [rankings_dict.get(year) for year in years]
        ax.plot(years, ranks, color="grey", alpha=0.1)

    ax.set_title("Popularity ranking(1 being the most popular)")
    ax.set_ylim(1000, 0)  # inverted axis: rank 1 at the top
    # The unlabeled tick at 1100 lies beyond the limit set above.
    # NOTE(review): matplotlib appears to expand the view limits to include
    # it, which is what makes the black baseline at y=1100 visible -- confirm.
    ax.set_yticks([1,100,200,300,400,500,600,700,800,900,1000,1100])
    ax.set_yticklabels(['1', '100', '200', '300', '400', '500', '600', '700', '800', '900', '1000',''])

    ax.set_xticks(range(1996, 2021, 2))

    # Show horizontal grid lines
    ax.grid(True, axis='y', linestyle='--')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
    # Black line at y=1100 standing in for the hidden bottom spine.
    ax.axhline(y=1100, color='black')

    return ax

    
# Define the function to update the plot
def update_plot(gender, name):
    """Redraw the ranking plot and highlight the entered names.

    A fresh background is created with ``initialPlot()``; every entered name
    that exists in ``names_df`` for the chosen sex is then drawn on top as a
    coloured line with a marker at each ranked year. Names that are not
    found add nothing, so only the grey background remains for them.

    Parameters
    ----------
    gender : str
        "Baby boy names" selects rows whose ``sex`` is 'boy'; any other
        value selects 'girl'.
    name : str
        Names separated by whitespace. Matching is case-insensitive and only
        the first six names are used.

    Nothing is returned, so ``interact`` has no return value to display.
    """
    ax = initialPlot()
    color_cycle = ['#206095', '#A8BD3A', '#118C7B', '#F66068', '#004662', '#27A0CC']
    years = range(1996, 2021)  # 1996..2020 inclusive

    if gender == "Baby boy names":
        gender = "boy"
    else:
        gender = "girl"

    # split() without an argument drops empty tokens, so repeated or trailing
    # spaces (and the empty initial input) do not count toward the limit.
    # Limiting to only 6 names
    names = name.split()[:6]

    # These do not depend on the entered name, so compute them once.
    upper_names = names_df['name'].str.upper()
    sex_mask = names_df['sex'].str.lower() == gender

    plotted_any = False
    for i, entered in enumerate(names):
        filtered_df = names_df[(upper_names == entered.upper()) & sex_mask]
        if filtered_df.empty:
            # initialPlot() already drew the grey background; redrawing it
            # here would only darken it and slow the update down.
            continue

        # Get the rankings for the selected name and gender
        rankings_dict = filtered_df.set_index("year")["rank"].to_dict()
        ranks = [rankings_dict.get(year) for year in years]

        color = color_cycle[i % len(color_cycle)]
        ax.plot(years, ranks, color=color, label=entered, alpha=0.9)

        # One scatter call for all the years that have a rank.
        points = [(x, y) for x, y in zip(years, ranks) if y is not None]
        if points:
            xs, ys = zip(*points)
            ax.scatter(xs, ys, color=color, s=10)
        plotted_any = True

    # Build the legend once, and only when there is something to label.
    if plotted_any:
        ax.legend(loc='lower right')


# Toggle between the two option strings; update_plot compares the selected
# value against "Baby boy names".
gender_toggle = ToggleButtons(options=['Baby boy names', 'Baby girl names'], description=" ")
# Free-text input for the names; update_plot splits the value into names.
name_textarea = Text(
    value='',  
    placeholder='Enter names separated by space. Please give time for the plot to update after each name',  
    description='Name:',  
    disabled=False,  
    layout=widgets.Layout(width='auto', height='80px') 
)

# Create the interactive widget: update_plot is called with the current
# values of the two widgets.
interact(update_plot, gender=gender_toggle, name=name_textarea)


interactive(children=(ToggleButtons(description=' ', options=('Baby boy names', 'Baby girl names'), value='Bab…

<function __main__.update_plot(gender, name)>