# COMP 6934 - Assignment 6
- Professor:       Terrence Tricco
- Term:            Winter 2024
- Student Name:    **Daniel Wiredu**
- Student User:    **dbwiredu**
- Student Email:   dbwiredu@mun.ca
- Student ID:      **202286594**

In [1]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

## Question 1:

___

In [2]:
# Load the data
diamonds = pd.read_csv('diamonds.csv')

# Define the order of clarity and color
color_order = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
clarity_order = ['SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']

@interact(origin = widgets.Dropdown(options=['All', 'Natural', 'Lab'], value='All', description="Origin"))
def f(origin):
    # Filter data based on origin
    if origin == 'All':
        filtered_data = diamonds
    else:
        filtered_data = diamonds[diamonds['type'].str.upper() == origin.upper()]
    
    # Calculate average carat weight for each combination of colour and clarity
    heatmap_data = filtered_data.groupby(['colour', 'clarity'])['carat'].mean().unstack()
    
    heatmap_data = heatmap_data.fillna(0)
    
    # Reorder columns and rows
    heatmap_data = heatmap_data.reindex(index=color_order, columns=clarity_order)  
    
    # Final Plot
    fig, ax = plt.subplots(figsize=(7,5))
    ax = sns.heatmap(data=heatmap_data, cmap="mako", ax=ax)   

interactive(children=(Dropdown(description='Origin', options=('All', 'Natural', 'Lab'), value='All'), Output()…

## Question 2:

___

In [3]:
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.cm as cm

# Load the data
olympic_data = pd.read_csv('olympic_athletes.csv')

blues_cmap = cm.get_cmap('Blues', lut=10)
blues_mod = blues_cmap(np.linspace(0.1, 1, 10))
#print(blues_mod)
blues_mod_cmap = LinearSegmentedColormap.from_list('BluesMod', blues_mod)

#gold_medals = olympic_data[(olympic_data['Medal'] == 'Gold') & (olympic_data['Year'].between(2000, 2016))]

sports_options = ['Athletics', 'Gymnastics', 'Swimming', 'Shooting', 'Cycling', 'Fencing', 'Rowing', 'Wrestling']
#sports_options = gold_medals['Sport'].unique()


# Filter gold medal winners from 2000 to 2016 Olympics
#gold_medals = olympic_data[(olympic_data['Medal'] == 'Gold') & (olympic_data['Year'].between(2000, 2016))]

def plot_dist(sport, kde, display_data):
    
    gold_medals = olympic_data[(olympic_data['Medal'] == 'Gold') & (olympic_data['Year'].between(2000, 2016))]
    if sport == 'Combined':
        gold_medals = gold_medals[gold_medals['Sport'].isin(sports_options)]   
    elif sport == 'All':
        gold_medals = gold_medals
    else:
        gold_medals = gold_medals[(gold_medals['Sport'].str.upper() == sport.upper())]
    
    #print(sport)
    #print(sports_options)
    
    # Bin the height and weight data
    height_bins = range(int(gold_medals['Height'].min()), int(gold_medals['Height'].max()), 5)
    weight_bins = range(int(gold_medals['Weight'].min()), int(gold_medals['Weight'].max()), 5)

    hist, x_edges, y_edges = np.histogram2d(gold_medals['Weight'], gold_medals['Height'], bins=[weight_bins, height_bins])

    nonzero_hist = np.where(hist > 0, hist, np.nan)

    extent = [x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]]
    
    #ax = plt.subplot(111)
    if (kde):
        sns.kdeplot(x='Weight', y='Height', data=gold_medals, fill=True, cmap="Blues")
    else:    
        # Plot 2D histogram using imshow
        plt.imshow(nonzero_hist.T, extent=extent, origin='lower', cmap="Blues")
    
    if (display_data):
        sns.scatterplot(x='Weight', y='Height', data=gold_medals.sort_values(by='Sex',ascending=False), hue='Sex')
    
    # Add labels and title
    plt.xlabel('Weight')
    plt.ylabel('Height')
    plt.xlim(20, 140)
    plt.ylim(130, 220)
    plt.xticks([20,40,60,80,100,120,140])
    plt.yticks([130,140,150,160,170,180,190,200,210,220])
    
    # Show plot
    plt.show()

dlSport = widgets.Dropdown(options=['All','Combined']+sports_options, description="Sport")
ckKde = widgets.Checkbox(value=False, description="kde")
ckDisplayData = widgets.Checkbox(value=False, description="display_data")
interact(plot_dist, sport=dlSport, kde=ckKde, display_data=ckDisplayData);

  blues_cmap = cm.get_cmap('Blues', lut=10)


interactive(children=(Dropdown(description='Sport', options=('All', 'Combined', 'Athletics', 'Gymnastics', 'Sw…

## Question 3:

___

In [4]:
cars = pd.read_csv("used_cars.csv")
years = cars['year'].unique()
brands = cars[cars['year'] == years[0]]['brand'].unique()
models = cars[(cars['year'] == years[0]) & (cars['brand'].str.upper() == brands[0].upper())]['model'].unique()

def cars_plot(year, brand, model):
    
    brands = cars[cars['year'] == int(year)]['brand'].unique()
    dlBrand.options = brands
    #brand = brands[0]
    
    models = cars[(cars['year'] == int(year)) & (cars['brand'].str.upper() == brand.upper())]['model'].unique()
    dlModel.options = models
    #model = [models[0]]
    
    fig, ax = plt.subplots(figsize=(7,5))
    cars_data = cars[(cars['year'] == int(year)) & (cars['brand'].str.upper() == brand.upper()) & (cars['model'].isin(model))]
    #print(cars_data)
    ax = sns.scatterplot(x='price', y='mileage', data=cars_data, hue='model')
    ax.set_xlabel("Price")
    ax.set_ylabel("Mileage")
    ax.set_title(str(year) + " " + brand)
    #plt.close(p1.fig)
    #brand = ''
    #model = ('')
    
dlYear = widgets.Dropdown(options=years, description="Year")
dlBrand = widgets.Dropdown(options=brands, description="Brand")
dlModel = widgets.SelectMultiple(options=models, description="Model")

dlYear.observe(cars_plot, 'value', dlBrand)
dlBrand.observe(cars_plot, 'value', dlModel)

interact(cars_plot, year=dlYear, brand=dlBrand, model=dlModel);

interactive(children=(Dropdown(description='Year', options=(2017, 2016, 2019, 2015, 2014, 2018, 2013, 2020, 20…

## References
