In [1]:
import ipywidgets as widgets
from ipywidgets import interact, interactive
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import math
import contextlib

In [2]:
# Load every pre-aggregated CSV into one dict so the plots can look a frame up
# by attribute name. Each key is the column the file was grouped on, and that
# same column is used as the frame's index.
dfs = {
    column: pd.read_csv(csv_path).set_index(column)
    for column, csv_path in (
        ('Cab Type', 'resources/aggregated_data/cabprice_group.csv'),
        # ('Color', 'resources/aggregated_data/color_group.csv'),  # currently disabled
        ('Drivetrain', 'resources/aggregated_data/drivetrain_group.csv'),
        ('Fuel Type', 'resources/aggregated_data/fuel_group.csv'),
        ('Rear Wheels', 'resources/aggregated_data/wheels_group.csv'),
    )
}
# functions for generating a linear color scale for pie chart (found online here: https://www.oreilly.com/library/view/python-cookbook/0596001673/ch09s11.html).
def floatRgb(mag, cmin, cmax):
    """Return a tuple of floats between 0 and 1 for R, G, and B.

    ``mag`` is normalized against the range ``[cmin, cmax]`` and each color
    channel is a clamped linear function of that normalized value.

    Parameters:
        mag:  magnitude to convert to a color.
        cmin: lower bound of the range.
        cmax: upper bound of the range.

    Returns:
        ``(red, green, blue)``, each clamped to the interval [0, 1].
    """
    span = cmax - cmin
    if span == 0:
        # Degenerate range (cmax == cmin): use the midpoint of the scale.
        # An explicit check is used instead of catching ZeroDivisionError
        # because numpy scalars (e.g. the result of Series.min()/max()) do not
        # raise on division by zero; they produce NaN, which would leak into
        # the channels and break the integer conversion downstream.
        x = 0.5
    else:
        # Normalize to 0-1
        x = float(mag - cmin) / span
    blue = min(max(4 * (0.75 - x), 0.), 1.)
    red = min(max(4 * (x - 0.25), 0.), 1.)
    green = min(max(4 * math.fabs(x - 0.5) - 1., 0.), 1.)
    return red, green, blue

def rgb(mag, cmin, cmax):
    """Return the color for ``mag`` as a tuple of three integers in 0-255.

    The float channels produced by ``floatRgb`` are scaled by 255 and
    truncated toward zero with ``int``.
    """
    return tuple(int(channel * 255) for channel in floatRgb(mag, cmin, cmax))


def strRgb(mag, cmin, cmax):
    """Return the color for ``mag`` as a ``#rrggbb`` hex string.

    The integer channels come from ``rgb``; each is written as two lowercase
    hexadecimal digits.
    """
    channels = rgb(mag, cmin, cmax)
    return "#%02x%02x%02x" % channels


## Price Statistics By Attribute Category.

In [15]:
# Widgets for each plot:
#   stats_* -> price statistics bar chart (stats_plot)
#   dif_*   -> price differential bar chart (price_dif_plot)
#   pie_*   -> category frequency pie chart (counts_plot)
stats_attribute_dropdown = widgets.Dropdown(options=list(dfs.keys()), description='Select', value='Cab Type')
# The trait is `disabled`; the previous `disable=False` was an unrecognized
# keyword and never reached the widget.
stats_metric_select = widgets.RadioButtons(options=['mean', 'median'], value='mean', description='Metric', disabled=False)
dif_metric_select = widgets.RadioButtons(options=['mean', 'median'], value='mean', description='Metric', disabled=False)
dif_attribute_dropdown = widgets.Dropdown(options=list(dfs.keys()), description='Select', value='Cab Type')
# Initial choices are the categories of whichever attribute is selected above;
# price_dif_plot replaces these options whenever that attribute changes.
dif_attr_select = widgets.Dropdown(description='Select', options=list(dfs[dif_attribute_dropdown.value].index))
pie_attribute_dropdown = widgets.Dropdown(options=list(dfs.keys()), description='Select', value='Cab Type')

In [16]:
def stats_plot(key, metric):
    """Show a horizontal bar chart of a price statistic for one attribute.

    Parameters:
        key:    key into ``dfs`` selecting the aggregated frame to plot.
        metric: name of the column to plot; ``'mean'`` is titled "Average",
                any other value is titled "Median".
    """
    title_word = 'Average' if metric == 'mean' else 'Median'
    frame = dfs[key]

    fig = px.bar(
        frame,
        x=metric,
        y=frame.index,
        orientation='h',
        color=metric,
        title=f'{title_word} Listed Price by {key}',
        labels={metric: f'{title_word} Price ($)'},
    )
    fig.show()
    
# Earlier one-line variant, kept for reference (not executed):
# interact(avg_plot, key=widgets.Dropdown(options=list(dfs.keys()), description='Select', value='Cab Type'));
# Bind stats_plot to its attribute dropdown and metric radio buttons.
int_plot1 = interactive(stats_plot,key=stats_attribute_dropdown, metric=stats_metric_select)
# The last child of the interactive container is used as the plot's output
# area; it is given a fixed height (presumably so the page does not jump while
# the figure re-renders - confirm).
output = int_plot1.children[-1]
output.layout.height = '500px'
# Bare last expression so the notebook displays the widget container.
int_plot1

interactive(children=(Dropdown(description='Select', options=('Cab Type', 'Drivetrain', 'Fuel Type', 'Rear Whe…

## Price Differentials By Attribute and Metric

In [19]:
def color_dif(i):
    """Pick the bar color for a price differential.

    Accepts a numeric differential and returns ``'red'`` when it is negative,
    ``'green'`` otherwise (zero included).
    """
    return 'red' if i < 0 else 'green'

def text_swap(x):
    """Pick the bar's text label for a price differential.

    Returns ``'Cheaper'`` for a strictly positive differential and
    ``'More Expensive'`` for everything else (zero included).
    """
    return 'Cheaper' if x > 0 else 'More Expensive'
    
    
def price_dif_calc(key, metric, attribute):
    """Compute price differentials between one category and all the others.

    Parameters:
        key:       key into ``dfs`` selecting the aggregated frame.
        metric:    column to compare (``'mean'`` or ``'median'``).
        attribute: index label of the category to compare from.

    Returns:
        A 4-tuple of parallel lists, one entry per category other than
        ``attribute``, in the frame's index order:
        ``(differentials, labels, colors, text)`` where each differential is
        ``attribute``'s value minus the other category's value, rounded to
        two decimals.

    Raises:
        ValueError: if ``attribute`` is not one of the frame's index labels.
    """
    values = list(dfs[key][metric])
    labels = list(dfs[key].index)
    # list.index raises ValueError for an unknown attribute.
    baseline_pos = labels.index(attribute)
    baseline = values[baseline_pos]

    x = []
    other_labels = []
    for pos, (label, val) in enumerate(zip(labels, values)):
        # Skip the baseline by position, not by looking for a 0 differential:
        # another category with the same price also yields 0, and a NaN
        # baseline yields no 0 at all.
        if pos == baseline_pos:
            continue
        x.append(round(baseline - val, 2))
        other_labels.append(label)

    colors = [color_dif(val) for val in x]
    text = [text_swap(val) for val in x]
    return (x, other_labels, colors, text)

def price_dif_plot(key, metric, attribute):
    """Show a horizontal bar chart of price differentials against one category.

    Parameters:
        key:       key into ``dfs`` selecting the aggregated frame.
        metric:    ``'mean'`` (titled "Average") or anything else (titled
                   "Median"); also the column that is compared.
        attribute: index label of the category every other one is compared to.

    Side effects:
        Replaces the options of ``dif_attr_select`` with the categories of
        ``key`` so the second dropdown always matches the first.
    """
    categories = list(dfs[key].index)
    dif_attr_select.options = categories

    # When `key` has just changed, this call still carries the attribute of
    # the previous key. Replacing the options above moves the dropdown to a
    # valid value, which re-invokes this callback with that value, so the
    # stale call has nothing left to draw. Returning here avoids the
    # ValueError that the label lookup would otherwise raise.
    if attribute not in categories:
        return

    title_word = 'Average' if metric == 'mean' else 'Median'

    x, labels, colors, text = price_dif_calc(key, metric, attribute)
    fig = go.Figure(data=[go.Bar(
        x=x,
        y=labels,
        orientation='h',
        text=text,
        textposition='auto',
        marker_color=colors)])
    fig.update_layout(title_text=f'{title_word} Price Differential Compared to {attribute}')
    fig.show()

# Known issue: a ValueError has been observed from this widget; it does not
# affect the functionality of the graphs. The suppression below is left
# disabled until a proper solution is found.
# with contextlib.suppress(ValueError):
# Bind price_dif_plot to its attribute, category and metric widgets.
int_plot2 = interactive(price_dif_plot,key=dif_attribute_dropdown, attribute=dif_attr_select, metric=dif_metric_select)
# Rebinds the notebook-level name `output` to this container's last child
# (used as the plot's output area) and gives it a fixed height.
output = int_plot2.children[-1]
output.layout.height = '500px'
# Bare last expression so the notebook displays the widget container.
int_plot2

interactive(children=(Dropdown(description='Select', index=1, options=('Cab Type', 'Drivetrain', 'Fuel Type', …

## Frequency of Each Attribute Category.

In [33]:
def counts_plot(key):
    """Show a pie chart of how often each category of an attribute occurs.

    Parameters:
        key: key into ``dfs``; the selected frame must have a ``'count'``
             column.

    The first slice (two-category frames) or the first two slices (larger
    frames) are pulled out of the pie. Frames with fewer than two categories
    get no pull, since there is nothing to separate a slice from. Slice colors
    come from ``strRgb``, scaled between the smallest and largest count.
    """
    counts = dfs[key]['count']
    n_slices = len(dfs[key])

    seg_pull = n_slices * [0]
    if n_slices == 2:
        seg_pull[0] = .2
    elif n_slices > 2:
        seg_pull[0] = .3
        seg_pull[1] = .3

    # The bounds are the same for every slice, so compute them once.
    lowest, highest = counts.min(), counts.max()
    color = [strRgb(val, lowest, highest) for val in counts]

    fig = go.Figure(data=[go.Pie(
        labels=dfs[key].index,
        values=counts,
        marker=dict(colors=color, line=dict(color='black', width=1)),
        textinfo='label+percent',
        pull=seg_pull,
        hole=.1)])
    fig.show()

# Bind counts_plot to the pie chart's attribute dropdown.
int_plot3 = interactive(counts_plot, key=pie_attribute_dropdown)
# Size this plot's own output area (the container's last child). The previous
# code took the child from int_plot2, so the pie chart's height was never set.
output3 = int_plot3.children[-1]
output3.layout.height = '500px'
# Bare last expression so the notebook displays the widget container.
int_plot3

interactive(children=(Dropdown(description='Select', options=('Cab Type', 'Drivetrain', 'Fuel Type', 'Rear Whe…