# Job 2 - Results analysis

In [22]:
import os
import pathlib

# Project root: three directory levels above the notebook's working directory.
projectDir = str(pathlib.Path.cwd().parent.parent.parent)

In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Configure matplotlib for better Jupyter compatibility
plt.rcParams['figure.dpi'] = 100
%config InlineBackend.figure_format = 'retina'

CSV_FILE = f"{projectDir}/full-output/job2-results.csv"

# Load csv data
df = pd.read_csv(CSV_FILE)

df['year'] = df['year'].astype(int)
df['avg_rating'] = df['avg_rating'].astype(float)
price_mapping = {
    '*': 'Low',
    '**': 'Medium',
    '***': 'High',
    '****': 'Premium',
}
df['price'] = df['price']#.replace(price_mapping)

### 1. HEATMAP: States vs Prices


In [24]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Ensure matplotlib backend is set for Jupyter
%matplotlib inline

heatmap_dropdown = widgets.Dropdown(
    options=df['category'].sort_values().unique().tolist(),
    description='Category:'
)

heatmap_output = widgets.Output()

def update_heatmap(category):
    """Redraw the state-by-price heatmap of mean ratings for one category.

    Clears ``heatmap_output`` and draws a fresh seaborn heatmap into it. Rows
    are states, columns are price values, and each cell is the mean of
    ``avg_rating`` over the matching rows of ``df``.

    Args:
        category: Value of the ``category`` column used to filter ``df``.
    """
    with heatmap_output:
        clear_output(wait=True)
        cat_df = df[df['category'] == category]
        # Pivot to a (state x price) matrix of mean ratings.
        heatmap_data = cat_df.groupby(['state', 'price'])['avg_rating'].mean().unstack()
        if heatmap_data.empty:
            # Nothing to draw (no rows for this category, or none with both a
            # state and a price); report it instead of handing seaborn an
            # empty matrix.
            print(f"No data available for category {category!r}")
            return

        # Explicit figure/axes so every call below targets this figure only.
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(heatmap_data, annot=True, cmap='YlGnBu', fmt=".2f", ax=ax)
        ax.set_title(f'Average rating by state and price range for category {category}')
        ax.set_xlabel('Price')
        ax.set_ylabel('State')
        fig.tight_layout()
        plt.show()

def _on_heatmap_category_change(change):
    """Redraw the heatmap for the category newly selected in the dropdown."""
    update_heatmap(change['new'])


# React only to changes of the dropdown's selected value.
heatmap_dropdown.observe(_on_heatmap_category_change, names='value')

# Show the selector followed by the area the heatmap is drawn into.
display(heatmap_dropdown, heatmap_output)

# Draw once up front for the dropdown's initial value.
update_heatmap(heatmap_dropdown.value)

Dropdown(description='Category:', options=('ATM', 'ATV dealer', 'ATV repair shop', 'Abrasives supplier', 'Acup…

Output()

### 2. Temporal trend of categories

In [25]:
from IPython.display import display, clear_output

# Output area that the trend plot is (re)drawn into.
output = widgets.Output()

# Distinct categories in sorted order; the first one is preselected.
categories = sorted(df['category'].unique())

multi_select = widgets.SelectMultiple(
    description='Categories:',
    options=categories,
    value=[categories[0]],
    rows=10,
    disabled=False,
)

def plot_categories(change):
    """Redraw the rating-over-time line plot for the selected categories.

    Clears ``output`` and draws one line per selected category, with ``year``
    on the x axis and ``avg_rating`` on the y axis. If nothing is selected, a
    short message is printed instead of a plot.

    Args:
        change: Mapping whose ``'new'`` entry is the iterable of currently
            selected category names. Only the ``'new'`` key is read.
    """
    with output:
        clear_output(wait=True)
        selected = list(change['new'])
        if not selected:
            print("Please select at least one category")
            return

        filtered_df = df[df['category'].isin(selected)]

        # Explicit figure/axes so every call below targets this figure only.
        fig, ax = plt.subplots(figsize=(12, 6))
        # NOTE(review): if df holds several rows per (category, year), seaborn
        # aggregates them per x value and draws an uncertainty band around the
        # line - confirm that this is the intended summary.
        sns.lineplot(data=filtered_df, x='year', y='avg_rating', hue='category', marker='o', ax=ax)
        ax.set_title('Temporal trend of the average assessments for selected categories')
        ax.set_ylabel('Average Rating')
        ax.set_xlabel('Year')
        # Pin the ticks to the years actually present so the axis never shows
        # fractional values (e.g. 2012.5) chosen by the automatic locator.
        ax.set_xticks(sorted(filtered_df['year'].unique()))
        ax.tick_params(axis='x', labelrotation=45)
        ax.legend(title='Category', loc='lower left')
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        plt.show()

# Redraw whenever the set of selected categories changes.
multi_select.observe(plot_categories, names='value')

# Show the selector followed by the area the plot is drawn into.
display(multi_select, output)

# Draw once up front, mimicking a change event that carries the initial selection.
initial_change = {'new': multi_select.value}
plot_categories(initial_change)

SelectMultiple(description='Categories:', index=(0,), options=('ATM', 'ATV dealer', 'ATV repair shop', 'Abrasi…

Output()

### 3. CHOROPLETH MAP: Average Rating by State
Shows the average rating of the selected business category in each U.S. state, making it easy to see where the category is rated highest.

In [26]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Output area that the choropleth map is (re)drawn into.
chmap_output = widgets.Output()

# One dropdown entry per distinct category; values are sorted before
# de-duplication, so the entries appear in sorted order.
chmap_categories = df['category'].sort_values().unique().tolist()
chmap_dropdown = widgets.Dropdown(
    description='Category:',
    options=chmap_categories,
)

def update_choroplethmap(category):
    """Redraw the USA choropleth of mean ratings per state for one category.

    Clears ``chmap_output`` and shows a plotly choropleth in it, coloured by
    the mean of ``avg_rating`` over the rows of ``df`` in each state.

    Args:
        category: Value of the ``category`` column used to filter ``df``.
    """
    with chmap_output:
        clear_output(wait=True)

        # One row per state holding the mean rating of the chosen category.
        in_category = df['category'] == category
        state_avg = (
            df[in_category]
            .groupby(['state'], as_index=False)['avg_rating']
            .mean()
        )

        # NOTE(review): locationmode='USA-states' presumably requires `state`
        # to hold two-letter state abbreviations - confirm against the CSV.
        map_options = dict(
            locations='state',
            locationmode='USA-states',
            color='avg_rating',
            color_continuous_scale='RdYlGn',
            scope='usa',
            title=f'Average evaluation by state for {category}',
        )
        px.choropleth(state_avg, **map_options).show()

def _on_chmap_category_change(change):
    """Redraw the choropleth for the category newly selected in the dropdown."""
    update_choroplethmap(change['new'])


# React only to changes of the dropdown's selected value.
chmap_dropdown.observe(_on_chmap_category_change, names='value')

# Show the selector followed by the area the map is drawn into.
display(chmap_dropdown, chmap_output)

# Draw once up front for the dropdown's initial value.
update_choroplethmap(chmap_dropdown.value)

Dropdown(description='Category:', options=('ATM', 'ATV dealer', 'ATV repair shop', 'Abrasives supplier', 'Acup…

Output()