In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from dash import Dash, dcc, html, Input, Output

# Notebook-wide plot defaults: seaborn's white grid theme and a wide default canvas
sns.set_style("whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})

In [None]:
# --- Section 1: Synthetic Data Generation ---
# Builds one row per month for 1980-2020. Each row gets a randomly drawn vehicle
# type plus macro-economic and marketing variables that jointly drive the
# "Automobile_Sales" target. The result is the module-level DataFrame `df`.

np.random.seed(42)  # For reproducibility

# Month-end date range. An offset object is passed instead of the "M" string
# alias: pandas deprecated "M" in 2.2 in favour of "ME", while older releases do
# not know "ME". MonthEnd() produces the same month-end stamps on both.
date_range = pd.date_range(start="1980-01-01", end="2020-12-31", freq=pd.offsets.MonthEnd())
n_periods = len(date_range)

# Recession years (illustrative; applied to whole calendar years)
recession_years = [1980, 1981, 1982, 1990, 1991, 2001, 2008, 2009, 2020]

# Base frame with calendar helper columns
df = pd.DataFrame({"Date": date_range})
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month

# 1. Recession indicator: 1 for every month of a recession year, else 0
df["Recession"] = df["Year"].isin(recession_years).astype(int)
in_recession = df["Recession"].to_numpy() == 1  # boolean row mask reused below

# 2. Vehicle type, drawn once per month according to the market-share weights
vehicle_types = ["Superminicar", "Smallfamilycar", "Mediumfamilycar", "Executivecar", "Sports"]
vehicle_probabilities = [0.30, 0.25, 0.20, 0.15, 0.10]  # Market share
df["Vehicle_Type"] = np.random.choice(vehicle_types, size=n_periods, p=vehicle_probabilities)

# 3. GDP: linear upward trend plus Gaussian noise, cut by 10-20% in recession months
base_gdp = 10000 + np.arange(n_periods) * 50
gdp_noise = np.random.normal(0, 1500, n_periods)
df["GDP"] = base_gdp + gdp_noise
gdp_recession_impact = np.random.uniform(0.80, 0.90, n_periods)
df.loc[in_recession, "GDP"] *= gdp_recession_impact[in_recession]

# 4. Unemployment rate: ~5.5% baseline, +2..5 points in recessions and
#    -1..+0.5 points otherwise, clipped to the 2%-12% band
base_unemployment = np.random.normal(5.5, 0.5, n_periods)
unemployment_recession_spike = np.where(
    in_recession,
    np.random.uniform(2, 5, n_periods),
    np.random.uniform(-1, 0.5, n_periods),
)
df["Unemployment_Rate"] = np.clip(base_unemployment + unemployment_recession_spike, 2.0, 12.0)

# 5. Price: type-specific 1980 base price compounded at ~2.5% per year, plus noise
price_base_map = {
    "Superminicar": 15000, "Smallfamilycar": 20000, "Mediumfamilycar": 28000,
    "Executivecar": 45000, "Sports": 55000
}
inflation_rate = 1.025
# Kept as a local Series (not a df column) so no helper column has to be dropped later
base_price = df["Vehicle_Type"].map(price_base_map)
price_noise = np.random.normal(0, 2000, n_periods)
df["Price"] = np.maximum(base_price * inflation_rate ** (df["Year"] - 1980) + price_noise, 5000)  # floor at 5000

# 6. Advertising expenditure: log-normal around 50,000, cut by 10-30% in recession months.
#    The cut is drawn per row, matching how the GDP and sales recession multipliers are applied.
df["Advertising_Expenditure"] = np.random.lognormal(mean=np.log(50000), sigma=0.5, size=n_periods)
ad_recession_multiplier = np.random.uniform(0.7, 0.9, n_periods)
df.loc[in_recession, "Advertising_Expenditure"] *= ad_recession_multiplier[in_recession]

# 7. Seasonality weight with peaks in March (1.15) and September (1.10).
#    A semi-annual cosine centred on March peaks at months 3 and 9; the small annual
#    cosine lifts March slightly above September. (Two annual sines shifted by six
#    months are exactly antiphase and would cancel into a single June peak.)
month_angle = 2 * np.pi * (df["Month"] - 3)
df["Seasonality_Weight"] = 1 + 0.125 * np.cos(month_angle / 6) + 0.025 * np.cos(month_angle / 12)
df["Seasonality_Weight"] = np.clip(df["Seasonality_Weight"], 0.7, 1.3)  # guard against extreme values

# 8. Automobile sales (the target variable)
# Sales = Base * GDP_effect * Price_effect * Unemployment_effect * Ad_effect * Seasonality * Noise
sales_potential_map = {
    "Superminicar": 6000, "Smallfamilycar": 5000, "Mediumfamilycar": 4000,
    "Executivecar": 2500, "Sports": 1500
}
base_sales_potential = df["Vehicle_Type"].map(sales_potential_map)

# Illustrative elasticities; each effect is a ratio to the sample mean raised to a power
gdp_effect = (df["GDP"] / df["GDP"].mean()) ** 0.5  # positive, diminishing returns
price_effect = (df["Price"].mean() / df["Price"]) ** 0.8  # negative (price elasticity)
unemployment_effect = ((100 - df["Unemployment_Rate"]) / (100 - df["Unemployment_Rate"].mean())) ** 0.7  # negative
ad_effect = (df["Advertising_Expenditure"] / df["Advertising_Expenditure"].mean()) ** 0.2  # positive, weak

df["Automobile_Sales"] = (
    base_sales_potential
    * gdp_effect
    * price_effect
    * unemployment_effect
    * ad_effect
    * df["Seasonality_Weight"]
    * np.random.normal(1, 0.1, n_periods)  # multiplicative noise
)

# Extra direct recession hit on sales (20-40% drop, drawn per row)
sales_recession_multiplier = np.random.uniform(0.60, 0.80, n_periods)
df.loc[in_recession, "Automobile_Sales"] *= sales_recession_multiplier[in_recession]

# Sales are counts: floor at zero and truncate to integers
df["Automobile_Sales"] = np.maximum(0, df["Automobile_Sales"]).astype(int)

# Summary of the final frame (no helper columns were added, so this is the schema later cells see)
print("--- Sample of Generated Data ---")
print(df.head())
print("\n--- Data Information ---")
df.info()
print("\n--- Descriptive Statistics ---")
print(df.describe())

In [None]:
# --- TASK 2.1: Annual sales trend ---
# Goal: show long-run swings in total sales and how recession years line up with them.
from matplotlib.patches import Patch

# Yearly totals of the monthly sales figures
annual_sales = df.groupby("Year")["Automobile_Sales"].sum().reset_index()

# Years flagged as recession in the data; each one gets a shaded band below
unique_recession_years_in_data = sorted(df[df['Recession'] == 1]['Year'].unique())

fig, ax = plt.subplots(figsize=(15, 7))
sns.lineplot(
    x="Year",
    y="Automobile_Sales",
    data=annual_sales,
    linewidth=2.5,
    color='dodgerblue',
    label="Total Annual Sales",
    ax=ax,
)

# One borderless band per recession year, spanning half a year either side of the tick
for year_recession in unique_recession_years_in_data:
    ax.axvspan(year_recession - 0.5, year_recession + 0.5, color='red', alpha=0.15, lw=0)

# Legend: the line's own entry plus a proxy patch standing in for the shaded bands
line_handles, _ = ax.get_legend_handles_labels()
recession_patch = Patch(facecolor='red', alpha=0.15, label='Recession Period')
ax.legend(handles=line_handles + [recession_patch], loc='upper left', fontsize=12)

ax.set_title("Annual Automobile Sales Trend (1980-2020) with Recession Periods", fontsize=16, fontweight='bold')
ax.set_xlabel("Year", fontsize=14)
ax.set_ylabel("Total Automobile Sales (Units)", fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
sns.despine()  # drop the top and right spines
ax.grid(True, linestyle='--', alpha=0.7)
plt.show()

# Numeric check: mean monthly sales inside vs. outside recession years
avg_sales_recession = df.loc[df['Recession'] == 1, 'Automobile_Sales'].mean()
avg_sales_non_recession = df.loc[df['Recession'] == 0, 'Automobile_Sales'].mean()
pct_gap = (avg_sales_recession - avg_sales_non_recession) / avg_sales_non_recession * 100

print(f"Average monthly sales during recession months: {avg_sales_recession:,.0f} units")
print(f"Average monthly sales during non-recession months: {avg_sales_non_recession:,.0f} units")
print(f"Percentage difference: {pct_gap:.2f}%")

In [None]:
# --- TASK 2.2: Comparison of sales by vehicle type during recessions ---
# Objective: identify which vehicle types are more or less resilient to recessions.

# Mean monthly sales for every (year, vehicle type) pair present in the data
avg_yearly_sales_by_type = df.groupby(["Year", "Vehicle_Type"])["Automobile_Sales"].mean().reset_index()

# Recession years are derived here so this cell only depends on `df`,
# not on a variable left behind by an earlier plotting cell.
recession_years_in_data = sorted(df.loc[df["Recession"] == 1, "Year"].unique())

plt.figure(figsize=(16, 8))
# No `style=` argument: the grouped frame has only Year / Vehicle_Type / Automobile_Sales,
# so a style keyed on "Recession" could never apply.
sns.lineplot(
    data=avg_yearly_sales_by_type,
    x="Year",
    y="Automobile_Sales",
    hue="Vehicle_Type",
    linewidth=2,
    palette="viridis"  # perceptually uniform colormap
)
# Background shading marks recession years (lineplot's hue/style cannot draw backgrounds)
for year_recession in recession_years_in_data:
    plt.axvspan(year_recession - 0.5, year_recession + 0.5, color='red', alpha=0.1, lw=0)

plt.title("Average Monthly Sales by Vehicle Type Over Years", fontsize=16, fontweight='bold')
plt.xlabel("Year", fontsize=14)
plt.ylabel("Average Monthly Sales (Units)", fontsize=14)
plt.legend(title="Vehicle Type", loc='upper left', bbox_to_anchor=(1, 1), fontsize=10)  # legend outside the axes
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.tight_layout()  # make room for the outside legend
sns.despine()
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()

# Quantitative view: percentage change in mean sales, recession vs. normal months, per type.
# Columns are mapped by their Recession value (0/1) rather than by position, and
# reindex guarantees both columns exist (as NaN) even if one state is absent.
sales_by_type_recession = (
    df.groupby(['Vehicle_Type', 'Recession'])['Automobile_Sales']
    .mean()
    .unstack()
    .reindex(columns=[0, 1])
    .rename(columns={0: 'Non_Recession_Avg_Sales', 1: 'Recession_Avg_Sales'})
    .rename_axis(columns=None)
)
sales_by_type_recession['Recession_Impact_Pct'] = (
    (sales_by_type_recession['Recession_Avg_Sales'] - sales_by_type_recession['Non_Recession_Avg_Sales'])
    / sales_by_type_recession['Non_Recession_Avg_Sales']
) * 100

print("\n--- Impact of Recession on Sales by Vehicle Type ---")
print(sales_by_type_recession.sort_values(by='Recession_Impact_Pct'))

In [None]:
# --- TASK 2.3: Analysis of Economic Indicators (GDP and Unemployment Rate) ---
# Goal: show how GDP and the unemployment rate evolve and how they relate to recessions.

# Two stacked time-series panels sharing the date axis
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)

# (axes, column, line colour, legend label, panel title, y-axis label) for each panel
indicator_panels = [
    (ax1, "GDP", 'green', 'GDP',
     "GDP Over Time and Recession Periods", "GDP (Scaled Units)"),
    (ax2, "Unemployment_Rate", 'purple', 'Unemployment Rate',
     "Unemployment Rate Over Time and Recession Periods", "Unemployment Rate (%)"),
]
for panel_ax, column, line_color, line_label, panel_title, y_label in indicator_panels:
    sns.lineplot(data=df, x="Date", y=column, ax=panel_ax, color=line_color, label=line_label)
    # The 0/1 recession flag is drawn against its own right-hand axis
    flag_ax = panel_ax.twinx()
    sns.lineplot(data=df, x="Date", y="Recession", ax=flag_ax, color='red', alpha=0.3,
                 linestyle='--', label='Recession Indicator (0 or 1)')
    panel_ax.set_title(panel_title, fontsize=15, fontweight='bold')
    panel_ax.set_ylabel(y_label, fontsize=12)
    flag_ax.set_ylabel("Recession (1=Yes)", fontsize=12, color='red')
    panel_ax.legend(loc='upper left')
    flag_ax.legend(loc='center left')
    panel_ax.grid(True, linestyle='--', alpha=0.5)

ax2.set_xlabel("Date", fontsize=12)  # only the bottom panel carries the shared x label

plt.tight_layout()
sns.despine(fig=fig)
plt.show()

# Side-by-side boxplots: distribution of each indicator in normal vs. recession months
plt.figure(figsize=(12, 6))
box_panels = [
    ("GDP", "GDP Distribution: Normal vs. Recession", "GDP (Scaled Units)"),
    ("Unemployment_Rate", "Unemployment Rate: Normal vs. Recession", "Unemployment Rate (%)"),
]
for position, (column, panel_title, y_label) in enumerate(box_panels, start=1):
    plt.subplot(1, 2, position)
    sns.boxplot(data=df, x="Recession", y=column, palette=['skyblue', 'salmon'])
    plt.title(panel_title, fontsize=14, fontweight='bold')
    plt.xticks([0, 1], ["Normal", "Recession"], fontsize=12)
    plt.xlabel("")
    plt.ylabel(y_label, fontsize=12)

plt.tight_layout()
sns.despine()
plt.show()

In [None]:
# --- TASK 2.4: Seasonality in Sales ---
# Objective: display the monthly sales pattern alongside the seasonality factor.

# Per-calendar-month averages across all years. The seasonality weight is already
# baked into sales; it is averaged separately only to plot its shape for reference.
monthly_avg_sales = df.groupby("Month")["Automobile_Sales"].mean().reset_index()
avg_monthly_seasonality = df.groupby("Month")["Seasonality_Weight"].mean().reset_index()

month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Figure 1: bars = average sales (left axis), line = seasonality weight (right axis).
# plt.subplots creates the figure; no separate plt.figure() call is made beforehand,
# since that would leave an extra empty figure in the output.
fig, ax1 = plt.subplots(figsize=(14, 7))

color_sales = 'tab:blue'
ax1.set_xlabel('Month', fontsize=14)
ax1.set_ylabel('Average Monthly Sales (Units)', color=color_sales, fontsize=14)
sns.barplot(x="Month", y="Automobile_Sales", data=monthly_avg_sales, ax=ax1, palette="Blues_r", alpha=0.8)
ax1.tick_params(axis='y', labelcolor=color_sales)
ax1.set_xticks(range(12))  # bars sit at categorical positions 0-11
ax1.set_xticklabels(month_names, fontsize=12)

ax2 = ax1.twinx()
color_seasonality = 'tab:red'
ax2.set_ylabel('Average Seasonality Weight', color=color_seasonality, fontsize=14)
# Month is shifted by -1 so the line aligns with the 0-based bar positions
sns.lineplot(x=avg_monthly_seasonality["Month"] - 1, y="Seasonality_Weight", data=avg_monthly_seasonality,
             ax=ax2, color=color_seasonality, marker='o', linewidth=2, label="Seasonality Weight")
ax2.tick_params(axis='y', labelcolor=color_seasonality)
ax2.grid(False)  # a second grid would clutter the plot

ax1.set_title("Average Monthly Automobile Sales and Seasonality Weight", fontsize=16, fontweight='bold')
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()

# Figure 2: bubble chart of the same monthly averages; bubble size and colour
# both encode the average seasonality weight.
bubble_plot_data = pd.merge(monthly_avg_sales, avg_monthly_seasonality, on="Month")

plt.figure(figsize=(12, 7))
scatter = plt.scatter(
    x=bubble_plot_data["Month"],
    y=bubble_plot_data["Automobile_Sales"],
    s=bubble_plot_data["Seasonality_Weight"] * 500,  # scaled up for visibility
    c=bubble_plot_data["Seasonality_Weight"],
    cmap="viridis",
    alpha=0.7,
    edgecolors="w", linewidth=0.5
)
plt.colorbar(scatter, label='Average Seasonality Weight')
plt.title("Seasonal Impact on Average Monthly Sales (Bubble Chart)", fontsize=16, fontweight='bold')
plt.xlabel("Month", fontsize=14)
plt.ylabel("Average Monthly Sales (Units)", fontsize=14)
plt.xticks(range(1, 13), labels=month_names, fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()

In [None]:
# --- TASK 2.5: Price-sales ratio during recessions ---
# Goal: explore how sensitive demand is to price in recession months.

# Independent copy of the recession-only rows
recession_data = df.loc[df["Recession"] == 1].copy()

plt.figure(figsize=(14, 8))

# Bubbles: colour = vehicle type, size = GDP (to see whether low GDP sharpens price sensitivity)
bubble_options = dict(
    hue="Vehicle_Type",
    size="GDP",
    sizes=(30, 300),   # smallest and largest bubble area
    palette="muted",
    alpha=0.7,
)
sns.scatterplot(data=recession_data, x="Price", y="Automobile_Sales", **bubble_options)

# Dashed regression line over all recession rows; points are not redrawn
trend_line_style = {'linestyle': '--', 'alpha': 0.8}
sns.regplot(
    data=recession_data,
    x="Price",
    y="Automobile_Sales",
    scatter=False,
    color="black",
    line_kws=trend_line_style,
    label="Overall Trend",
)

plt.title("Price vs. Sales During Recession Periods", fontsize=16, fontweight='bold')
plt.xlabel("Average Vehicle Price (USD)", fontsize=14)
plt.ylabel("Automobile Sales (Units)", fontsize=14)
plt.legend(title="Legend", loc='upper right', bbox_to_anchor=(1.25, 1))  # legend sits right of the axes
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout(rect=[0, 0, 0.85, 1])  # reserve the right 15% for the legend
sns.despine()
plt.show()

In [None]:
# --- TASK 2.6: Distribution of advertising expenditure ---
# Goal: compare total advertising spend in recession months with normal months.

# Total spend per recession flag (index 0 = normal, 1 = recession)
adv_exp_by_recession_status = df.groupby("Recession")["Advertising_Expenditure"].sum()

# Share of each state in the overall total, written into the slice labels
normal_share = adv_exp_by_recession_status[0] / adv_exp_by_recession_status.sum() * 100
recession_share = adv_exp_by_recession_status[1] / adv_exp_by_recession_status.sum() * 100
labels = [
    f"Normal Periods ({normal_share:.1f}%)",
    f"Recession Periods ({recession_share:.1f}%)",
]
sizes = adv_exp_by_recession_status.values
colors = ['#66b3ff', '#ff9999']  # light blue = normal, light red = recession
explode = (0, 0.05)  # pull the recession slice out slightly

plt.figure(figsize=(8, 8))
pie_options = dict(
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # percentage printed on the slice as well as in the label
    shadow=True,
    startangle=90,
    wedgeprops={'edgecolor': 'gray'},  # outline to separate the slices
)
plt.pie(sizes, **pie_options)
plt.title("Distribution of Total Advertising Expenditure\n(Normal vs. Recession Periods)", fontsize=16, fontweight='bold')
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

In [None]:
# --- TASK 2.7: Advertising strategy by vehicle type during the crisis ---
# Goal: see which vehicle types receive the most advertising spend in recession months.

# Total recession-month spend per vehicle type, largest first
recession_rows = df[df["Recession"] == 1]
recession_adv_by_type = (
    recession_rows.groupby("Vehicle_Type")["Advertising_Expenditure"]
    .sum()
    .sort_values(ascending=False)
)

# One Spectral colour per slice
slice_colors = sns.color_palette("Spectral", len(recession_adv_by_type))

plt.figure(figsize=(10, 10))
plt.pie(
    recession_adv_by_type,
    labels=recession_adv_by_type.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=slice_colors,
    wedgeprops={'edgecolor': 'white', 'linewidth': 1},  # white outlines between slices
)
plt.title("Advertising Expenditure by Vehicle Type\nDuring Recession Periods", fontsize=16, fontweight='bold')
plt.axis('equal')
plt.show()

In [None]:
# --- TASK 2.8: Impact of unemployment on sales by vehicle type during recessions ---
# Objective: analyse how sensitive sales of each vehicle type are to the unemployment rate in recession months.

# Independent copy of the recession-only rows
recession_data = df[df["Recession"] == 1].copy()

# Figure 1: per-type trend lines without confidence bands.
# lmplot is figure-level and builds its own figure, so no plt.figure() call precedes it
# (one would only leave an empty figure in the output). Labels go through the returned grid.
trend_grid = sns.lmplot(
    data=recession_data,
    x="Unemployment_Rate",
    y="Automobile_Sales",
    hue="Vehicle_Type",
    height=7, aspect=1.5,
    palette="tab10",
    ci=None  # confidence bands off for clarity
)
trend_grid.set_axis_labels("Unemployment Rate (%)", "Automobile Sales (Units)")
trend_grid.ax.set_title("Unemployment Rate vs. Sales by Vehicle Type (During Recessions, Trend Lines)",
                        fontsize=16, fontweight='bold')

# Figure 2: the same relationship on a single Axes, one regplot per type, with 95% confidence bands
plt.figure(figsize=(15, 8))
vehicle_types_recession = recession_data["Vehicle_Type"].unique()
type_colors = sns.color_palette("tab10", len(vehicle_types_recession))

for v_type, type_color in zip(vehicle_types_recession, type_colors):
    subset = recession_data[recession_data["Vehicle_Type"] == v_type]
    sns.regplot(x="Unemployment_Rate", y="Automobile_Sales", data=subset,
                scatter_kws={'alpha': 0.2, 's': 50}, label=v_type, color=type_color, ci=95)

plt.title("Unemployment Rate vs. Sales by Vehicle Type (During Recessions)", fontsize=16, fontweight='bold')
plt.xlabel("Unemployment Rate (%)", fontsize=14)
plt.ylabel("Automobile Sales (Units)", fontsize=14)
plt.legend(title="Vehicle Type", fontsize=10)
plt.grid(True, linestyle='--', alpha=0.6)
sns.despine()
plt.show()

In [None]:
# --- TASK 2.9: Correlation Heatmap ---
# Goal: visualise pairwise linear correlations between the numeric variables.

# Numeric columns only; the Year/Month calendar helpers are left out
numerical_df = df.select_dtypes(include=np.number).drop(columns=['Year', 'Month'], errors='ignore')
correlation_matrix = numerical_df.corr()

plt.figure(figsize=(12, 10))
heatmap_options = dict(
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    linewidths=.5,
    cbar_kws={"shrink": .8},
)
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title("Correlation Matrix of Key Variables", fontsize=16, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()

# Correlations of every variable with the sales target, strongest positive first
sales_correlations = correlation_matrix["Automobile_Sales"].sort_values(ascending=False)
print("\n--- Strongest Correlations with Automobile_Sales ---")
print(sales_correlations)

In [None]:
# --- TASK 3: Interactive Dashboard with Dash ---
# Defines a Dash application on top of the `df` DataFrame built earlier in the notebook.
# The app is only defined here; launching it is a separate, explicit step.

# Standard Dash application object
app = Dash(__name__)
app.title = "Automobile Sales Analysis Dashboard"

# Choices offered by the two dropdowns
available_years = sorted(df["Year"].unique())
available_vehicle_types = sorted(df["Vehicle_Type"].unique())

# Page heading
page_heading = html.H1(
    "Automobile Sales Dashboard: Recession Impact Analysis",
    style={'textAlign': 'center', 'color': '#333', 'paddingBottom': '20px'}
)

# Year selector (left half of the control row); defaults to the latest year
year_selector = html.Div(
    style={'width': '48%', 'paddingRight': '2%'},
    children=[
        html.Label("Select Year:", style={'fontWeight': 'bold'}),
        dcc.Dropdown(
            id="year-dropdown",
            options=[dict(label=year, value=year) for year in available_years],
            value=available_years[-1],
            clearable=False
        ),
    ],
)

# Vehicle-type selector (right half); defaults to the first type alphabetically
vehicle_type_selector = html.Div(
    style={'width': '48%'},
    children=[
        html.Label("Select Vehicle Type:", style={'fontWeight': 'bold'}),
        dcc.Dropdown(
            id="vehicle-type-dropdown",
            options=[dict(label=v_type, value=v_type) for v_type in available_vehicle_types],
            value=available_vehicle_types[0],
            clearable=False
        ),
    ],
)

selector_row = html.Div(
    className="row",
    style={'display': 'flex', 'paddingBottom': '20px'},
    children=[year_selector, vehicle_type_selector],
)

# Full page: heading, controls, then the container the callback fills with graphs
app.layout = html.Div(
    style={'fontFamily': 'Arial, sans-serif'},
    children=[
        page_heading,
        selector_row,
        html.Div(id="output-graphs-container"),
    ],
)

# Callback to update graphs based on dropdown selections
@app.callback(
    Output("output-graphs-container", "children"),
    [Input("year-dropdown", "value"),
     Input("vehicle-type-dropdown", "value")]
)
def update_dashboard_graphs(selected_year, selected_vehicle_type):
    """Rebuild the three dashboard graphs for the current dropdown selection.

    Args:
        selected_year: Value of the "year-dropdown" component; rows of the
            module-level ``df`` are filtered on ``Year == selected_year``.
        selected_vehicle_type: Value of the "vehicle-type-dropdown" component;
            used to narrow the year's rows for the first graph only.

    Returns:
        A list of two ``html.Div`` rows: the first holds the monthly sales line
        chart and the per-type average bar chart side by side, the second holds
        the GDP vs. unemployment scatter plot.
    """
    # Layout options shared by all three figures: centred title, transparent backgrounds
    common_layout = dict(title_x=0.5, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')

    # All rows of the selected year (a copy, so the helper column below never touches df)
    year_df = df[df["Year"] == selected_year].copy()
    # Text version of the 0/1 flag so plotly treats it as a category, not a numeric scale
    year_df["Recession_Status"] = year_df["Recession"].map({0: "Normal", 1: "Recession"})

    # Rows of the selected vehicle type within that year (may be empty)
    specific_vehicle_df = year_df[year_df["Vehicle_Type"] == selected_vehicle_type]

    # Graph 1: monthly sales for the selected vehicle type in the selected year
    fig_sales_trend = px.line(
        specific_vehicle_df,
        x="Date",
        y="Automobile_Sales",
        title=f"Monthly Sales: {selected_vehicle_type} ({selected_year})",
        labels={"Automobile_Sales": "Units Sold", "Date": "Month"},
        markers=True
    )
    fig_sales_trend.update_layout(**common_layout)
    if specific_vehicle_df.empty:
        # Explain the blank chart instead of leaving an unexplained empty plot
        fig_sales_trend.add_annotation(
            text=f"No {selected_vehicle_type} records in {selected_year}",
            xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
        )

    # Graph 2: average sales of every vehicle type present in the selected year
    avg_sales_all_types_year = year_df.groupby("Vehicle_Type")["Automobile_Sales"].mean().reset_index()
    fig_vehicle_comparison = px.bar(
        avg_sales_all_types_year,
        x="Vehicle_Type",
        y="Automobile_Sales",
        color="Vehicle_Type",
        title=f"Average Monthly Sales by Vehicle Type ({selected_year})",
        labels={"Automobile_Sales": "Average Units Sold", "Vehicle_Type": "Vehicle Type"}
    )
    fig_vehicle_comparison.update_layout(showlegend=False, **common_layout)

    # Graph 3: GDP vs. unemployment for the selected year, coloured by recession status
    fig_econ_indicators = px.scatter(
        year_df,
        x="GDP",
        y="Unemployment_Rate",
        color="Recession_Status",
        color_discrete_map={"Normal": "#636EFA", "Recession": "#EF553B"},
        title=f"GDP vs. Unemployment Rate ({selected_year})",
        labels={"GDP": "GDP (Scaled)", "Unemployment_Rate": "Unemployment Rate (%)",
                "Recession_Status": "Period"},
        hover_data=["Month"]
    )
    fig_econ_indicators.update_layout(**common_layout)

    return [
        html.Div(className="row", style={'display': 'flex'}, children=[
            html.Div(dcc.Graph(figure=fig_sales_trend), style={'width': '50%'}),
            html.Div(dcc.Graph(figure=fig_vehicle_comparison), style={'width': '50%'})
        ]),
        html.Div(className="row", style={'paddingTop': '20px'}, children=[
            html.Div(dcc.Graph(figure=fig_econ_indicators), style={'width': '100%'})
        ])
    ]

# To run the Dash app, uncomment the two lines below and execute this cell
# (or save the notebook as a .py script and run it with `python your_file_name.py`):
# if __name__ == "__main__":
#     app.run(debug=True, port=8051)  # a non-default port, in case 8050 is already in use
# On older Dash releases the equivalent call is `app.run_server(debug=True, port=8051)`.
# With the JupyterDash class (requires installing jupyter-dash and replacing `Dash` with
# `JupyterDash`), it is `app.run_server(mode='inline', port=8051)`.

# The launch call is left commented out so the server does not start automatically
# when the notebook is executed in a static context.
# print("Dash app defined. To run, uncomment the launch call above and execute.")