In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Data Loading
# Two inputs feed this notebook:
#   * usage data  (cleaned by Stream 2)
#   * market data (Stream 4 / new 2025 specs)
USAGE_CSV = 'clean_ev_data.csv'
MARKET_CSV = 'electric_vehicles_spec_2025.csv'

# Apply the shared seaborn theme up front so every later figure picks it up.
sns.set_theme(style="whitegrid")

df_usage = pd.read_csv(USAGE_CSV)
df_market = pd.read_csv(MARKET_CSV)

# Quick sanity check on how many rows each source delivered.
print(f"Usage Data Loaded: {len(df_usage)} rows")
print(f"Market Data Loaded: {len(df_market)} rows")

In [None]:
# Data Engineering

# Estimate price based on segment (since 2025 data lacks price)
def estimate_price(segment):
    """Return a rough price estimate for a vehicle based on its segment label.

    The label is searched for segment markers such as ``'A -'`` (substring
    match, so the marker may appear anywhere in the text). Tiers are checked
    in the order listed below and the first hit wins.

    Args:
        segment: Segment label string, or a missing value (NaN/None).

    Returns:
        int: 45000 for a missing label, the tier price for a matched marker,
        or 55000 when no marker matches.
    """
    if pd.isna(segment):
        return 45000

    # (markers, price) pairs, evaluated top to bottom.
    price_tiers = (
        (('A -', 'B -'), 30000),  # Economy
        (('C -', 'D -'), 48000),  # Mid-Range
        (('E -', 'F -'), 85000),  # Premium
        (('S -',), 75000),        # Sports
    )
    for markers, tier_price in price_tiers:
        if any(marker in segment for marker in markers):
            return tier_price

    return 55000

# Attach the heuristic price to every market row, derived from its segment label.
df_market['Estimated_Price'] = df_market['segment'].map(estimate_price)

# Simplify segments
def simplify_segment(segment):
    """Collapse a detailed segment label into one of three dashboard categories.

    Markers are matched as substrings of the label. Economy markers are
    checked before Premium markers; anything else, including a missing
    label, is reported as Mid-Range.

    Args:
        segment: Segment label string, or a missing value (NaN/None).

    Returns:
        str: ``'Economy'``, ``'Premium'`` or ``'Mid-Range'``.
    """
    economy_markers = ('A -', 'B -')
    premium_markers = ('E -', 'F -', 'S -')

    if pd.isna(segment):
        return 'Mid-Range'
    if any(marker in segment for marker in economy_markers):
        return 'Economy'
    if any(marker in segment for marker in premium_markers):
        return 'Premium'
    return 'Mid-Range'

# Label every market row with its simplified dashboard category.
df_market['Category'] = df_market['segment'].map(simplify_segment)

In [None]:
# Convert efficiency (Wh/km -> km/kWh)
# A zero consumption reading would become +inf after the division and distort the
# efficiency plots further down, so zeros are treated as missing values instead.
df_market['Efficiency_km_kWh'] = 1000 / df_market['efficiency_wh_per_km'].replace(0, np.nan)

# Visuals for dashboard

# VISUAL 1: COST PER CATEGORY
# Purpose: Show the customer the price gap between Economy and Premium.
# NOTE: Estimated_Price is derived from the segment label, so the bars reflect the
# assumed price tiers rather than observed list prices.
category_order = ['Economy', 'Mid-Range', 'Premium']

fig, ax = plt.subplots(figsize=(8, 5))
# `palette` without `hue` is deprecated in seaborn 0.13 (removal planned for 0.14).
# Mapping hue to the x variable keeps the same one-colour-per-bar look;
# dodge=False keeps the bars full width on older seaborn releases.
sns.barplot(
    data=df_market,
    x='Category',
    y='Estimated_Price',
    hue='Category',
    order=category_order,
    hue_order=category_order,
    palette='Blues_d',
    dodge=False,
    ax=ax,
)
# The hue legend only repeats the x tick labels; drop it if seaborn created one.
redundant_legend = ax.get_legend()
if redundant_legend is not None:
    redundant_legend.remove()

ax.set_title("Average Car Price by Category")
ax.set_ylabel("Estimated Price ($)")
ax.set_xlabel("Vehicle Segment")
fig.tight_layout()
plt.show()

In [None]:
# VISUAL 2: EFFICIENCY VS PRICE
# Purpose: Answer "Do I get better range if I pay more?" (Miro Requirement)
# Efficiency (km/kWh) is the quantity actually plotted here.
# NOTE: Estimated_Price is derived from the segment label, so the x axis only
# takes a handful of distinct values; read the trend line with that in mind.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df_market,
    x='Estimated_Price',
    y='Efficiency_km_kWh',
    hue='Category',
    palette='viridis',
    s=100,
    alpha=0.8,
    ax=ax,
)

# Add a trend line to show the correlation.
# This must happen BEFORE the axis labels are set: regplot relabels both axes with
# the raw column names and would otherwise overwrite the readable labels below.
sns.regplot(
    data=df_market,
    x='Estimated_Price',
    y='Efficiency_km_kWh',
    scatter=False,
    color='red',
    ci=None,
    ax=ax,
)

ax.set_title("Market Efficiency: Does Higher Price Mean Better Efficiency?")
ax.set_xlabel("Estimated Car Price ($)")
ax.set_ylabel("Efficiency (km/kWh)")
fig.tight_layout()
plt.show()

In [None]:
# VISUAL 3: CHARGING SPEED & PRICE (AC vs DC Inputs)
# Purpose: Miro - "explore price differential between AC and DC inputs".
# Since most 2025 cars have DC, analyse "Fast Charging Speed" vs "Price".
fig, ax = plt.subplots(figsize=(10, 6))

# One marker per vehicle, coloured by its simplified category.
scatter_options = dict(
    x='Estimated_Price',
    y='fast_charging_power_kw_dc',
    hue='Category',
    palette='magma',
    s=100,
)
sns.scatterplot(data=df_market, ax=ax, **scatter_options)

ax.set_title("Charging Capability: Price vs. Max DC Charging Speed")
ax.set_xlabel("Estimated Car Price ($)")
ax.set_ylabel("Max DC Charging Speed (kW)")

# Dashed reference line marking the 150 kW "high speed" threshold.
ax.axhline(y=150, color='r', linestyle='--', label="High Speed Threshold (150kW)")

# Rebuild the legend so the threshold line is listed alongside the categories.
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# PREDICTOR: CHEAPEST MILE PER CITY
# Purpose: The "Customer Tool" to predict costs for actual users.

# Step 1: Get electricity costs (mean price per kWh for each charging location)
city_costs = df_usage.groupby('Charging Station Location')['Price per kWh'].mean().reset_index()

# Step 2: The Hybrid Lookup ("Manual" Cars for accuracy)
# Efficiencies are hand-entered in km/kWh.
comparison_cars = [
    {'Model': 'Nissan Leaf (Economy)', 'Eff_km_kWh': 5.5},
    {'Model': 'Tesla Model 3 (Mid)',   'Eff_km_kWh': 6.5},
    {'Model': 'BMW i3 (Premium)',      'Eff_km_kWh': 6.0},
    {'Model': 'Hummer EV (Premium)',   'Eff_km_kWh': 2.5}
]
df_compare = pd.DataFrame(comparison_cars)

# Step 3: Merge
# Cross join: every location is paired with every comparison car. The built-in
# how='cross' replaces the temporary dummy-key column and cannot collide with an
# existing column called 'key'.
df_pred = pd.merge(city_costs, df_compare, how='cross')

# Step 4: Calculate
# (price per kWh) / (km per kWh) = price per km
df_pred['Cost_Per_Km'] = df_pred['Price per kWh'] / df_pred['Eff_km_kWh']

In [None]:
# VISUAL 4: THE PREDICTION CHART
# Grouped bars: one cluster per charging location, one bar per comparison model.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=df_pred,
    x='Charging Station Location',
    y='Cost_Per_Km',
    hue='Model',
    palette='Paired',
    ax=ax,
)
ax.set_title("Customer Tool: Predicted Cost per Km by City")
ax.set_ylabel("Cost ($) per Km")

# Tilt the location names so they do not overlap.
plt.xticks(rotation=45)

# Park the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
plt.show()

print("EDA Complete. All 4 Dashboard Visuals Generated.")