In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the CSV that sits next to the notebook into the working frame `df`,
# which the plotting cells below read from.
df = pd.read_csv('clean_ev_data.csv')

# Switch seaborn to the "whitegrid" theme for the figures created afterwards.
sns.set_theme(style="whitegrid")

# Report the number of rows that were read as a quick sanity check.
print("Data Loaded. Rows:", df.shape[0])

In [None]:
# --- VISUAL ANALYSIS ---

# Q1: Which car models are the most energy-efficient?
# Mean of 'Efficiency (km per kWh)' per 'Vehicle Model', sorted ascending by that
# mean and flattened back into a two-column frame for plotting.
efficiency_by_model = (
    df.groupby('Vehicle Model')['Efficiency (km per kWh)']
    .mean()
    .sort_values()
    .reset_index()
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=efficiency_by_model,
    x='Efficiency (km per kWh)',
    y='Vehicle Model',
    # Passing `palette` without `hue` is deprecated in recent seaborn releases;
    # mapping hue to the categorical axis keeps the same per-bar colouring.
    hue='Vehicle Model',
    # One bar per category: keep bars at full width instead of dodging by hue.
    dodge=False,
    palette='viridis',
    ax=ax,
)

# The hue mapping duplicates the y-axis labels, so drop the legend if seaborn drew one.
redundant_legend = ax.get_legend()
if redundant_legend is not None:
    redundant_legend.remove()

ax.set_title("Average Efficiency by Vehicle Model (Km/kWh)")
ax.set_xlabel("Efficiency (km/kWh)")
fig.tight_layout()
plt.show()

In [None]:
# Q2: How do different types of drivers behave at the charger?
# One box per 'User Type' showing the distribution of 'Charging Duration (hours)'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(
    data=df,
    x='User Type',
    y='Charging Duration (hours)',
    # Passing `palette` without `hue` is deprecated in recent seaborn releases;
    # mapping hue to the categorical axis keeps the same per-box colouring.
    hue='User Type',
    # One box per category: keep boxes at full width instead of dodging by hue.
    dodge=False,
    palette='Set2',
    ax=ax,
)

# The hue mapping duplicates the x-axis labels, so drop the legend if seaborn drew one.
redundant_legend = ax.get_legend()
if redundant_legend is not None:
    redundant_legend.remove()

ax.set_title("Charging Duration Distribution by User Persona")
plt.show()

In [None]:
# Q3: Does having a bigger battery actually lead to driving longer distances?
# Scatter of battery capacity (x) against distance driven since the last charge (y).
# Points are coloured by vehicle model and scaled by energy consumed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Battery Capacity (kWh)',
    y='Distance Driven (since last charge) (km)',
    hue='Vehicle Model',
    size='Energy Consumed (kWh)',
    sizes=(20, 200),  # smallest and largest marker area used for the size mapping
    alpha=0.7,
    ax=ax,
)

# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_title("Battery Capacity vs. Distance Driven")
fig.tight_layout()
plt.show()