In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Path to the raw CSV export (adjust to wherever your copy lives).
file_path = "Electric_Vehicle_Population_Data.csv"

# Read the whole file into the DataFrame that every later cell works on.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Column dtypes and non-null counts (info() prints its own report).
df.info()
# Per-column number of missing values; displayed as the cell's output.
df.isna().sum()

In [None]:
# A missing electric range is recorded as 0 first, so a gap in this column
# alone never causes the row to be removed by the blanket drop below.
electric_range_filled = df['Electric Range'].fillna(0)
df['Electric Range'] = electric_range_filled

# Remove every row that still has a missing value in any column.
df.dropna(axis=0, how='any', inplace=True)


In [None]:
# Unparseable MSRP entries become NaN during coercion and are then set to 0.
msrp_numeric = pd.to_numeric(df['Base MSRP'], errors='coerce')
df['Base MSRP'] = msrp_numeric.fillna(0)

# Model years are stored as plain integers.
df['Model Year'] = df['Model Year'].astype(int)

In [None]:
# Histogram of vehicles per model year, with a KDE overlay.
# Model years are whole numbers, so each year gets its own unit-width bar
# (discrete=True). A fixed bin count such as 20 merges some years into one bar
# while leaving others alone whenever the year span is not a multiple of it,
# which creates spikes that are not present in the data.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(df['Model Year'], kde=True, color='blue', discrete=True, ax=ax)
ax.set_title('Distribution of Electric Vehicles by Model Year')
ax.set_xlabel('Model Year')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# The ten most frequent makes: index = make, value = number of rows.
top_makes = df['Make'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=top_makes.index, y=top_makes.values, palette='muted', ax=ax)
ax.set_title('Top 10 Vehicle Makes')
ax.set_xlabel('Make')
ax.set_ylabel('Number of Vehicles')
# Tilt the make names so neighbouring labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()



In [None]:
# Histogram (30 bins) of electric range with a KDE overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='Electric Range', bins=30, kde=True, color='green', ax=ax)
ax.set_title('Distribution of Electric Range')
ax.set_xlabel('Electric Range (miles)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Electric range against base MSRP, one point per vehicle, coloured by make.
# 'husl' spaces as many hues as there are makes evenly around the colour wheel.
# A fixed 10-colour palette such as 'tab10' is cycled once there are more than
# ten makes, so several makes end up sharing the same colour in the legend.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Base MSRP',
    y='Electric Range',
    hue='Make',
    alpha=0.7,
    palette='husl',
    ax=ax,
)
ax.set_title('Electric Range vs Base MSRP by Make')
ax.set_xlabel('Base MSRP ($)')
ax.set_ylabel('Electric Range (miles)')
# Park the legend outside the axes so it does not cover the points.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Count vehicles per (make, CAFV eligibility) pair, then pivot eligibility
# into columns; combinations that never occur are shown as 0.
cafv_column = 'Clean Alternative Fuel Vehicle (CAFV) Eligibility'
cafv_counts_long = df.groupby('Make')[cafv_column].value_counts()
cafv_by_make = cafv_counts_long.unstack().fillna(0)

# One stacked bar per make, one segment per eligibility category.
ax = cafv_by_make.plot(kind='bar', stacked=True, figsize=(12, 6), colormap='coolwarm')
ax.set_title('CAFV Eligibility by Make')
ax.set_xlabel('Make')
ax.set_ylabel('Count')
ax.legend(title='CAFV Eligibility', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

In [None]:
# Mean electric range per city, keeping the ten cities with the highest mean.
# The grouping key is 'City', so the result is named accordingly.
avg_range_by_city = (
    df.groupby('City')['Electric Range']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Horizontal bars: city on the y-axis, mean range on the x-axis.
plt.figure(figsize=(10, 5))
sns.barplot(x=avg_range_by_city.values, y=avg_range_by_city.index, palette='viridis')
plt.title('Top 10 Cities by Average Electric Range')
plt.xlabel('Average Electric Range (miles)')
plt.ylabel('City')
plt.show()

In [None]:
# Correlation is only defined for numeric data, so restrict to those columns.
numeric_df = df.select_dtypes(include='number')
correlations = numeric_df.corr()

# Annotated heatmap of the pairwise correlation matrix (two decimals per cell).
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# a. Percentage of Electric Vehicles Eligible for CAFV
# NOTE(review): the public Washington State export appears to label eligible
# vehicles 'Clean Alternative Fuel Vehicle Eligible' rather than a bare
# 'Eligible', in which case comparing against 'Eligible' alone would always
# give 0%. Both spellings are accepted here; confirm the actual labels with
# df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].unique().
eligible_labels = ['Clean Alternative Fuel Vehicle Eligible', 'Eligible']
cafv_status = df['Clean Alternative Fuel Vehicle (CAFV) Eligibility']

# Mean of a boolean mask = share of True rows; scale to a percentage.
eligible_percentage = cafv_status.isin(eligible_labels).mean() * 100
print(f"Percentage of vehicles eligible for CAFV: {eligible_percentage:.2f}%")

In [None]:
# Count vehicles per (city, EV type) and spread the types into columns.
ev_type_counts = df.groupby('City')['Electric Vehicle Type'].value_counts()
ev_type_table = ev_type_counts.unstack()

# For each city, the column (EV type) holding the largest count.
common_ev_type_by_city = ev_type_table.idxmax(axis=1)

print("\nMost common Electric Vehicle Type by City:")
print(common_ev_type_by_city.head(10))


In [None]:
import plotly.express as px

# Interactive version of the range-vs-price scatter: colour encodes the make,
# marker size encodes the electric range, and hovering reveals year and location.
scatter_axis_labels = {
    "Base MSRP": "Base MSRP ($)",
    "Electric Range": "Electric Range (miles)",
}
fig_scatter = px.scatter(
    data_frame=df,
    x="Base MSRP",
    y="Electric Range",
    color="Make",
    size="Electric Range",
    hover_data=["Model Year", "City", "State"],
    labels=scatter_axis_labels,
    title="Electric Range vs Base MSRP (Interactive)",
)
fig_scatter.show()

In [None]:
# Violin per model year (split by make) of electric range, with an embedded
# box plot and every individual observation drawn alongside.
violin_axis_labels = {
    "Model Year": "Model Year",
    "Electric Range": "Electric Range (miles)",
}
fig_violin = px.violin(
    data_frame=df,
    x="Model Year",
    y="Electric Range",
    color="Make",
    points="all",
    box=True,
    labels=violin_axis_labels,
    title="Electric Range Distribution by Model Year",
)
fig_violin.show()


In [None]:
# Mean electric range and mean base MSRP per make, keeping the ten makes with
# the highest mean range.
# Stored under its own name: `top_makes` already holds a Series of vehicle
# counts from the make-count bar-chart cell, and rebinding it to a DataFrame of
# means would make its meaning depend on which cell ran last.
avg_metrics_by_make = (
    df.groupby("Make")[["Electric Range", "Base MSRP"]]
    .mean()
    .sort_values(by="Electric Range", ascending=False)
    .head(10)
)

# Grouped bars: two bars per make, one for each averaged metric.
fig_bar = px.bar(
    avg_metrics_by_make.reset_index(),
    x="Make",
    y=["Electric Range", "Base MSRP"],
    barmode="group",
    title="Top 10 Makes with Average Electric Range and MSRP",
    labels={"value": "Value", "variable": "Metric"},
    text_auto=True,
)
fig_bar.show()

In [None]:
# Sunburst with states as the inner ring and EV types as the outer ring.
# Sector size is the summed electric range; sector colour is driven by the
# electric range as well, on a red-blue continuous scale.
# The figure is bound to `fig_sunburst` (matching the other `fig_*` names in
# this notebook) and explicitly shown; without show() nothing is rendered,
# because an assignment produces no cell output.
fig_sunburst = px.sunburst(
    df,
    path=["State", "Electric Vehicle Type"],
    values="Electric Range",
    color="Electric Range",
    title="Distribution of Electric Vehicle Types by State",
    color_continuous_scale="RdBu",
)
fig_sunburst.show()

In [None]:
# Parallel-categories diagram linking make, EV type and CAFV eligibility;
# ribbons are shaded by electric range on a blue continuous scale.
parallel_dimensions = [
    "Make",
    "Electric Vehicle Type",
    "Clean Alternative Fuel Vehicle (CAFV) Eligibility",
]
fig_parallel = px.parallel_categories(
    data_frame=df,
    dimensions=parallel_dimensions,
    color="Electric Range",
    color_continuous_scale="Blues",
    title="Parallel Categories Plot for Vehicle Characteristics",
)
fig_parallel.show()


In [None]:
# Mean electric range for each model year, as a two-column frame for plotting.
mean_range_per_year = df.groupby("Model Year")["Electric Range"].mean()
range_trends = mean_range_per_year.reset_index()

# Line chart of the yearly means, with a marker on every year.
line_axis_labels = {
    "Model Year": "Model Year",
    "Electric Range": "Average Electric Range (miles)",
}
fig_line = px.line(
    data_frame=range_trends,
    x="Model Year",
    y="Electric Range",
    markers=True,
    labels=line_axis_labels,
    title="Average Electric Range Trends by Model Year",
)
fig_line.show()

In [None]:
# Box plot of base MSRP for each EV type, one colour per type, with every
# observation drawn next to its box; hovering shows the make and model.
box_hover_columns = ["Make", "Model"]
fig_box = px.box(
    data_frame=df,
    x="Electric Vehicle Type",
    y="Base MSRP",
    color="Electric Vehicle Type",
    points="all",
    hover_data=box_hover_columns,
    labels={"Base MSRP": "Base MSRP ($)"},
    title="Base MSRP Distribution by Electric Vehicle Type",
)
fig_box.show()