In [None]:
import pandas as pd

# Read the raw EV records from the CSV next to the notebook
df = pd.read_csv("EV_Dataset.csv")

# Preview the first rows to confirm the columns loaded as expected
print(df.head())

# Cleaning, expressed as one chain:
#   * State -> surrounding whitespace trimmed, then title-cased, so spelling
#              variants of the same state fall into one group
#   * Year  -> calendar year parsed out of Registration_Date
#   * rows with no Vehicle_Type or no State are discarded
df = df.assign(
    State=lambda frame: frame['State'].str.strip().str.title(),
    Year=lambda frame: pd.to_datetime(frame['Registration_Date']).dt.year,
).dropna(subset=['Vehicle_Type', 'State'])


In [None]:
# Yearly EV sales: add up EV_Sales_Quantity within each Year and expose the
# result as a two-column frame (Year, Total_Sales).
ev_trend = (
    df.groupby('Year')['EV_Sales_Quantity']
    .sum()
    .rename('Total_Sales')  # name the summed Series before it becomes a column
    .reset_index()
)
print(ev_trend)

In [None]:
# Top 5 states ranked by total EV sales.
# The ranking sums EV_Sales_Quantity per state (the same measure the yearly
# trend uses) instead of counting rows: a row count only equals the number of
# vehicles when every record stands for exactly one vehicle.
# NOTE(review): assumes EV_Sales_Quantity is the number of vehicles in each
# record -- confirm against the dataset description.
# The result keeps the 'State' and 'Count' columns that the plotting cell reads.
top_states = (
    df.groupby('State')['EV_Sales_Quantity']
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .rename('Count')
    .reset_index()
)
print(top_states)

In [None]:
# Percentage share of EV sales per vehicle type, largest first.
# Shares are weighted by EV_Sales_Quantity (the same measure the yearly trend
# uses) rather than by the number of rows, so a record covering many vehicles
# counts for more than a record covering one.
# NOTE(review): assumes EV_Sales_Quantity is the number of vehicles in each
# record -- confirm against the dataset description.
# The result is a Series indexed by Vehicle_Type whose values add up to 100.
type_share = (
    df.groupby('Vehicle_Type')['EV_Sales_Quantity']
    .sum()
    .sort_values(ascending=False)
    .pipe(lambda totals: totals / totals.sum() * 100)
)
print(type_share)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# 1. EV growth over time
# The year span shown in the title is read from ev_trend itself, so the
# heading always matches the data that is actually plotted.
years = ev_trend['Year'].dropna().astype(int)
if years.empty:
    growth_title = "EV Registrations Growth in India"
elif years.min() == years.max():
    growth_title = f"EV Registrations Growth in India ({years.min()})"
else:
    growth_title = f"EV Registrations Growth in India ({years.min()}–{years.max()})"

fig, ax = plt.subplots()
sns.lineplot(data=ev_trend, x='Year', y='Total_Sales', ax=ax)
if not years.empty:
    # Tick only on whole years; the default locator can place fractional ticks.
    ax.set_xticks(sorted(years.unique()))
ax.set_title(growth_title)
ax.set_xlabel("Year")
ax.set_ylabel("Number of EVs")
plt.show()

# 2. Top 5 states (horizontal bars: one bar per state, length = Count)
fig, ax = plt.subplots()
sns.barplot(data=top_states, x='Count', y='State', ax=ax)
ax.set_title("Top 5 States for EV Adoption")
ax.set_xlabel("Total Registrations")
ax.set_ylabel("State")
plt.show()

# 3. Vehicle type share
fig, ax = plt.subplots()
type_share.plot(kind='pie', autopct='%1.1f%%', title="EV Type Distribution", ax=ax)
ax.set_ylabel("")  # drop the Series name pandas would otherwise print beside the pie
plt.show()