# Market Segmentation Analysis of Electric Vehicles (EVs) in India

This notebook presents a comprehensive market segmentation analysis of electric vehicles in India. The analysis aims to identify distinct market segments based on vehicle attributes like range, price, style, and capacity.

In [None]:
## Data Loading and Initial Exploration

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Read the EV dataset from disk into a DataFrame and preview the first rows
file_path = 'EVIndia.csv' # Update the path according to your dataset location
ev_data = pd.read_csv(filepath_or_buffer=file_path)
ev_data.head(n=5)

In [None]:
## Data Cleaning and Preprocessing

# Converting 'PriceRange' and 'Range' to numerical values and handling missing values
# (The actual code for conversion will depend on the format of these columns in your dataset)

# Columns fed to the clustering step.
features_to_scale = ['Range', 'MinPrice', 'MaxPrice'] # Update these features based on your dataset

# Parse the selected columns as numbers. Numeric strings such as "150" are accepted;
# text that cannot be parsed raises here, with the offending value in the message,
# instead of failing later inside the scaler.
ev_features = ev_data[features_to_scale].apply(pd.to_numeric)

# Fill missing values with each column's median. StandardScaler passes NaN through
# unchanged and KMeans refuses input containing NaN, so gaps must be closed first.
imputer = SimpleImputer(strategy='median')
ev_features_imputed = imputer.fit_transform(ev_features)

# Standardizing the numerical data (zero mean, unit variance per column) so that no
# single feature dominates the Euclidean distances used by K-means.
scaler = StandardScaler()
ev_data_scaled = scaler.fit_transform(ev_features_imputed)

In [None]:

## Exploratory Data Analysis (EDA)

# Descriptive statistics.
# A notebook only auto-renders the LAST expression of a cell; this call sits in the
# middle, so it must be passed to display() (an IPython built-in) or its output is lost.
display(ev_data.describe())

# Distribution of the 'Range' feature
plt.figure(figsize=(12, 5))
sns.histplot(ev_data['Range'], bins=15)
plt.title('Distribution of Range')
plt.show()

In [None]:

## Clustering Analysis Using K-Means

# Elbow method to determine the optimal number of clusters.
# KMeans raises ValueError when n_clusters exceeds the number of samples, so the
# candidate range is capped at the dataset size (at most 10 candidates).
max_k = min(10, len(ev_data_scaled))
k_values = list(range(1, max_k + 1))

sse = []
for k in k_values:
    # n_init is pinned explicitly: its default changed from 10 to 'auto' in
    # scikit-learn 1.4, so leaving it implicit gives version-dependent results.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(ev_data_scaled)
    # inertia_ is the within-cluster sum of squared distances for this k
    sse.append(kmeans.inertia_)

# Plotting the Elbow graph
plt.figure(figsize=(10, 6))
plt.plot(k_values, sse, marker='o')
plt.title('Elbow Method for Determining Optimal Number of Clusters')
plt.xlabel('Number of Clusters')
plt.ylabel('Sum of Squared Errors (SSE)')
plt.show()

# Applying K-means clustering
optimal_clusters = 4 # This can be adjusted based on the Elbow graph
# n_init is set explicitly so the segmentation does not depend on the installed
# scikit-learn version (the default changed from 10 to 'auto' in 1.4).
kmeans = KMeans(n_clusters=optimal_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(ev_data_scaled)
# Attach the segment label to each vehicle; re-running this cell overwrites the column.
ev_data['Cluster'] = clusters


In [None]:
## Cluster Analysis and Interpretation

# Statistical summary of each cluster.
# numeric_only=True restricts the mean to numeric columns; without it pandas >= 2.0
# raises TypeError as soon as the frame contains text columns (e.g. 'Style').
cluster_summary = ev_data.groupby('Cluster').mean(numeric_only=True)
# Number of vehicles per cluster, aligned to the summary by the 'Cluster' index.
cluster_summary['Count'] = ev_data.groupby('Cluster').size()
cluster_summary

In [None]:
## Categorical Variable Analysis within Clusters

# Vehicle counts per cluster, broken down by the 'Style' column.
# Only 'Style' is plotted here; 'Transmission' and 'Capacity' (if present in the
# dataset) can be inspected the same way by changing `hue`.
fig, ax = plt.subplots(figsize=(12, 7))
sns.countplot(data=ev_data, x='Cluster', hue='Style', ax=ax)
ax.set_title('Style Distribution in Each Cluster')
# Anchor the legend outside the axes so it does not cover the bars.
ax.legend(title='Style', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()