<a href="https://colab.research.google.com/github/Mac1211/ML-Projects/blob/main/EV_market_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [None]:
# Load data: read ev_market_data.csv (path is relative to the working
# directory) into the `ev_data` DataFrame that the cells below build on.
ev_data = pd.read_csv(filepath_or_buffer="ev_market_data.csv")

In [None]:
# Data preprocessing: keep only fully populated rows. A row is discarded as
# soon as any one of its columns is missing (these are the pandas defaults,
# spelled out here for the reader).
ev_data = ev_data.dropna(axis=0, how="any")

In [None]:
# Geographic segmentation: flag rows whose `city` is one of the listed cities.
# The match is exact, so it is sensitive to case and surrounding whitespace.
urban_areas = ['Delhi', 'Mumbai', 'Bengaluru', 'Chennai', 'Hyderabad']
# Vectorised membership test instead of a per-row Python lambda; the cast
# produces the same 0/1 integer flag as before.
ev_data['urban_area'] = ev_data['city'].isin(urban_areas).astype('int64')

In [None]:
# Demographic segmentation, encoded as 0/1 integer flags.
# high_income: annual_income strictly above 1,200,000
#   (currency is not stated in this notebook - presumably INR; TODO confirm).
ev_data['high_income'] = (ev_data['annual_income'] > 1200000).astype('int64')
# young_age: age within 18..35, both ends included (between() is inclusive
# on both sides by default).
ev_data['young_age'] = ev_data['age'].between(18, 35).astype('int64')

In [None]:
# Psychographic segmentation, encoded as 0/1 integer flags.
# Both scores are assumed to range from 1-5 (assumption carried over from the
# original notebook; it is not checked here).
# NOTE(review): if the scores are whole numbers, `> 4` only matches a score
# of 5 - confirm that this is intended rather than `>= 4`.
ev_data['env_conscious'] = (ev_data['env_score'] > 4).astype('int64')
ev_data['tech_savvy'] = (ev_data['tech_score'] > 4).astype('int64')

In [None]:
# Behavioral segmentation, encoded as 0/1 integer flags.
# daily_commute: commute_distance strictly below 100 (assumed to be in km).
ev_data['daily_commute'] = (ev_data['commute_distance'] < 100).astype('int64')
# home_charging: 1 only where has_home_charging equals 1; any other value
# maps to 0.
ev_data['home_charging'] = (ev_data['has_home_charging'] == 1).astype('int64')

In [None]:
# Industry/Commercial segmentation: 1 where vehicle_purpose is exactly the
# string 'commercial' (case-sensitive), 0 for every other value.
ev_data['commercial_usage'] = (ev_data['vehicle_purpose'] == 'commercial').astype('int64')

In [None]:
# Clustering: group rows into 3 clusters using only the psychographic,
# behavioral and commercial 0/1 flags (urban_area, high_income and young_age
# are not part of the feature matrix).
X = ev_data[['env_conscious', 'tech_savvy', 'daily_commute', 'home_charging', 'commercial_usage']]
# n_init is pinned explicitly: scikit-learn's default changed from 10 to
# 'auto' (a single run with k-means++ initialisation), so relying on the
# default can give different clusters depending on the installed version.
# random_state fixes the seed so reruns are repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
# One cluster id (0, 1 or 2) per row, in the same row order as X.
ev_data['cluster'] = kmeans.labels_

In [None]:
# Analyze clusters: per-cluster mean of every numeric column. For the 0/1
# flags this is the share of rows in the cluster that carry the flag.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer
# silently skips text columns such as `city` and `vehicle_purpose` and raises
# a TypeError instead.
cluster_analysis = ev_data.groupby('cluster').mean(numeric_only=True)
print(cluster_analysis)

In [None]:
# Identify potential target segments: rows that carry every flag listed in
# `required_flags` and are not flagged as commercial usage.
required_flags = ['urban_area', 'high_income', 'young_age', 'env_conscious',
                  'tech_savvy', 'daily_commute', 'home_charging']
# Start from the one negative criterion, then AND in each positive flag.
segment_mask = ev_data['commercial_usage'] == 0
for flag in required_flags:
    segment_mask = segment_mask & (ev_data[flag] == 1)
target_segments = ev_data[segment_mask]

In [None]:
# Report how many rows ended up in the target segment.
print(f"Potential target segment size: {target_segments.shape[0]}")