In [1]:
# --- 1. Install all necessary libraries ---
!pip install pandas numpy matplotlib seaborn folium scikit-learn joblib

# --- 2. Import all libraries ---
import pandas as pd
import numpy as np
import folium
import folium.plugins
import joblib
import random
import warnings

# Import scikit-learn (sklearn) components
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Ignore warnings for a cleaner output
warnings.filterwarnings('ignore')

print("All libraries installed and imported successfully.")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
All libraries installed and imported successfully.


In [2]:
print("--- 1. Starting: Cleaning 'ev-charging-stations-india.csv' ---")

# --- 1. Load Station Data ---
station_data_url = 'https://raw.githubusercontent.com/JBahulika/AI-Driven-EV-Charging-and-Route-Optimization/main/Data/ev-charging-stations-india.csv'
df_stations = pd.read_csv(station_data_url)

# --- 2. Fix 'lattitude' (typo and data type) ---
# Drop rows with no coordinates
df_stations_clean = df_stations.dropna(subset=['lattitude', 'longitude'])
# Coerce bad text to 'NaN'
df_stations_clean['lattitude'] = pd.to_numeric(df_stations_clean['lattitude'], errors='coerce')
# Drop any new NaN rows
df_stations_clean = df_stations_clean.dropna(subset=['lattitude'])
# Rename column
df_stations_clean = df_stations_clean.rename(columns={'lattitude': 'latitude'})

# --- 3. Filter Coordinate Outlier ---
# Remove the row with the impossible longitude
df_stations_clean = df_stations_clean[df_stations_clean['longitude'] <= 180]

# --- 4. Clean and Standardize 'state' Column ---
# This map fixes all cities-as-states and typos we found
state_cleaning_map = {
    'Telengana': 'Telangana', 'Tamilnadu': 'Tamil Nadu', 'Taminadu': 'Tamil Nadu',
    'Maharashra': 'Maharashtra', 'Westbengal': 'West Bengal', 'Uttrakhand': 'Uttarakhand',
    'Uttarkhand': 'Uttarakhand', 'Harayana': 'Haryana', 'Karala': 'Kerala',
    'Chattisgarh': 'Chhattisgarh', 'Jammu & Kashmir': 'Jammu and Kashmir',
    'Jammu': 'Jammu and Kashmir', 'Pondicherry': 'Puducherry', 'Andra Pradesh': 'Andhra Pradesh',
    'Andhrapradesh': 'Andhra Pradesh', 'Andhra Pradesh ': 'Andhra Pradesh',
    'Andaman': 'Andaman and Nicobar Island', 'Hyderabad': 'Telangana',
    'Hyderabadu00A0': 'Telangana', 'Rajahmundry': 'Andhra Pradesh', 'Hisar': 'Haryana',
    'Kochi': 'Kerala', 'Ernakulam': 'Kerala', 'Chikhali': 'Maharashtra',
    'Limbdi': 'Gujarat', 'Jajpur': 'Odisha', 'Bhubhaneswar': 'Odisha'
}
df_stations_clean['state'] = df_stations_clean['state'].replace(state_cleaning_map)

# Standardize remaining names (e.g., "TAMIL NADU" -> "Tamil Nadu")
df_stations_clean['state'] = df_stations_clean['state'].str.title()
# Consolidate 'Delhi Ncr' and 'Andhra pradesh'
df_stations_clean['state'] = df_stations_clean['state'].replace({
    'Andhra Pradesh': 'Andhra Pradesh',
    'Delhi Ncr': 'Delhi'
})

# --- 5. Final Drops ---
df_stations_clean = df_stations_clean.dropna(subset=['type'])
df_stations_clean = df_stations_clean.drop(columns=['address'])

print(f"--- Success: 'df_stations_clean' created with {len(df_stations_clean)} rows. ---")

--- 1. Starting: Cleaning 'ev-charging-stations-india.csv' ---
--- Success: 'df_stations_clean' created with 1529 rows. ---


In [3]:
print("--- 1. Starting: Cleaning 'Electric Vehicle Trip...csv' ---")

# --- 1. Load Trip Data (with fixed URL) ---
trip_data_url = 'https://raw.githubusercontent.com/JBahulika/AI-Driven-EV-Charging-and-Route-Optimization/main/Data/Electric%2520Vehicle%2520Trip%2520Energy%2520Consumption%2520Data.csv'
df_trip = pd.read_csv(trip_data_url)

# --- 2. Clean Column Names ---
new_column_names = {
    'Trip Energy Consumption': 'trip_energy_kwh', 'Vehicle ID': 'vehicle_id',
    'Trip Distance': 'trip_distance_km', 'Time of Day': 'time_of_day_24h',
    'Day of the Week': 'day_of_week', 'Longitude': 'longitude', 'Latitude': 'latitude',
    'Speed': 'speed_kmh', 'Current': 'current_a', 'Total Voltage': 'voltage_v',
    'Maximum Cell Temperature of Battery': 'temp_batt_max_c',
    'Minimum Cell Temperature of Battery': 'temp_batt_min_c',
    'Trip Time Length': 'trip_time_len'
}
df_trip_clean = df_trip.rename(columns=new_column_names)

# --- 3. Filter Outliers ---
df_trip_clean = df_trip_clean[(df_trip_clean['latitude'] >= -90) & (df_trip_clean['latitude'] <= 90)]
df_trip_clean = df_trip_clean[(df_trip_clean['longitude'] >= -180) & (df_trip_clean['longitude'] <= 180)]
df_trip_clean = df_trip_clean[df_trip_clean['speed_kmh'] <= 250]

# --- 4. Feature Engineering ---
df_trip_clean = df_trip_clean[df_trip_clean['trip_distance_km'] > 0]
df_trip_clean['kwh_per_km'] = df_trip_clean['trip_energy_kwh'] / df_trip_clean['trip_distance_km']

print(f"--- Success: 'df_trip_clean' created with {len(df_trip_clean)} rows. ---")

--- 1. Starting: Cleaning 'Electric Vehicle Trip...csv' ---
--- Success: 'df_trip_clean' created with 9620 rows. ---


In [4]:
print("--- 1. Starting: Cleaning 'final_dataset.csv' ---")

# --- 1. Load Demand Data ---
final_data_url = 'https://raw.githubusercontent.com/JBahulika/AI-Driven-EV-Charging-and-Route-Optimization/main/Data/final_dataset.csv'
df_demand = pd.read_csv(final_data_url)

# --- 2. Clean Columns ---
if 'Unnamed: 0' in df_demand.columns:
    df_demand = df_demand.drop(columns=['Unnamed: 0'])

new_names = {
    'State Name': 'state_name', 'Two Wheeler': 'two_wheeler', 'Three Wheeler': 'three_wheeler',
    'Four Wheeler': 'four_wheeler', 'Goods Vehicles': 'goods_vehicles',
    'Public Service Vehicle': 'public_service_vehicle', 'Special Category Vehicles': 'special_category_vehicles',
    'Ambulance/Hearses': 'ambulance_hearses', 'Construction Equipment Vehicle': 'construction_equipment_vehicle',
    'Other': 'other', 'Grand Total': 'grand_total', 'total-charging-stations': 'total_charging_stations'
}
df_demand = df_demand.rename(columns=new_names)

# --- 3. Handle Missing Values ---
# Assume NaN stations = 0 stations
df_demand['total_charging_stations'] = df_demand['total_charging_stations'].fillna(0)

# --- 4. Feature Engineering ---
df_demand['evs_per_station'] = df_demand['grand_total'] / df_demand['total_charging_stations']
# Fill 'NaN' (from 0/0) with 0. 'inf' (from x/0) is left as-is.
df_demand['evs_per_station'] = df_demand['evs_per_station'].fillna(0)

df_demand_clean = df_demand
print(f"--- Success: 'df_demand_clean' created with {len(df_demand_clean)} rows. ---")

--- 1. Starting: Cleaning 'final_dataset.csv' ---
--- Success: 'df_demand_clean' created with 32 rows. ---


In [5]:
print("--- 1. Starting: Merging datasets ---")

# --- 1. Create Merge Keys ---
df_stations_to_merge = df_stations_clean.copy()
df_demand_to_merge = df_demand_clean.copy()

df_stations_to_merge['merge_key'] = df_stations_to_merge['state'].str.lower().str.strip()
df_demand_to_merge['merge_key'] = df_demand_to_merge['state_name'].str.lower().str.strip()

# --- 2. Perform the Merge ---
df_merged_final = pd.merge(
    df_stations_to_merge,
    df_demand_to_merge,
    on='merge_key',
    how='left'
)

print(f"Merge complete. {df_merged_final['state_name'].isnull().sum()} stations did not have demand data.")

# --- 3. Build the Advanced Map ---
print("Starting advanced map generation...")
map_center = [df_merged_final['latitude'].mean(), df_merged_final['longitude'].mean()]
m_final = folium.Map(location=map_center, zoom_start=5, tiles='CartoDB positron')

# --- 3a. Add Heatmap Layer ---
heat_data = df_merged_final[['latitude', 'longitude']].values.tolist()
folium.plugins.HeatMap(heat_data, radius=15).add_to(m_final)

# --- 3b. Add Smart Markers ---
for index, row in df_merged_final.iterrows():
    
    # Check for missing demand data
    if pd.isna(row['evs_per_station']):
        demand_text = "No Demand Data"
        color = 'gray'
    else:
        # Check for 'infinite' pressure
        if np.isinf(row['evs_per_station']):
            demand_text = "Extremely High (No Stations)"
            color = 'red'
        else:
            demand_text = f"{row['evs_per_station']:.0f} EVs per Station"
            # Color-code normal markers
            if row['evs_per_station'] > 1000:
                color = 'orange'
            else:
                color = 'green'

    popup_text = f"""
    <b>Name:</b> {row['name']}<br>
    <b>City:</b> {row['city']}<br>
    <b>State:</b> {row['state']}<hr>
    <b>State Demand:</b> {demand_text}
    """
    popup = folium.Popup(popup_text, max_width=300)

    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=popup,
        tooltip=row['name'],
        icon=folium.Icon(color=color, icon='bolt', prefix='fa')
    ).add_to(m_final)

# --- 4. Save the Map ---
final_map_filename = 'ev_demand_heatmap.html'
m_final.save(final_map_filename)

print(f"--- üèÜ Success: 'ev_demand_heatmap.html' saved. ---")

--- 1. Starting: Merging datasets ---
Merge complete. 175 stations did not have demand data.
Starting advanced map generation...
--- üèÜ Success: 'ev_demand_heatmap.html' saved. ---


In [6]:
print("--- 1. Starting: Engineering features for energy model ---")

# --- 1. Engineer Features ---
df_engineered = df_trip_clean.copy()
df_engineered['temp_diff'] = df_engineered['temp_batt_max_c'] - df_engineered['temp_batt_min_c']
bins = [-1, 6, 12, 18, 24]
labels = ['Night', 'Morning', 'Afternoon', 'Evening']
df_engineered['time_of_day_bin'] = pd.cut(df_engineered['time_of_day_24h'], bins=bins, labels=labels, right=True)
df_engineered = pd.get_dummies(df_engineered, columns=['time_of_day_bin'], drop_first=True)

# --- 2. Define Features (X) and Target (y) ---
new_feature_columns = [
    'day_of_week', 'speed_kmh', 'current_a', 'voltage_v', 'temp_batt_max_c', 
    'temp_batt_min_c', 'temp_diff', 'time_of_day_bin_Morning', 
    'time_of_day_bin_Afternoon', 'time_of_day_bin_Evening'
]
target_column = 'kwh_per_km'

X = df_engineered[new_feature_columns]
y = df_engineered[target_column]

# --- 3. Split Data ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- 4. Train Model ---
print("Training energy model (rf_v2)...")
rf_v2 = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_v2.fit(X_train, y_train)

print("--- üèÜ Success: Energy model 'rf_v2' is trained. ---")

--- 1. Starting: Engineering features for energy model ---
Training energy model (rf_v2)...
--- üèÜ Success: Energy model 'rf_v2' is trained. ---


In [8]:
print("--- 1. Starting: Evaluating energy model 'rf_v2' ---")

# --- 1. Make Predictions ---
y_predictions = rf_v2.predict(X_test)

# --- 2. Calculate Metrics ---
r2 = r2_score(y_test, y_predictions)
mae = mean_absolute_error(y_test, y_predictions)
mse = mean_squared_error(y_test, y_predictions)
rmse = np.sqrt(mse) # Fix for older sklearn versions

print("\n" + "="*40 + "\n")
print("--- üìà MODEL PERFORMANCE REPORT ---")
print(f"R-squared (R¬≤):     {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f} kwh/km")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f} kwh/km")
print(f"(Context: Average value is {y_test.mean():.4f} kwh/km)")
print("="*40 + "\n")

# --- 3. Save the Model ---
model_filename = 'energy_model.joblib'
joblib.dump(rf_v2, model_filename)
print(f"--- üèÜ Success: Energy model saved as '{model_filename}' ---")

--- 1. Starting: Evaluating energy model 'rf_v2' ---


--- üìà MODEL PERFORMANCE REPORT ---
R-squared (R¬≤):     0.2569
Mean Absolute Error (MAE): 0.0410 kwh/km
Root Mean Squared Error (RMSE): 0.0587 kwh/km
(Context: Average value is 0.2187 kwh/km)

--- üèÜ Success: Energy model saved as 'energy_model.joblib' ---


In [9]:
print("--- 1. Defining Reinforcement Learning Environment ---")

class ChargingStationEnv:
    def __init__(self):
        # 1. ACTIONS: 0=Low, 1=Medium, 2=High
        self.action_prices = {0: 10, 1: 15, 2: 20}
        self.action_space_size = len(self.action_prices)

        # 2. STATES: 4 Time Bins x 3 Occupancy Levels = 12 States
        self.time_bins = ['Night', 'Morning', 'Afternoon', 'Evening']
        self.occupancy_levels = ['Low', 'Medium', 'High']
        self.state_space_size = len(self.time_bins) * len(self.occupancy_levels)

        # 3. ENVIRONMENT RULES
        self.demand_prob_schedule = [
            0.1, 0.1, 0.1, 0.1, 0.2, 0.3, # 00:00 - 05:00 (Night)
            0.5, 0.7, 0.8, 0.6, 0.5, 0.4, # 06:00 - 11:00 (Morning)
            0.5, 0.6, 0.7, 0.8, 0.9, 0.9, # 12:00 - 17:00 (Afternoon)
            1.0, 1.0, 0.9, 0.8, 0.5, 0.2  # 18:00 - 23:00 (Evening)
        ]
        self.occupancy_schedule = [
            'Low', 'Low', 'Low', 'Low', 'Low', 'Low',       # Night
            'Medium', 'Medium', 'High', 'High', 'Medium', 'Medium', # Morning
            'Medium', 'High', 'High', 'High', 'High', 'High', # Afternoon
            'High', 'High', 'High', 'Medium', 'Medium', 'Low'  # Evening
        ]
        self.current_hour = 0

    def _get_state(self):
        if 0 <= self.current_hour <= 5: time_bin_index = 0
        elif 6 <= self.current_hour <= 11: time_bin_index = 1
        elif 12 <= self.current_hour <= 17: time_bin_index = 2
        else: time_bin_index = 3

        occupancy = self.occupancy_schedule[self.current_hour]
        if occupancy == 'Low': occupancy_index = 0
        elif occupancy == 'Medium': occupancy_index = 1
        else: occupancy_index = 2
            
        return time_bin_index * len(self.occupancy_levels) + occupancy_index

    def reset(self):
        self.current_hour = 0
        return self._get_state()

    def step(self, action):
        price = self.action_prices[action]
        current_state_index = self._get_state()
        time_bin = self.time_bins[current_state_index // len(self.occupancy_levels)]
        occupancy = self.occupancy_levels[current_state_index % len(self.occupancy_levels)]

        reward = 0
        customer_arrives = (random.random() < self.demand_prob_schedule[self.current_hour])

        if customer_arrives:
            customer_stays = False
            if time_bin == 'Night' or occupancy == 'Low':
                if price <= 15: customer_stays = True
            elif time_bin == 'Evening' and occupancy == 'High':
                customer_stays = True
            else:
                if price <= 15: customer_stays = (random.random() < 0.9)
                else: customer_stays = (random.random() < 0.5)

            if customer_stays: reward = price
            else: reward = -5 # Penalty for lost sale
        
        self.current_hour += 1
        done = (self.current_hour == 24)
        new_state = self._get_state() if not done else None
        
        return new_state, reward, done

print("--- üèÜ Success: 'ChargingStationEnv' class defined. ---")

--- 1. Defining Reinforcement Learning Environment ---
--- üèÜ Success: 'ChargingStationEnv' class defined. ---


In [10]:
print("--- 1. Starting: Initializing Q-Learning Agent ---")
env = ChargingStationEnv()

# --- 1. Initialize Q-Table and Hyperparameters ---
q_table = np.zeros((env.state_space_size, env.action_space_size))
total_episodes = 50000
learning_rate = 0.1
discount_rate = 0.99
epsilon = 1.0
max_epsilon = 1.0
min_epsilon = 0.01
decay_rate = 0.0001

print(f"Running {total_episodes} training episodes (days)...")

# --- 2. Run the Training Loop ---
for episode in range(total_episodes):
    state = env.reset()
    done = False
    
    while not done:
        # Explore vs. Exploit
        if random.uniform(0, 1) < epsilon:
            action = np.random.randint(0, env.action_space_size)
        else:
            action = np.argmax(q_table[state, :])

        # Take action
        new_state, reward, done = env.step(action)
        
        # Q-Learning Formula
        if not done:
            q_table[state, action] = q_table[state, action] + learning_rate * (
                reward + discount_rate * np.max(q_table[new_state, :]) - q_table[state, action]
            )
        else:
            q_table[state, action] = q_table[state, action] + learning_rate * (reward - q_table[state, action])

        state = new_state
        
    # Decay Epsilon
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)

print("--- üèÜ Training Complete! ---")

# --- 3. Print and Save the Final "AI Brain" (The Q-Table) ---
print("\n" + "="*40 + "\n")
print("--- FINAL Q-TABLE (The AI 'Brain') ---")
np.set_printoptions(precision=2, suppress=True) 
print(q_table)
print("\n" + "="*40 + "\n")

# --- 4. Save the Q-Table ---
q_table_filename = 'pricing_model_q_table.npy'
np.save(q_table_filename, q_table)
print(f"--- üèÜ Success: Pricing model saved as '{q_table_filename}' ---")

--- 1. Starting: Initializing Q-Learning Agent ---
Running 50000 training episodes (days)...
--- üèÜ Training Complete! ---


--- FINAL Q-TABLE (The AI 'Brain') ---
[[153.09 159.45 150.12]
 [  0.     0.     0.  ]
 [  0.     0.     0.  ]
 [  0.     0.     0.  ]
 [141.33 159.64 144.92]
 [164.44 173.48 162.06]
 [  0.     0.     0.  ]
 [109.88 118.45 107.42]
 [107.81 114.73 105.41]
 [  0.09   1.24  -1.02]
 [ 10.33  18.86   9.14]
 [ 63.54  61.49  71.54]]


--- üèÜ Success: Pricing model saved as 'pricing_model_q_table.npy' ---


In [11]:
print("--- 1. Loading models back from files ---")

# --- 1a. Load the Energy Model ---
try:
    energy_model = joblib.load('energy_model.joblib')
    print("Energy model 'energy_model.joblib' loaded successfully.")
except Exception as e:
    print(f"Error loading energy model: {e}")

# --- 1b. Load the Pricing Model ---
try:
    pricing_model = np.load('pricing_model_q_table.npy')
    print("Pricing model 'pricing_model_q_table.npy' loaded successfully.\n")
except Exception as e:
    print(f"Error loading pricing model: {e}")


# --- 2. Example: Use the Energy Model ---
# Let's create a *hypothetical* trip
# We need to provide all 10 features our 'rf_v2' model was trained on
hypothetical_trip = [
    5,      # 'day_of_week' (e.g., Friday)
    60,     # 'speed_kmh'
    -10,    # 'current_a' (Regen braking)
    350,    # 'voltage_v'
    30,     # 'temp_batt_max_c'
    28,     # 'temp_batt_min_c'
    2,      # 'temp_diff' (30 - 28)
    0,      # 'time_of_day_bin_Morning' (0=False)
    1,      # 'time_of_day_bin_Afternoon' (1=True)
    0       # 'time_of_day_bin_Evening' (0=False)
]
# Convert to numpy array for the model
trip_data = np.array([hypothetical_trip])

# Make prediction
predicted_kwh_per_km = energy_model.predict(trip_data)
print("--- Energy Model Prediction Example ---")
print(f"Predicted energy use: {predicted_kwh_per_km[0]:.4f} kwh/km\n")


# --- 3. Example: Use the Pricing Model ---
# What price should we set at 'Evening, High Occupancy' (State 11)?
state_to_check = 11

# Look up the action values for that state
action_values = pricing_model[state_to_check]
# Find the index of the highest value
best_action_index = np.argmax(action_values)

price_map = {0: 'Low Price', 1: 'Medium Price', 2: 'High Price'}

print("--- Pricing Model Prediction Example ---")
print(f"For State {state_to_check} (Evening, High):")
print(f"  Action values are: {action_values}")
print(f"  The AI chooses: Index {best_action_index} -> {price_map[best_action_index]}")

--- 1. Loading models back from files ---
Energy model 'energy_model.joblib' loaded successfully.
Pricing model 'pricing_model_q_table.npy' loaded successfully.

--- Energy Model Prediction Example ---
Predicted energy use: 0.2221 kwh/km

--- Pricing Model Prediction Example ---
For State 11 (Evening, High):
  Action values are: [63.54 61.49 71.54]
  The AI chooses: Index 2 -> High Price
