In [1]:
from kagglehub import dataset_download
import pandas as pd
import os

# Fetch the dataset through kagglehub; `path` is the local directory returned
# by dataset_download and is read by the following cells.
path = dataset_download("valakhorasani/electric-vehicle-charging-patterns")
print(f"Dataset downloaded to: {path}")

  from .autonotebook import tqdm as notebook_tqdm


Dataset downloaded to: C:\Users\divya\.cache\kagglehub\datasets\valakhorasani\electric-vehicle-charging-patterns\versions\1


In [2]:
# List everything that was placed in the download directory.
files = os.listdir(path)
print("Files inside dataset:", files)

# os.listdir returns entries in arbitrary order and the directory may hold
# non-CSV entries, so select the CSV explicitly instead of trusting files[0].
# Sorting makes the choice deterministic if more than one CSV is present.
csv_files = sorted(name for name in files if name.lower().endswith(".csv"))
if not csv_files:
    raise FileNotFoundError(
        f"No CSV file found in {path!r}; directory contents: {files}"
    )

# Load the raw sessions table and show a quick preview.
df = pd.read_csv(os.path.join(path, csv_files[0]))
print("Original shape:", df.shape)
print(df.head())

Files inside dataset: ['ev_charging_patterns.csv']
Original shape: (1320, 20)
  User ID Vehicle Model  Battery Capacity (kWh) Charging Station ID  \
0  User_1        BMW i3              108.463007         Station_391   
1  User_2  Hyundai Kona              100.000000         Station_428   
2  User_3    Chevy Bolt               75.000000         Station_181   
3  User_4  Hyundai Kona               50.000000         Station_327   
4  User_5  Hyundai Kona               50.000000         Station_108   

  Charging Station Location  Charging Start Time    Charging End Time  \
0                   Houston  2024-01-01 00:00:00  2024-01-01 00:39:00   
1             San Francisco  2024-01-01 01:00:00  2024-01-01 03:01:00   
2             San Francisco  2024-01-01 02:00:00  2024-01-01 04:48:00   
3                   Houston  2024-01-01 03:00:00  2024-01-01 06:42:00   
4               Los Angeles  2024-01-01 04:00:00  2024-01-01 05:46:00   

   Energy Consumed (kWh)  Charging Duration (hours)  Cha

In [3]:
# Remove exact duplicate rows, then discard every row that still contains a
# missing value (df.fillna(0) would be the alternative if rows must be kept).
df = df.drop_duplicates().dropna()

print("After cleaning:", df.shape)

After cleaning: (1131, 20)


In [5]:
# Share of the charged energy assumed to be dischargeable back to the grid
# (dummy V2G assumption; tune here rather than inside the expression below).
V2G_DISCHARGE_FRACTION = 0.3

# Convert start/end times to datetime (using correct column names)
df['Charging Start Time'] = pd.to_datetime(df['Charging Start Time'])
df['Charging End Time'] = pd.to_datetime(df['Charging End Time'])

# Session duration in minutes, derived from the two timestamps
df['duration_min'] = (df['Charging End Time'] - df['Charging Start Time']).dt.total_seconds() / 60

# Hour of day & day of week
df['start_hour'] = df['Charging Start Time'].dt.hour
df['day_of_week'] = df['Charging Start Time'].dt.dayofweek   # 0 = Monday, 6 = Sunday

# Weekend flag (1 for Saturday/Sunday, else 0). Vectorized comparison instead
# of a row-wise apply; cast to int64 so the column dtype matches what the
# previous lambda-based version produced.
df['is_weekend'] = df['day_of_week'].ge(5).astype('int64')

# Flexible regulation capability (dummy example for V2G).
# NOTE: the value is energy (kWh * fraction), not power, despite the "kW" in
# the column name; the name is kept so consumers of the saved CSV keep working.
df['flexible_kW'] = df['Energy Consumed (kWh)'] * V2G_DISCHARGE_FRACTION

In [18]:
# Destination of the processed table, relative to the notebook's working directory.
processed_path = "../results/processed_ev_data.csv"

# Create the parent folder if it is missing, then write the frame without the index.
output_dir = os.path.dirname(processed_path)
os.makedirs(output_dir, exist_ok=True)
df.to_csv(processed_path, index=False)

print("Processed dataset saved to:", processed_path)
print(df.head())

Processed dataset saved to: ../results/processed_ev_data.csv
  User ID Vehicle Model  Battery Capacity (kWh) Charging Station ID  \
0  User_1        BMW i3              108.463007         Station_391   
1  User_2  Hyundai Kona              100.000000         Station_428   
2  User_3    Chevy Bolt               75.000000         Station_181   
3  User_4  Hyundai Kona               50.000000         Station_327   
4  User_5  Hyundai Kona               50.000000         Station_108   

  Charging Station Location Charging Start Time   Charging End Time  \
0                   Houston 2024-01-01 00:00:00 2024-01-01 00:39:00   
1             San Francisco 2024-01-01 01:00:00 2024-01-01 03:01:00   
2             San Francisco 2024-01-01 02:00:00 2024-01-01 04:48:00   
3                   Houston 2024-01-01 03:00:00 2024-01-01 06:42:00   
4               Los Angeles 2024-01-01 04:00:00 2024-01-01 05:46:00   

   Energy Consumed (kWh)  Charging Duration (hours)  Charging Rate (kW)  ...  \
0    