# EV Vehicle Demand Prediction - Data Exploration
## Week 1: Understanding the Dataset

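This notebook loads the raw EV dataset, inspects its structure and missing values, cleans the column types, and derives time-based features as the starting point for exploratory data analysis (EDA).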

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# NOTE(review): this silences every warning category for the whole session,
# deprecation notices included; narrow the filter if those matter.
warnings.filterwarnings('ignore')

# Set style for visualizations
# The 'seaborn-v0_8-*' style names were introduced in matplotlib 3.6; older
# releases only know the style as 'seaborn-darkgrid'. Fall back to the legacy
# name so the first cell does not fail with OSError on those versions.
plot_style = 'seaborn-v0_8-darkgrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-darkgrid'
plt.style.use(plot_style)
sns.set_palette('husl')

print('Libraries imported successfully!')

In [None]:
# Read the raw CSV into a DataFrame (path is relative to the notebook's folder)
raw_csv = '../data/raw/EV_DATASET.csv'
df = pd.read_csv(raw_csv)

# Summarise the frame: (rows, columns) first, then the column labels
print('Dataset Shape:', df.shape)
print('\nColumn Names:', df.columns.tolist(), sep='\n')

In [None]:
# Display basic information: per-column dtype and non-null count, plus the
# frame's memory usage (printed by pandas, nothing is returned)
df.info()

In [None]:
# First few rows (head() with no argument shows the first five)
df.head()

In [None]:
# Basic statistics (count, mean, std, min, quartiles, max) for the columns
# pandas currently treats as numeric.
# NOTE(review): this runs before the type-conversion cell, so count columns
# still stored as comma-formatted strings are presumably left out here -
# confirm, or re-run describe() after the conversion.
df.describe()

In [None]:
# Count the missing entries in every column and print the tally
missing_per_column = df.isna().sum()
print('Missing Values:', missing_per_column, sep='\n')

## Data Cleaning and Type Conversion

In [None]:
# Convert date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Convert string numbers with commas to numeric
numeric_columns = ['Battery Electric Vehicles (BEVs)', 
                   'Plug-In Hybrid Electric Vehicles (PHEVs)',
                   'Electric Vehicle (EV) Total', 
                   'Non-Electric Vehicle Total',
                   'Total Vehicles']

for col in numeric_columns:
    # Remember how many values were already missing so that anything lost
    # to errors='coerce' below can be reported instead of vanishing silently.
    missing_before = df[col].isna().sum()

    # regex=False: the comma is a literal thousands separator, not a pattern.
    df[col] = pd.to_numeric(
        df[col].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

    newly_missing = df[col].isna().sum() - missing_before
    if newly_missing:
        print(f'Warning: {newly_missing} value(s) in {col!r} could not be '
              f'parsed as numbers and were set to NaN')

print('Data types after conversion:')
print(df.dtypes)

## Feature Engineering

In [None]:
# Derive calendar features from the parsed 'Date' column
date_parts = df['Date'].dt
df['Year'] = date_parts.year
df['Month'] = date_parts.month
df['Quarter'] = date_parts.quarter
# Monthly period, e.g. for grouping by calendar month
df['YearMonth'] = date_parts.to_period('M')

# Create season
def get_season(month):
    """Map a calendar month number to its season name.

    Parameters
    ----------
    month : int or float
        Month number, 1 (January) through 12 (December). Float values such
        as 3.0 are accepted, since a month column containing missing dates
        is stored as float.

    Returns
    -------
    str or None
        'Winter' (12, 1, 2), 'Spring' (3-5), 'Summer' (6-8) or 'Fall' (9-11).
        None for anything else, including NaN, so rows with a missing date
        are not mislabelled as 'Fall'.
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    # Test Fall explicitly instead of using a catch-all else: NaN compares
    # unequal to every month and must fall through to None.
    if month in (9, 10, 11):
        return 'Fall'
    return None

# Label every row with its season by passing the month number through get_season
df['Season'] = df['Month'].map(get_season)

print('New features created successfully!')

## Exploratory Data Analysis

In [None]:
# Add more analysis cells here as you explore the data