## Electric Vehicle (EV) Charging Demand Prediction

In [1]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever pip is first on the shell PATH.
# joblib is listed explicitly because the notebook imports it directly.
%pip install pandas numpy matplotlib seaborn scikit-learn joblib



**Import Required Libraries**

In [3]:
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

**Load the Dataset**

In [7]:
# Path of the raw CSV, resolved relative to the notebook's working directory.
DATASET_PATH = "Dataset.csv"

# Load the raw data with default parsing; no dtype conversion is requested here.
df = pd.read_csv(DATASET_PATH)

In [8]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
0,September 30 2022,Riverside,CA,Passenger,7,0,7,460,467,1.5
1,December 31 2022,Prince William,VA,Passenger,1,2,3,188,191,1.57
2,January 31 2020,Dakota,MN,Passenger,0,1,1,32,33,3.03
3,June 30 2022,Ferry,WA,Truck,0,0,0,3575,3575,0.0
4,July 31 2021,Douglas,CO,Passenger,0,1,1,83,84,1.19


**Initial Data Exploration**

In [10]:
# df.shape is a (row_count, column_count) tuple.
frame_shape = df.shape
print("Number of rows and columns:", frame_shape)

Number of rows and columns: (20819, 10)


In [11]:
print("Data Types and Memory:")
# DataFrame.info() writes its summary directly to stdout and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

Data Types and Memory:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20819 entries, 0 to 20818
Data columns (total 10 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Date                                      20819 non-null  object 
 1   County                                    20733 non-null  object 
 2   State                                     20733 non-null  object 
 3   Vehicle Primary Use                       20819 non-null  object 
 4   Battery Electric Vehicles (BEVs)          20819 non-null  object 
 5   Plug-In Hybrid Electric Vehicles (PHEVs)  20819 non-null  object 
 6   Electric Vehicle (EV) Total               20819 non-null  object 
 7   Non-Electric Vehicle Total                20819 non-null  object 
 8   Total Vehicles                            20819 non-null  object 
 9   Percent Electric Vehicles                 20819 non-null  float64
dtypes: float64(

In [17]:
# Show the column labels of the frame (displayed as the cell's output).
df.columns

Index(['Date', 'County', 'State', 'Vehicle Primary Use',
       'Battery Electric Vehicles (BEVs)',
       'Plug-In Hybrid Electric Vehicles (PHEVs)',
       'Electric Vehicle (EV) Total', 'Non-Electric Vehicle Total',
       'Total Vehicles', 'Percent Electric Vehicles'],
      dtype='object')

In [14]:
# Count the null entries in each column (isna is the same check as isnull).
missing_per_column = df.isna().sum()

print("Missing Values:")
print(missing_per_column)

Missing Values:
Date                                         0
County                                      86
State                                       86
Vehicle Primary Use                          0
Battery Electric Vehicles (BEVs)             0
Plug-In Hybrid Electric Vehicles (PHEVs)     0
Electric Vehicle (EV) Total                  0
Non-Electric Vehicle Total                   0
Total Vehicles                               0
Percent Electric Vehicles                    0
dtype: int64


**Identify outliers in 'Percent Electric Vehicles' using the IQR method: values below Q1 − 1.5 × IQR or above Q3 + 1.5 × IQR lie outside the typical range of the distribution.**

In [15]:
# Select the target column once instead of re-indexing the frame on every use.
pct_ev = df['Percent Electric Vehicles']

# First and third quartiles, and the interquartile range between them.
Q1 = pct_ev.quantile(0.25)
Q3 = pct_ev.quantile(0.75)
IQR = Q3 - Q1

# Fences of the 1.5 * IQR rule: values beyond them are treated as outliers.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

print('Lower Bound:', lower_bound)
print('Upper Bound:', upper_bound)

# Keep only the rows whose percentage lies strictly outside the fences.
is_outlier = pct_ev.lt(lower_bound) | pct_ev.gt(upper_bound)
outliers = df.loc[is_outlier]

print("Number of outliers in 'Percent Electric Vehicles':", len(outliers))

Lower Bound: -3.5174999999999996
Upper Bound: 6.9025
Number of outliers in 'Percent Electric Vehicles': 2476
