In [1]:
import numpy as np
import pandas as pd
from ydata_profiling import ProfileReport

  def hasna(x: np.ndarray) -> bool:


In [2]:
# Load the EV charging-station records from the CSV file (path is relative to
# the notebook's working directory) and preview the first rows as a quick
# sanity check of the parsed columns.
charging_df = pd.read_csv("datasets/ev-charging-stations-india.csv")
charging_df.head()

Unnamed: 0,name,state,city,address,lattitude,longitude,type
0,Neelkanth Star DC Charging Station,Haryana,Gurugram,"Neelkanth Star Karnal, NH 44, Gharunda, Kutail...",29.6019,76.9803,12.0
1,Galleria DC Charging Station,Haryana,Gurugram,"DLF Phase IV, Sector 28, Gurugram, Haryana 122022",28.4673,77.0818,12.0
2,Highway Xpress (Jaipur-Delhi) DC charging station,Rajasthan,Behror,"Jaipur to Delhi Road, Behror Midway, Behror, R...",27.8751,76.276,12.0
3,Food Carnival DC Charging Station,Uttar Pradesh,Khatauli,"Fun and Food Carnival, NH 58, Khatauli Bypass,...",29.3105,77.7218,12.0
4,Food Carnival AC Charging Station,Uttar Pradesh,Khatauli,"NH 58, Khatauli Bypass, Bhainsi, Uttar Pradesh...",29.3105,77.7218,12.0


In [3]:
# Summarize each column's dtype and non-null count to find missing values and
# columns that were parsed with an unexpected type.
charging_df.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1547 entries, 0 to 1546
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       1547 non-null   object 
 1   state      1547 non-null   object 
 2   city       1547 non-null   object 
 3   address    1507 non-null   object 
 4   lattitude  1541 non-null   object 
 5   longitude  1541 non-null   float64
 6   type       1539 non-null   float64
dtypes: float64(2), object(5)
memory usage: 84.7+ KB


In [4]:
# Report how many rows are exact duplicates of an earlier row, then discard them
duplicate_count = charging_df.duplicated().sum()
print(f"Number of duplicate rows: {duplicate_count}")
charging_df.drop_duplicates(inplace=True)

Number of duplicate rows: 205


In [5]:
# Remove every row that has at least one missing value, then renumber the rows
# 0..n-1 so that row labels and row positions coincide again.
charging_df = charging_df.dropna().reset_index(drop=True)

In [6]:
# Cast the columns to the dtypes the analysis needs.
# `lattitude` was parsed as text; strip stray commas before converting to float.
latitude_text = charging_df["lattitude"].str.replace(",", "")
charging_df["lattitude"] = latitude_text.astype(float)
charging_df["type"] = charging_df["type"].astype(int)
# Store the repeated text labels as categoricals.
for label_column in ("state", "city"):
    charging_df[label_column] = pd.Categorical(charging_df[label_column])

In [7]:
# Creating a variable "charging_speed"
def encode_type(type_value, slow_max=11, fast_max=16):
    """Map a numeric charger `type` value to a charging-speed label.

    Parameters
    ----------
    type_value : int or float
        The value from the `type` column.
    slow_max : int or float, default 11
        Largest value that is still labelled "slow".
    fast_max : int or float, default 16
        Largest value that is still labelled "fast"; anything above it is
        labelled "ultra-fast".

    Returns
    -------
    str
        "slow", "fast" or "ultra-fast". A NaN input is returned unchanged so
        that a missing type is not silently labelled "ultra-fast".
    """
    # NaN is the only value that is not equal to itself; every comparison with
    # it is False, so without this guard it would fall through to the last label.
    if type_value != type_value:
        return type_value
    if type_value <= slow_max:
        return "slow"
    if type_value <= fast_max:
        return "fast"
    return "ultra-fast"

# Label every row with the speed class derived from its `type` value
charging_df["charging_speed"] = charging_df["type"].map(encode_type)

In [9]:
# Summary statistics of the numeric columns, used to spot implausible values
charging_df.describe()

Unnamed: 0,lattitude,longitude,type
count,1291.0,1291.0,1291.0
mean,19.907463,65999.58,8.928737
std,6.932307,2368613.0,4.167747
min,8.390198,8.058454,6.0
25%,13.032111,75.78536,7.0
50%,19.051601,77.21756,7.0
75%,27.179942,78.46415,10.0
max,77.597289,85105510.0,24.0


The maximum value of the `longitude` variable is 85105514 and its minimum is 8.0584541; both look like data-entry errors with a misplaced decimal point. The maximum value of the `lattitude` variable, 77.597289, is also suspicious: it falls inside the longitude range, which suggests the two coordinates of that row were swapped. Since India's longitude ranges from approximately 68 to 97 and its latitude from approximately 8 to 37, we correct these values in the cells below.

In [10]:
# Correcting the maximum and minimum values of the longitude variable.
# Rows are addressed by label and the column by name instead of a hard-coded
# column position, and each fix is applied only while the value lies outside
# India's approximate longitude range (68 to 97 degrees), so re-running this
# cell cannot rescale a value that is already valid.
INDIA_LON_MIN, INDIA_LON_MAX = 68, 97

max_lon_label = charging_df["longitude"].idxmax()
if charging_df.loc[max_lon_label, "longitude"] > INDIA_LON_MAX:
    # Decimal point is missing: 85105514 -> 85.105514
    charging_df.loc[max_lon_label, "longitude"] /= 1e6

min_lon_label = charging_df["longitude"].idxmin()
if charging_df.loc[min_lon_label, "longitude"] < INDIA_LON_MIN:
    # Value is one order of magnitude too small: 8.0584541 -> 80.584541
    charging_df.loc[min_lon_label, "longitude"] *= 10

In [11]:
# The largest `lattitude` value lies outside India's approximate latitude range
# (8 to 37 degrees), so that row's latitude and longitude are exchanged.
# NOTE(review): the previous comment named this as row 535 — confirm against the data.
INDIA_LAT_MAX = 37

# idxmax() returns the row *label*, which is what .loc expects; argmax() returns
# a position and only agrees with .loc while the index is exactly 0..n-1.
index = charging_df["lattitude"].idxmax()
lat = charging_df.loc[index, "lattitude"]

# Guard so that re-running the cell does not swap a row that is already valid.
if lat > INDIA_LAT_MAX:
    charging_df.loc[index, "lattitude"] = charging_df.loc[index, "longitude"]
    charging_df.loc[index, "longitude"] = lat

In [12]:
# Re-check the summary statistics after the coordinate corrections
charging_df.describe()

Unnamed: 0,lattitude,longitude,type
count,1291.0,1291.0,1291.0
mean,19.85734,77.586028,8.928737
std,6.746304,3.663208,4.167747
min,8.390198,69.678673,6.0
25%,13.03182,75.794858,7.0
50%,19.051601,77.217962,7.0
75%,27.094893,78.466949,10.0
max,32.9918,94.168072,24.0


## Top 10 states with maximum charging stations

In [13]:
# Preview the cleaned data, which now includes the `charging_speed` column
charging_df.head()

Unnamed: 0,name,state,city,address,lattitude,longitude,type,charging_speed
0,Neelkanth Star DC Charging Station,Haryana,Gurugram,"Neelkanth Star Karnal, NH 44, Gharunda, Kutail...",29.6019,76.9803,12,fast
1,Galleria DC Charging Station,Haryana,Gurugram,"DLF Phase IV, Sector 28, Gurugram, Haryana 122022",28.4673,77.0818,12,fast
2,Highway Xpress (Jaipur-Delhi) DC charging station,Rajasthan,Behror,"Jaipur to Delhi Road, Behror Midway, Behror, R...",27.8751,76.276,12,fast
3,Food Carnival DC Charging Station,Uttar Pradesh,Khatauli,"Fun and Food Carnival, NH 58, Khatauli Bypass,...",29.3105,77.7218,12,fast
4,Food Carnival AC Charging Station,Uttar Pradesh,Khatauli,"NH 58, Khatauli Bypass, Bhainsi, Uttar Pradesh...",29.3105,77.7218,12,fast
