In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [15]:
# Raw string literals keep the Windows backslashes literal. In a normal
# string, sequences such as "\W" or "\P" are invalid escapes that Python
# reports with a warning (and plans to reject in a future version).
vehicle_path = r'D:\Work\Projects\Advanced-Research\Dataset\Vehicle Data'
charging_path = r'D:\Work\Projects\Advanced-Research\Dataset\Charging Data'


def framer(folderPath):
    """Load every CSV file in a folder and stack them into one DataFrame.

    Parameters
    ----------
    folderPath : str or path-like
        Directory scanned (non-recursively) for files whose name ends
        with ``.csv``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all CSV files found, re-indexed from 0.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` files.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the merged frame reproducible across runs and machines.
    csvNames = sorted(
        fileName
        for fileName in os.listdir(folderPath)
        if fileName.endswith('.csv')
    )

    # pd.concat raises an opaque "No objects to concatenate" on an empty
    # list; fail with the same exception type but a message naming the folder.
    if not csvNames:
        raise ValueError(f"No .csv files found in {folderPath!r}")

    df_list = [
        pd.read_csv(os.path.join(folderPath, fileName))
        for fileName in csvNames
    ]

    return pd.concat(df_list, ignore_index=True)


# Build one merged frame per dataset folder: vehicle trips first, then
# charging sessions.
vehicleDF, chargingDF = map(framer, (vehicle_path, charging_path))


  vehicle_path = 'D:\Work\Projects\Advanced-Research\Dataset\Vehicle Data'
  charging_path = 'D:\Work\Projects\Advanced-Research\Dataset\Charging Data'


In [16]:
# Report the (rows, columns) dimensions of the merged vehicle data.
print((len(vehicleDF.index), len(vehicleDF.columns)))

(24108, 24)


In [17]:
# Per-column profile of the raw vehicle data: missing count, present count,
# number of distinct values, and dtype. One row per column of vehicleDF.
vehicle_profile_parts = {
    'Null Values': vehicleDF.isna().sum(),
    'Available Values': vehicleDF.notna().sum(),
    'Unique Values': vehicleDF.nunique(),
    'Data Types': vehicleDF.dtypes,
}
characteristics = pd.concat(vehicle_profile_parts, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,891,23217,54,object
Local Trip Start Time,15229,8879,8777,object
Local Trip End Time,15229,8879,8797,object
Total Distance,5246,18862,12830,float64
Idling Time,8522,15586,12999,float64
Driving Time,9795,14313,10485,float64
Total Run Time,6105,18003,13632,float64
Initial SOC,13902,10206,3772,float64
Final SOC,13603,10505,3565,float64
SOC Used,9155,14953,5358,float64


Filtering the data to the attributes that are theoretically most important for State of Charge (SOC) analysis during trips (charge usage).

In [18]:
# Columns retained for the trip-level State of Charge analysis.
SelectedColumns = [
    "Vehicle ID",
    "Local Trip Start Time",
    "Local Trip End Time",
    "Initial SOC",
    "Final SOC",
    "SOC Used",
    "Total Energy Consumption",
    "Total Distance",
    "Average Ambient Temperature"
]

# Take an explicit copy so that later cleaning steps work on an independent
# frame rather than on a selection derived from the raw data; mutating such
# a selection can trigger pandas' SettingWithCopyWarning.
vehicleDF = vehicleDF[SelectedColumns].copy()

### Column Analysis for SOC Time Series Analysis During Trips

#### 1. Time Information
- ***Local Trip Start Time***: Provides the timestamp for the start of each trip, essential for time-based analysis.
- ***Local Trip End Time***: Provides the timestamp for the end of each trip, essential for time-based analysis.
- ***Date***: Could help group trips by day but has many missing values.

#### 2. SOC Information
- ***Initial SOC***: Indicates the stored charge at the beginning of each trip, essential for tracking SOC levels over time.
- ***Final SOC***: Indicates the stored charge at the end of each trip, essential for SOC analysis.
- ***SOC Used***: Represents the amount of SOC depleted during each trip.

#### 3. Energy and Distance Information
- ***Total Energy Consumption***: Measures energy consumed per trip, providing insight into SOC usage patterns.
- ***Total Distance***: Total distance traveled in each trip; useful for understanding energy efficiency in relation to SOC usage.
- ***Idling Time***: Shows time spent idling, which affects SOC usage differently from active driving.
- ***Driving Time***: Shows active driving time, providing context for energy and SOC usage patterns.

#### 4. Additional Contextual Information
- ***Average Ambient Temperature***: Tracks the environmental temperature, which can impact SOC and battery efficiency.


In [19]:
# Keep only trips with every selected attribute present. Reassigning instead
# of using inplace=True avoids mutating a frame derived from a column
# selection (a common source of SettingWithCopyWarning).
vehicleDF = vehicleDF.dropna()

In [20]:
# Re-profile the vehicle data after cleaning: missing count, present count,
# number of distinct values, and dtype for each remaining column.
cleaned_vehicle_parts = {
    'Null Values': vehicleDF.isna().sum(),
    'Available Values': vehicleDF.notna().sum(),
    'Unique Values': vehicleDF.nunique(),
    'Data Types': vehicleDF.dtypes,
}
characteristics = pd.concat(cleaned_vehicle_parts, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,8013,27,object
Local Trip Start Time,0,8013,7922,object
Local Trip End Time,0,8013,7946,object
Initial SOC,0,8013,3161,float64
Final SOC,0,8013,3257,float64
SOC Used,0,8013,3271,float64
Total Energy Consumption,0,8013,7850,float64
Total Distance,0,8013,5465,float64
Average Ambient Temperature,0,8013,4849,float64


In [21]:
# Per-column profile of the raw charging data: missing count, present count,
# number of distinct values, and dtype. One row per column of chargingDF.
charging_profile_parts = {
    'Null Values': chargingDF.isna().sum(),
    'Available Values': chargingDF.notna().sum(),
    'Unique Values': chargingDF.nunique(),
    'Data Types': chargingDF.dtypes,
}
characteristics = pd.concat(charging_profile_parts, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,11859,17,object
Charger ID,0,11859,2,object
Local Connect Time,11859,0,0,float64
Local Disconnect Time,11859,0,0,float64
Local Charge Start Time,3248,8611,8557,object
Local Charge End Time,4469,7390,7334,object
Average Power,679,11180,11041,float64
Max Power,3927,7932,5154,float64
Total Energy Delivered,1,11858,10796,float64
Starting SOC,3976,7883,171,float64


Filtering the data to the attributes that are theoretically most important for State of Charge (SOC) analysis while the vehicle is charging.

In [22]:
# Columns retained for the charging-session State of Charge analysis.
SelectedColumns = [
    "Vehicle ID",
    "Local Charge Start Time",
    "Local Charge End Time",
    "Starting SOC",
    "Ending SOC",
    "Total Energy Delivered",
    "Average Power",
    "Max Power"
]

# Take an explicit copy so that later cleaning steps work on an independent
# frame rather than on a selection derived from the raw data; mutating such
# a selection can trigger pandas' SettingWithCopyWarning.
chargingDF = chargingDF[SelectedColumns].copy()

### Column Analysis for SOC Time Series Analysis During Charging Sessions

#### 1. Time Information
- ***Local Charge Start Time***: Essential for determining the start of each charging session.
- ***Local Charge End Time***: Essential for determining the end of each charging session.
- ***Date***: Could be useful for grouping sessions by day but has significant missing values.

#### 2. SOC Information
- ***Starting SOC***: Key metric for SOC at the beginning of each charging session.
- ***Ending SOC***: Key metric for SOC at the end of each charging session.
- ***SOC Charged***: Indicates the amount of SOC added, though it has a high rate of missing values.

#### 3. Energy Information
- ***Total Energy Delivered***: Tracks energy delivered during each session; highly relevant for charging efficiency.
- ***Average Power***: Insight into power levels during charging with a relatively low rate of missing values.
- ***Max Power***: Insight into the maximum power level during charging sessions.


In [23]:
# Profile the charging data after column selection: missing count, present
# count, number of distinct values, and dtype for each retained column.
selected_charging_parts = {
    'Null Values': chargingDF.isna().sum(),
    'Available Values': chargingDF.notna().sum(),
    'Unique Values': chargingDF.nunique(),
    'Data Types': chargingDF.dtypes,
}
characteristics = pd.concat(selected_charging_parts, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,11859,17,object
Local Charge Start Time,3248,8611,8557,object
Local Charge End Time,4469,7390,7334,object
Starting SOC,3976,7883,171,float64
Ending SOC,3248,8611,124,float64
Total Energy Delivered,1,11858,10796,float64
Average Power,679,11180,11041,float64
Max Power,3927,7932,5154,float64


In [24]:
# Keep only charging sessions with every selected attribute present.
# Reassigning instead of using inplace=True avoids mutating a frame derived
# from a column selection (a common source of SettingWithCopyWarning).
chargingDF = chargingDF.dropna()

In [25]:
# Re-profile the charging data after cleaning: missing count, present count,
# number of distinct values, and dtype for each remaining column.
cleaned_charging_parts = {
    'Null Values': chargingDF.isna().sum(),
    'Available Values': chargingDF.notna().sum(),
    'Unique Values': chargingDF.nunique(),
    'Data Types': chargingDF.dtypes,
}
characteristics = pd.concat(cleaned_charging_parts, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,6296,8,object
Local Charge Start Time,0,6296,6269,object
Local Charge End Time,0,6296,6263,object
Starting SOC,0,6296,170,float64
Ending SOC,0,6296,120,float64
Total Energy Delivered,0,6296,5705,float64
Average Power,0,6296,6210,float64
Max Power,0,6296,4371,float64


In [26]:
# Report the (rows, columns) dimensions of the cleaned charging data.
print((len(chargingDF.index), len(chargingDF.columns)))

(6296, 8)
