In [103]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [104]:
# Parse the workbook once and pull both sheets out of the returned dict,
# instead of opening and reading 'Dataset.xlsx' a second time.
Sheets = pd.read_excel('Dataset.xlsx', sheet_name=['Trip', 'Charge Cycle'])
VehicleDF = Sheets['Trip']
ChargingDF = Sheets['Charge Cycle']

In [105]:
# Convert the start/end timestamp columns of both frames to datetimes.
# format='mixed' lets pandas infer the format of each value individually.
for Frame, TimeColumn in (
    (ChargingDF, 'Local Charge Start Time'),
    (ChargingDF, 'Local Charge End Time'),
    (VehicleDF, 'Local Trip Start Time'),
    (VehicleDF, 'Local Trip End Time'),
):
    Frame[TimeColumn] = pd.to_datetime(Frame[TimeColumn], format='mixed')

In [106]:
# Per-column overview of the charging data: missing count, present count,
# number of distinct values and dtype, laid out side by side.
ChargeSummaryLabels = ['Null Values', 'Available Values', 'Unique Values', 'Data Types']
ChargeSummaryParts = [
    ChargingDF.isnull().sum(),
    ChargingDF.notnull().sum(),
    ChargingDF.nunique(),
    ChargingDF.dtypes,
]
characteristics = pd.concat(ChargeSummaryParts, keys=ChargeSummaryLabels, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,6296,8,object
Local Charge Start Time,0,6296,6274,datetime64[ns]
Local Charge End Time,0,6296,6265,datetime64[ns]
Starting SOC,0,6296,170,float64
Ending SOC,0,6296,120,float64
Total Energy Delivered,0,6296,5705,float64
Average Power,0,6296,6210,float64
Max Power,0,6296,4371,float64


In [107]:
# Per-column overview of the trip data: missing count, present count,
# number of distinct values and dtype, laid out side by side.
TripSummaryLabels = ['Null Values', 'Available Values', 'Unique Values', 'Data Types']
TripSummaryParts = [
    VehicleDF.isnull().sum(),
    VehicleDF.notnull().sum(),
    VehicleDF.nunique(),
    VehicleDF.dtypes,
]
characteristics = pd.concat(TripSummaryParts, keys=TripSummaryLabels, axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,8013,27,object
Local Trip Start Time,0,8013,7893,datetime64[ns]
Local Trip End Time,0,8013,7931,datetime64[ns]
Initial SOC,0,8013,3161,float64
Final SOC,0,8013,3257,float64
SOC Used,0,8013,3271,float64
Total Energy Consumption,0,8013,7850,float64
Total Distance,0,8013,5465,float64
Average Ambient Temperature,0,8013,4849,float64


In [108]:
# Elapsed time of each charge cycle in hours, rounded to two decimals.
ChargeElapsed = ChargingDF['Local Charge End Time'] - ChargingDF['Local Charge Start Time']
ChargingDF['Charging Duration (Hours)'] = (ChargeElapsed.dt.total_seconds() / 3600).round(2)

# SOC gained over the cycle: ending SOC minus starting SOC.
ChargingDF['Total SOC Charged'] = ChargingDF['Ending SOC'] - ChargingDF['Starting SOC']
ChargingDF.head(5)

Unnamed: 0,Vehicle ID,Local Charge Start Time,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:05:00,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:02:00,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:08:00,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:05:00,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:10:00,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [109]:
# SOC used per unit of distance, rounded to two decimals. Trips with zero
# distance are set to NaN rather than keeping the division-by-zero result.
Distance = VehicleDF['Total Distance']
SocPerDistance = (VehicleDF['SOC Used'] / Distance).round(2)
VehicleDF['Energy Efficiency'] = SocPerDistance.where(Distance != 0)

# Trip length in hours, from the end/start timestamps.
TripElapsed = VehicleDF['Local Trip End Time'] - VehicleDF['Local Trip Start Time']
VehicleDF['Trip Duration (Hours)'] = TripElapsed.dt.total_seconds() / 3600

# Round every numeric column of the frame to three decimals.
VehicleDF = VehicleDF.round(3)

In [110]:
# The start timestamp is no longer needed once the trip duration exists.
VehicleDF = VehicleDF.drop(columns=['Local Trip Start Time'])

In [111]:
# Keep only trips whose computed duration is non-negative.
NonNegativeDuration = VehicleDF['Trip Duration (Hours)'] >= 0
VehicleDF = VehicleDF[NonNegativeDuration]

In [112]:
# Order trips by vehicle, then chronologically by trip end time.
# Rebind the sorted result instead of sorting in place, matching the
# assignment style used by the surrounding VehicleDF cells.
VehicleDF = VehicleDF.sort_values(by=['Vehicle ID', 'Local Trip End Time'])

In [113]:
# Display the trip frame (pandas truncates the rendering of long frames).
VehicleDF

Unnamed: 0,Vehicle ID,Local Trip End Time,Initial SOC,Final SOC,SOC Used,Total Energy Consumption,Total Distance,Average Ambient Temperature,Energy Efficiency,Trip Duration (Hours)
179,EV026,2018-01-10 19:00:00,100.000,47.000,53.000,36.609,18.991,56.840,2.79,1.783
178,EV026,2018-01-10 21:13:00,59.500,77.500,-18.000,26.207,14.488,54.680,-1.24,1.217
177,EV026,2018-01-11 19:05:00,76.500,45.500,31.000,21.004,11.481,55.760,2.70,0.917
176,EV026,2018-01-11 20:25:00,99.000,52.500,46.500,25.429,13.375,53.240,3.48,1.083
175,EV026,2018-01-12 13:48:00,60.000,53.000,7.000,24.238,10.213,52.880,0.69,1.300
...,...,...,...,...,...,...,...,...,...,...
8008,EV169,2022-11-21 08:52:00,56.022,52.016,7.853,10.178,3.607,66.740,2.18,2.383
8009,EV169,2023-01-23 12:24:00,46.167,45.119,1.166,4.670,0.121,39.175,9.62,0.129
8010,EV169,2023-01-23 13:29:00,44.572,43.139,1.467,5.032,0.068,48.523,21.47,0.283
8011,EV169,2023-01-24 09:13:00,43.523,43.709,0.734,1.365,0.438,62.493,1.68,0.533


In [118]:
# Preview the first five charge cycles (head() defaults to five rows).
ChargingDF.head()

Unnamed: 0,Vehicle ID,Local Charge Start Time,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:05:00,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:02:00,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:08:00,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:05:00,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:10:00,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [122]:
# The start timestamp is no longer needed once the charging duration exists.
# Rebind the result instead of using inplace=True, consistent with how the
# trip frame's start-time column is dropped.
ChargingDF = ChargingDF.drop(columns=['Local Charge Start Time'])

In [123]:
# Order charge cycles chronologically by end time, then by vehicle.
# Rebind the sorted result instead of sorting in place.
ChargingDF = ChargingDF.sort_values(by=['Local Charge End Time', 'Vehicle ID'])

In [125]:
# Preview the first five rows of the charging frame (head() defaults to five).
ChargingDF.head()

Unnamed: 0,Vehicle ID,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [139]:
from sklearn.preprocessing import StandardScaler

# Model inputs and target for the trip data.
FeaturesT = [
    'Initial SOC',
    'Final SOC',
    'Total Energy Consumption',
    'Total Distance',
    'Average Ambient Temperature',
]
TargetT = ['SOC Used']

# Model inputs and target for the charging data.
FeaturesC = [
    'Starting SOC',
    'Ending SOC',
    'Total Energy Delivered',
    'Average Power',
    'Max Power',
    'Charging Duration (Hours)',
]
TargetC = ['Total SOC Charged']

# Standardise the feature columns, one scaler per dataset. The scaled values
# overwrite the raw columns, so re-running this cell scales already-scaled
# data; targets are left untouched.
ScalerTrip = StandardScaler()
ScaledTrip = ScalerTrip.fit_transform(VehicleDF[FeaturesT])
VehicleDF[FeaturesT] = ScaledTrip

ScalerCharge = StandardScaler()
ScaledCharge = ScalerCharge.fit_transform(ChargingDF[FeaturesC])
ChargingDF[FeaturesC] = ScaledCharge

In [140]:
def CreateSequences(Data, VehicleCol, Features, Time, Target, SequenceLength):
    """
    Build sliding-window samples per vehicle for next-step prediction.

    Rows are grouped by VehicleCol and each group is sorted by Time. Every run
    of SequenceLength consecutive rows becomes one input window, and the
    Target value(s) of the row immediately after the window become its label.
    Windows never span two vehicles; a vehicle with SequenceLength rows or
    fewer contributes no samples.

    Parameters:
    - Data: DataFrame containing the data.
    - VehicleCol: Column name for Vehicle IDs.
    - Features: List of feature column names.
    - Time: Time column to sort by.
    - Target: Target column name, or list of target column names.
    - SequenceLength: Number of time steps in each input window.

    Returns:
    - X: Numpy array of input windows (shape: NumSamples, SequenceLength, NumFeatures)
    - y: Numpy array of target values (shape: NumSamples, NumTargets)
      Both arrays keep these ranks (with NumSamples == 0) when no vehicle
      has enough rows to form a window.
    """
    # A bare column name would make each target row a scalar; normalise to a
    # list of columns so y is always two-dimensional.
    TargetCols = [Target] if isinstance(Target, str) else list(Target)

    X, y = [], []
    for _, Group in Data.groupby(VehicleCol):
        Group = Group.sort_values(by=Time)

        # Extract the arrays once per vehicle rather than once per window.
        FeatureValues = Group[Features].values
        TargetValues = Group[TargetCols].values

        for Start in range(len(Group) - SequenceLength):
            End = Start + SequenceLength
            X.append(FeatureValues[Start:End])
            y.append(TargetValues[End])

    if not X:
        # np.array([]) would collapse to shape (0,); keep the documented ranks.
        return (np.empty((0, SequenceLength, len(Features))),
                np.empty((0, len(TargetCols))))

    return np.array(X), np.array(y)

# Build 5-step input windows (and next-step targets) for each dataset.
TripSequenceArgs = dict(
    Data=VehicleDF,
    VehicleCol='Vehicle ID',
    Features=FeaturesT,
    Time='Local Trip End Time',
    Target=TargetT,
    SequenceLength=5,
)
XTrip, yTrip = CreateSequences(**TripSequenceArgs)

ChargeSequenceArgs = dict(
    Data=ChargingDF,
    VehicleCol='Vehicle ID',
    Features=FeaturesC,
    Time='Local Charge End Time',
    Target=TargetC,
    SequenceLength=5,
)
XCharge, yCharge = CreateSequences(**ChargeSequenceArgs)

# Report the resulting array shapes: trip data first, then charging data.
print("X shape: ", XTrip.shape)
print("y shape: ", yTrip.shape)

print("\nX shape: ", XCharge.shape)
print("y shape: ", yCharge.shape)

X shape:  (7874, 5, 5)
y shape:  (7874, 1)

X shape:  (6256, 5, 6)
y shape:  (6256, 1)


In [141]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed, concatenate
from tensorflow.keras.optimizers import Adam

