In [191]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [215]:
VehicleDF = pd.read_excel('Dataset.xlsx', sheet_name='Trip')
ChargingDF = pd.read_excel('Dataset.xlsx', sheet_name='Charge Cycle')

In [216]:
ChargingDF['Local Charge Start Time'] = pd.to_datetime(ChargingDF['Local Charge Start Time'],
                                                       format='mixed')
ChargingDF['Local Charge End Time'] = pd.to_datetime(ChargingDF['Local Charge End Time'],
                                                     format='mixed')
VehicleDF['Local Trip Start Time'] = pd.to_datetime(VehicleDF['Local Trip Start Time'],
                                                    format='mixed')
VehicleDF['Local Trip End Time'] = pd.to_datetime(VehicleDF['Local Trip End Time'],
                                                  format='mixed')

In [217]:
characteristics = pd.concat([ChargingDF.isnull().sum(), 
                             ChargingDF.notnull().sum(), 
                             ChargingDF.nunique(), 
                             ChargingDF.dtypes],
                        keys=['Null Values', 
                              'Available Values', 
                              'Unique Values', 
                              'Data Types'],
                        axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,6296,8,object
Local Charge Start Time,0,6296,6274,datetime64[ns]
Local Charge End Time,0,6296,6265,datetime64[ns]
Starting SOC,0,6296,170,float64
Ending SOC,0,6296,120,float64
Total Energy Delivered,0,6296,5705,float64
Average Power,0,6296,6210,float64
Max Power,0,6296,4371,float64


In [218]:
characteristics = pd.concat([VehicleDF.isnull().sum(), 
                             VehicleDF.notnull().sum(), 
                             VehicleDF.nunique(), 
                             VehicleDF.dtypes],
                        keys=['Null Values', 
                              'Available Values', 
                              'Unique Values', 
                              'Data Types'],
                        axis=1)
characteristics

Unnamed: 0,Null Values,Available Values,Unique Values,Data Types
Vehicle ID,0,8013,27,object
Local Trip Start Time,0,8013,7893,datetime64[ns]
Local Trip End Time,0,8013,7931,datetime64[ns]
Initial SOC,0,8013,3161,float64
Final SOC,0,8013,3257,float64
SOC Used,0,8013,3271,float64
Total Energy Consumption,0,8013,7850,float64
Total Distance,0,8013,5465,float64
Average Ambient Temperature,0,8013,4849,float64


In [219]:
ChargingDF['Charging Duration (Hours)'] = round((
    ChargingDF['Local Charge End Time'] - ChargingDF['Local Charge Start Time']
    ).dt.total_seconds() / 3600, 2)
ChargingDF['Total SOC Charged'] = (
    ChargingDF['Ending SOC'] - ChargingDF['Starting SOC']
    )
ChargingDF.head(5)

Unnamed: 0,Vehicle ID,Local Charge Start Time,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:05:00,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:02:00,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:08:00,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:05:00,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:10:00,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [220]:
VehicleDF['Energy Efficiency'] = np.where(VehicleDF['Total Distance'] == 0, 
                                          np.nan, 
                                          round(VehicleDF['SOC Used'] 
                                                / VehicleDF['Total Distance'], 2))
VehicleDF['Trip Duration (Hours)'] = (
    VehicleDF['Local Trip End Time'] - VehicleDF['Local Trip Start Time']
    ).dt.total_seconds() / 3600
VehicleDF = round(VehicleDF,3)

In [221]:
VehicleDF = VehicleDF.drop(labels=['Local Trip Start Time'], axis=1)

In [222]:
VehicleDF = VehicleDF[VehicleDF['Trip Duration (Hours)'] >= 0]

In [223]:
VehicleDF.sort_values(by=['Vehicle ID', 'Local Trip End Time'], inplace=True)

In [224]:
VehicleDF

Unnamed: 0,Vehicle ID,Local Trip End Time,Initial SOC,Final SOC,SOC Used,Total Energy Consumption,Total Distance,Average Ambient Temperature,Energy Efficiency,Trip Duration (Hours)
179,EV026,2018-01-10 19:00:00,100.000,47.000,53.000,36.609,18.991,56.840,2.79,1.783
178,EV026,2018-01-10 21:13:00,59.500,77.500,-18.000,26.207,14.488,54.680,-1.24,1.217
177,EV026,2018-01-11 19:05:00,76.500,45.500,31.000,21.004,11.481,55.760,2.70,0.917
176,EV026,2018-01-11 20:25:00,99.000,52.500,46.500,25.429,13.375,53.240,3.48,1.083
175,EV026,2018-01-12 13:48:00,60.000,53.000,7.000,24.238,10.213,52.880,0.69,1.300
...,...,...,...,...,...,...,...,...,...,...
8008,EV169,2022-11-21 08:52:00,56.022,52.016,7.853,10.178,3.607,66.740,2.18,2.383
8009,EV169,2023-01-23 12:24:00,46.167,45.119,1.166,4.670,0.121,39.175,9.62,0.129
8010,EV169,2023-01-23 13:29:00,44.572,43.139,1.467,5.032,0.068,48.523,21.47,0.283
8011,EV169,2023-01-24 09:13:00,43.523,43.709,0.734,1.365,0.438,62.493,1.68,0.533


In [225]:
ChargingDF.head(5)

Unnamed: 0,Vehicle ID,Local Charge Start Time,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:05:00,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:02:00,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:08:00,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:05:00,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:10:00,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [226]:
ChargingDF.drop('Local Charge Start Time', axis=1, inplace=True)

In [227]:
ChargingDF.sort_values(by=['Local Charge End Time', 'Vehicle ID'], inplace=True)

In [228]:
ChargingDF.head(5)

Unnamed: 0,Vehicle ID,Local Charge End Time,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
0,EV026,2017-09-01 08:09:00,81.0,99.5,13.91,193.386,342.28,0.07,18.5
1,EV026,2017-09-01 09:11:00,65.0,100.0,24.091,308.8,363.64,0.15,35.0
2,EV026,2017-09-01 10:14:00,57.5,100.0,31.054,327.83,364.82,0.1,42.5
3,EV026,2017-09-01 11:14:00,62.5,99.5,25.935,321.533,361.06,0.15,37.0
4,EV026,2017-09-01 12:19:00,56.5,100.0,32.01,230.942,360.1,0.15,43.5


In [236]:
from scipy import stats

#Removing outliers from the Datasets.

NumericalColumnsC = ['Starting SOC', 'Ending SOC', 'Total Energy Delivered',
                     'Average Power', 'Max Power', 'Charging Duration (Hours)',
                      'Total SOC Charged']

ZScoreC = stats.zscore(ChargingDF[NumericalColumnsC])

ChargingDF = ChargingDF[(ZScoreC < 3).all(axis=1)]

print("Z-Score for the first few rows: ", ZScoreC[:5])
print("Number of rows before filtering: ", len(ChargingDF))

Z-Score for the first few rows:     Starting SOC  Ending SOC  Total Energy Delivered  Average Power  Max Power  \
0      1.736968    0.642320               -1.089933      -0.571334   0.527278   
1      0.427732    0.693532                0.017151       1.104457   0.837016   
2     -0.185972    0.693532                0.774310       1.380769   0.854127   
3      0.223164    0.642320                0.217669       1.289338   0.799604   
4     -0.267799    0.693532                0.878266      -0.026027   0.785683   

   Charging Duration (Hours)  Total SOC Charged  
0                  -0.885309          -1.227475  
1                   0.751182           0.126708  
2                  -0.271625           0.742245  
3                   0.751182           0.290851  
4                   0.751182           0.824317  
Number of rows before filtering:  6105


In [244]:
round(ChargingDF[NumericalColumnsC].describe(), 2)

Unnamed: 0,Starting SOC,Ending SOC,Total Energy Delivered,Average Power,Max Power,Charging Duration (Hours),Total SOC Charged
count,6105.0,6105.0,6105.0,6105.0,6105.0,6105.0,6105.0
mean,59.63,93.23,24.04,234.59,308.1,0.11,33.6
std,11.8,9.74,9.0,66.92,65.94,0.04,11.91
min,14.0,33.5,0.0,-16.56,-16.36,0.0,-1.5
25%,53.0,90.5,19.34,199.82,277.7,0.08,27.0
50%,58.0,97.5,25.04,244.2,327.06,0.12,35.5
75%,65.5,100.0,30.46,282.8,356.92,0.15,42.0
max,96.0,100.0,51.49,367.3,382.24,0.25,69.5


In [255]:
NumericalColumnsT = ['Initial SOC', 'Final SOC', 'SOC Used',
                     'Total Energy Consumption','Total Distance',
                     'Average Ambient Temperature', 'Energy Efficiency',
                     'Trip Duration (Hours)']

Q1 = VehicleDF[NumericalColumnsT].quantile(0.15)
Q3 = VehicleDF[NumericalColumnsT].quantile(0.85)
InterQuartileRange = Q3 - Q1


LowerBound = Q1 - 1.5 * InterQuartileRange
UpperBound = Q3 - 1.5 * InterQuartileRange

FilteredVehicleDF = VehicleDF[
    ~((VehicleDF[NumericalColumnsT] >= LowerBound) &
      (VehicleDF[NumericalColumnsT] <= UpperBound)).any(axis=1)
]

print(f"Number of Rows before filtering: {len(VehicleDF)}")
print(f"Number of Rows after IQR filtering: {len(FilteredVehicleDF)}")

Number of Rows before filtering: 8009
Number of Rows after IQR filtering: 7159


In [256]:
VehicleDF = FilteredVehicleDF.copy()

In [257]:
round(VehicleDF[NumericalColumnsT].describe(), 2)

Unnamed: 0,Initial SOC,Final SOC,SOC Used,Total Energy Consumption,Total Distance,Average Ambient Temperature,Energy Efficiency,Trip Duration (Hours)
count,7159.0,7159.0,7159.0,7159.0,7159.0,7159.0,6168.0,7159.0
mean,0.22,0.11,18.29,-0.0,0.02,0.05,2.9,3.12
std,0.67,0.88,22.08,0.96,1.0,0.96,9.56,4.5
min,-5.01,-2.19,-20.5,-0.65,-0.68,-4.44,-44.81,0.02
25%,-0.11,-0.68,0.33,-0.64,-0.68,-0.59,0.52,0.38
50%,0.47,0.34,6.5,-0.36,-0.31,0.1,0.9,1.17
75%,0.73,0.89,35.5,0.2,0.3,0.68,2.73,3.72
max,0.79,1.19,187.87,5.62,4.86,2.72,425.81,24.15


In [258]:
from sklearn.preprocessing import StandardScaler

FeaturesT = ['Initial SOC', 
             'Final SOC', 
             'Total Energy Consumption', 
             'Total Distance', 
             'Average Ambient Temperature']
TargetT = ['SOC Used']

FeaturesC = ['Starting SOC', 
             'Ending SOC', 
             'Total Energy Delivered', 
             'Average Power', 
             'Max Power',
             'Charging Duration (Hours)']
TargetC = ['Total SOC Charged']


ScalerTrip = StandardScaler()
VehicleDF[FeaturesT] = ScalerTrip.fit_transform(VehicleDF[FeaturesT])

ScalerCharge = StandardScaler()
ChargingDF[FeaturesC] = ScalerCharge.fit_transform(ChargingDF[FeaturesC])

In [259]:
from sklearn.preprocessing import LabelBinarizer

Encoder = LabelBinarizer()

VEncoded = Encoder.fit_transform(VehicleDF['Vehicle ID'])

EncodedTVID = [f"Vehicle_{VClass}" for VClass in Encoder.classes_]
EncodedTDF = pd.DataFrame(VEncoded, columns=EncodedTVID)
VehicleDF = pd.concat([VehicleDF.reset_index(drop=True),
                       EncodedTDF.reset_index(drop=True)],
                       axis=1)

CEncoded = Encoder.fit_transform(ChargingDF['Vehicle ID'])
EncodedCVID = [f"Vehicle_{VClass}" for VClass in Encoder.classes_]
EncodedCDF = pd.DataFrame(CEncoded, columns=EncodedCVID)
ChargingDF = pd.concat([ChargingDF.reset_index(drop=True),
                        EncodedCDF.reset_index(drop=True)],
                        axis=1)

In [260]:
def CreateSequences(Data, VehicleIDCol, EncodedVIDCols, Features, Time, Target, SequenceLength):
    """
    Create sequences of Features & Targets grouped by Vehicle ID.

    Parameters:
    - Data: DataFrame containing the data.
    - VehicleCol: Column name for Vehicle IDs.
    - EncodedVIDCols: List one-hot encoded Vehicle ID column names.
    - Features: List of feature column names.
    - Time: Time column to sort by.
    - Target: Target column name.
    - n: number of time steps in each sequence.

    Returns:
    - X: Numpy array of input sequences (shape: NumSampels, SequenceLength, NumFeatures)
    - y: Numpy array of target values (shape: NumSamples, 1)
    """

    X, y = [], []

    Data = Data.reset_index(drop=True)

    Grouped  = Data.groupby(VehicleIDCol)
    
    for VehicleID, Group in Grouped:

        Group = Group.sort_values(by=Time)

        for i in range(len(Group) - SequenceLength):
            FeaturesCols = Features + EncodedVIDCols
            X.append(Group[FeaturesCols].iloc[i:i+SequenceLength].values)
            y.append(Group[Target].iloc[i+SequenceLength])

    return np.array(X), np.array(y)

SequenceLength = 5

XTrip, yTrip = CreateSequences(
    Data=VehicleDF,
    VehicleIDCol='Vehicle ID',
    Features=FeaturesT,
    EncodedVIDCols=EncodedTVID,
    Time='Local Trip End Time',
    Target=TargetT,
    SequenceLength=SequenceLength,
)

XCharge, yCharge = CreateSequences(
    Data=ChargingDF,
    VehicleIDCol='Vehicle ID',
    EncodedVIDCols=EncodedCVID,
    Features=FeaturesC,
    Time='Local Charge End Time',
    Target=TargetC,
    SequenceLength=SequenceLength
)

print("XTrip shape: ", XTrip.shape)
print("yTrip shape: ", yTrip.shape)

print("\nXCharge shape: ", XCharge.shape)
print("yCharge shape: ", yCharge.shape)

XTrip shape:  (7024, 5, 32)
yTrip shape:  (7024, 1)

XCharge shape:  (6065, 5, 14)
yCharge shape:  (6065, 1)


In [317]:
from sklearn.model_selection import train_test_split


def DataPrep(X1, X2, y1, y2, TestSize=0.2, RandomState=301):
    

    MinSamples = min(len(X1), len(X2), len(y1), len(y2))

    X1, y1 = X1[:MinSamples], y1[:MinSamples]
    X2, y2 = X2[:MinSamples], y2[:MinSamples]

    y1 = np.expand_dims(y1, axis=-1)
    y2 = np.expand_dims(y2, axis=-1)
    

    X1Train, X1Temp, y1Train, y1Temp = train_test_split(X1, y1,
                                                    test_size=TestSize,
                                                    random_state=RandomState)
    
    X2Train, X2Temp, y2Train, y2Temp = train_test_split(X2, y2,
                                                    test_size=TestSize,
                                                    random_state=RandomState)
    
    X1Val, X1Test, y1Val, y1Test = train_test_split(X1Temp, y1Temp,
                                                    test_size=0.5,
                                                    random_state=RandomState)
    
    X2Val, X2Test, y2Val, y2Test = train_test_split(X2Temp, y2Temp,
                                                    test_size=0.5,
                                                    random_state=RandomState)
    
    return (X1Train, X1Val, X1Test, y1Train, y1Val, y1Test), (X2Train, X2Val, X2Test, y2Train, y2Val, y2Test)


(TripTrain, TripVal, TripTest, SOCTrain, SOCVal, SOCTest), \
(ChargeTrain, ChargeVal, ChargeTest, AnomalyTrain, AnomalyVal, AnomalyTest) = DataPrep(
    XTrip, XCharge, yTrip, yCharge
    )

In [318]:
print("TripTrain shape:", TripTrain.shape)
print("ChargeTrain shape:", ChargeTrain.shape)
print("SOCTrain shape:", SOCTrain.shape)
print("AnomalyTrain shape:", AnomalyTrain.shape)

print("\nTripVal shape:", TripVal.shape)
print("ChargeVal shape:", ChargeVal.shape)
print("SOCVal shape:", SOCVal.shape)
print("AnomalyVal shape:", AnomalyVal.shape)

TripTrain shape: (4852, 5, 32)
ChargeTrain shape: (4852, 5, 14)
SOCTrain shape: (4852, 1, 1)
AnomalyTrain shape: (4852, 1, 1)

TripVal shape: (606, 5, 32)
ChargeVal shape: (606, 5, 14)
SOCVal shape: (606, 1, 1)
AnomalyVal shape: (606, 1, 1)


In [322]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, RepeatVector, TimeDistributed, concatenate
from tensorflow.keras.optimizers import Adam

TripFeatures = TripTrain.shape[2]
ChargeFeatures = ChargeTrain.shape[2]

TripInput = Input(shape=(SequenceLength, TripFeatures), name='TripInput')
ChargeInput = Input(shape=(SequenceLength, ChargeFeatures), name='ChargeInput')


TripDense = Dense(64, activation='relu')(TripInput)
ChargeDense = Dense(64, activation='relu')(ChargeInput)

Shared = concatenate([TripDense, ChargeDense])
SharedDense = Dense(128, activation='relu')(Shared)

LSTMLayer = LSTM(64, return_sequences=True)(SharedDense)
LSTMOutput = Dense(1, activation='linear', name='SOCOutput')(LSTMLayer)

Encoder = LSTM(64, activation='relu', return_sequences=False)(SharedDense)
Repeat = RepeatVector(SequenceLength)(Encoder)
Decoder = LSTM(64, activation='relu', return_sequences=True)(Repeat)
AutoEncoderOutput = TimeDistributed(Dense(ChargeFeatures), name='AnomalyOutput')(Decoder)

CombinedModel =  Model(inputs=[TripInput, ChargeInput],
              outputs=[LSTMOutput, AutoEncoderOutput])


CombinedModel.compile(
    optimizer = 'adam',
    loss = {
        'SOCOutput': 'mse',
        'AnomalyOutput': 'mse',
    },
    metrics = {
        'SOCOutput': 'mae',
        'AnomalyOutput': 'mae'
    }
)

CombinedModel.summary()

In [323]:
History = CombinedModel.fit(
    x={"TripInput": TripTrain, "ChargeInput": ChargeTrain},
    y={"SOCOutput": SOCTrain, "AnomalyOutput": AnomalyTrain},
    validation_data=(
        {"TripInput": TripVal, "ChargeInput": ChargeVal},
        {"SOCOutput": SOCVal, "AnomalyOutput": AnomalyVal}
    ),
    batch_size=16,
    epochs=75,
    verbose=1
)

Epoch 1/75
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - AnomalyOutput_loss: 449.6653 - AnomalyOutput_mae: 16.3438 - SOCOutput_loss: 540.2958 - SOCOutput_mae: 17.9641 - loss: 989.9659 - val_AnomalyOutput_loss: 136.3011 - val_AnomalyOutput_mae: 9.5025 - val_SOCOutput_loss: 536.0425 - val_SOCOutput_mae: 19.1423 - val_loss: 672.4842
Epoch 2/75
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - AnomalyOutput_loss: 121.9948 - AnomalyOutput_mae: 8.6304 - SOCOutput_loss: 445.1364 - SOCOutput_mae: 18.2407 - loss: 567.1245 - val_AnomalyOutput_loss: 126.0401 - val_AnomalyOutput_mae: 8.8074 - val_SOCOutput_loss: 535.9293 - val_SOCOutput_mae: 19.1014 - val_loss: 662.1013
Epoch 3/75
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - AnomalyOutput_loss: 115.7636 - AnomalyOutput_mae: 8.3597 - SOCOutput_loss: 433.3264 - SOCOutput_mae: 18.0961 - loss: 549.0947 - val_AnomalyOutput_loss: 130.1094 - val_AnomalyOutput_mae: 9