In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from itertools import product
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

We generate a base vehicle dataset against which the models will be tested:
- First we define the thresholds to generate data.
- Then we create the pandas vehicle dataframe, using random data that falls within the defined thresholds, for the stated period (1 year).

In [2]:
num_vehicles = 100
start_date = datetime.strptime("2022-01-01", '%Y-%m-%d')
end_date = datetime.strptime("2022-12-31", '%Y-%m-%d')
min_battery_capacity = 120
max_battery_capacity = 370
max_plugged_time = 360

In [8]:
def generate_vehicle_mock_dataset() -> pd.DataFrame:
  np.random.seed(42)

  # Generate all possible combinations of dates and vehicle_ids
  dates = pd.date_range(start=start_date, end=end_date, freq='D')
  vehicle_ids = np.arange(0, num_vehicles)

  # Create de dataframe
  all_combinations = list(product(dates, vehicle_ids))
  df = pd.DataFrame(all_combinations, columns=['date', 'vehicle_id'])
  df['usable_capacity'] = np.random.randint(min_battery_capacity, max_battery_capacity, size=len(df))
  df['discharged'] = df['usable_capacity'] * np.random.random()
  df['plugged_time_minutes'] = np.random.randint(1, max_plugged_time, size=len(df))

  return df

vehicle_df = generate_vehicle_mock_dataset()

vehicle_df

Unnamed: 0,date,vehicle_id,usable_capacity,discharged,plugged_time_minutes
0,2022-01-01,0,222,67.504559,255
1,2022-01-01,1,299,90.918302,126
2,2022-01-01,2,212,64.463813,209
3,2022-01-01,3,134,40.745995,134
4,2022-01-01,4,226,68.720857,86
...,...,...,...,...,...
36495,2022-12-31,95,138,41.962293,3
36496,2022-12-31,96,229,69.633081,5
36497,2022-12-31,97,300,91.222377,235
36498,2022-12-31,98,368,111.899449,275


We do the necessary transformations in order to adapt the dataset to the requirements of the discharge capacity prediction model.

In [4]:
def generate_discharge_mock_dataset(vehicle_df: pd.DataFrame) -> pd.DataFrame:
  discharge_capacity_df = vehicle_df.drop(columns=['plugged_time_minutes'])
  discharge_capacity_df['day_of_week'] = discharge_capacity_df['date'].dt.dayofweek
  discharge_capacity_df['week_of_year'] = discharge_capacity_df['date'].dt.isocalendar().week
  discharge_capacity_df = discharge_capacity_df.groupby(['date', 'day_of_week', 'week_of_year']).agg({
    'usable_capacity': 'sum',
    'discharged': 'sum'
  }).reset_index()
  return discharge_capacity_df

discharge_capacity_df = generate_discharge_mock_dataset(vehicle_df)

discharge_capacity_df

Unnamed: 0,date,day_of_week,week_of_year,usable_capacity,discharged
0,2022-01-01,5,52,24002,7298.398281
1,2022-01-02,6,52,24997,7600.952497
2,2022-01-03,0,1,25619,7790.086891
3,2022-01-04,1,1,25415,7728.055675
4,2022-01-05,2,1,24115,7332.758710
...,...,...,...,...,...
360,2022-12-27,1,52,24491,7447.090755
361,2022-12-28,2,52,22907,6965.436606
362,2022-12-29,3,52,25109,7635.008851
363,2022-12-30,4,52,24449,7434.319622


We create a class with methods that return specific parts of the dataset. These methods are used to simplify the training input section.

In [5]:
class DischargeCapacityDataset():
  """Dataset for discharge capacity."""

  def __init__(self, dataframe: pd.DataFrame = None):
    """Initialize."""
    self._data = dataframe

  def getTrainHyperparams(self):
    """Get hyperparameters for training."""
    return self._data[['day_of_week', 'week_of_year', 'usable_capacity']]

  def getTestHyperparams(self):
    """Get hyperparameters for testing."""
    return self._data[['day_of_week', 'week_of_year', 'usable_capacity']]

  def getTrainLabels(self):
    """Get labels for training."""
    return self._data['discharged']

dataset = DischargeCapacityDataset(discharge_capacity_df)

We define the Neural Network that will work as a regression model. We tell it to use the previously generated dataset and we simplify the input separation using the previous DischargeCapacityDataset wrapper class.

In [9]:
class RecurrentRegressionModel():
  """Recurrent regression model."""

  def __init__(self):
    """Initialize the model."""
    self.input_dim = 3
    self.num_epochs = 7
    self._build()

  def _build(self):
    """Build the model."""
    self._model = Sequential()
    self._model.add(LSTM(units=50, activation='relu', return_sequences=True, input_shape=(1, self.input_dim)))
    self._model.add(Dropout(0.2))
    self._model.add(LSTM(units=50, activation='relu', return_sequences=True))
    self._model.add(Dropout(0.2))
    self._model.add(LSTM(units=50, activation='relu'))
    self._model.add(Dense(1))
    self._model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

  def train(self, dataset: DischargeCapacityDataset):
    """Train the model."""
    train_data = dataset.getTrainHyperparams().values.reshape(-1, 1, self.input_dim).astype('float32')
    train_labels = dataset.getTrainLabels().values.reshape(-1, 1).astype('float32')
    self._model.fit(
      train_data, train_labels,
      epochs=self.num_epochs, batch_size=1, verbose=1,
      validation_split=0.2 # 20% of the data will be used for validation
    )

  def predict(self, dataset):
    """Predict."""
    return self._model.predict(
      dataset.getTestHyperparams().values.reshape(-1, 1, self.input_dim)
    ).flatten().tolist()

Lastly, we create and train the model.

In [11]:
discharge_capacity_model = RecurrentRegressionModel()

discharge_capacity_model.train(dataset)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
