In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from itertools import product
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, BatchNormalization

We generate a base vehicle dataset against which the models will be tested:
- First we define the thresholds to generate data.
- Then we create the pandas vehicle dataframe, using random data that falls within the defined thresholds, for the stated period (1 year).

In [2]:
num_vehicles = 100
start_date = datetime.strptime("2022-01-01", '%Y-%m-%d')
end_date = datetime.strptime("2022-12-31", '%Y-%m-%d')
min_battery_capacity = 120
max_battery_capacity = 370
max_plugged_time = 360

In [3]:
def generate_vehicle_mock_dataset() -> pd.DataFrame:
  np.random.seed(42)

  # Generate all possible combinations of dates and vehicle_ids
  dates = pd.date_range(start=start_date, end=end_date, freq='D')
  vehicle_ids = np.arange(0, num_vehicles)

  # Create de dataframe
  all_combinations = list(product(dates, vehicle_ids))
  df = pd.DataFrame(all_combinations, columns=['date', 'vehicle_id'])
  df['usable_capacity'] = np.random.randint(min_battery_capacity, max_battery_capacity, size=len(df))
  df['discharged'] = df['usable_capacity'] * np.random.random()
  df['plugged_time_minutes'] = np.random.randint(1, max_plugged_time, size=len(df))

  return df

vehicle_df = generate_vehicle_mock_dataset()

vehicle_df

Unnamed: 0,date,vehicle_id,usable_capacity,discharged,plugged_time_minutes
0,2022-01-01,0,222,67.504559,255
1,2022-01-01,1,299,90.918302,126
2,2022-01-01,2,212,64.463813,209
3,2022-01-01,3,134,40.745995,134
4,2022-01-01,4,226,68.720857,86
...,...,...,...,...,...
36495,2022-12-31,95,138,41.962293,3
36496,2022-12-31,96,229,69.633081,5
36497,2022-12-31,97,300,91.222377,235
36498,2022-12-31,98,368,111.899449,275


We do the necessary transformations to adapt the dataset to the requirements of the user behaviour prediction model.

In [4]:
def generate_behaviour_mock_dataset(vehicle_df: pd.DataFrame) -> pd.DataFrame:
  # We first define a helper method to get the last 30 days of plugged time for a given vehicle
  def get_last_30_days(row, dataframe):
    filtered_data = dataframe[(dataframe['vehicle_id'] == row['vehicle_id']) &
                              (dataframe['date'] >= row['date'] - pd.DateOffset(days=30)) &
                              (dataframe['date'] < row['date'])]['plugged_time_minutes'].tolist()
    if len(filtered_data) < 30:
      padded_data = np.pad(filtered_data, (30 - len(filtered_data), 0), mode='constant', constant_values=0)
    else:
      padded_data = filtered_data[-30:]
    return padded_data

  user_behaviour_df = vehicle_df.drop(columns=['usable_capacity', 'discharged'])
  user_behaviour_df['last_30_days'] = user_behaviour_df.apply(
    get_last_30_days, axis=1, dataframe=user_behaviour_df
  )
  user_behaviour_df = pd.concat([user_behaviour_df, pd.DataFrame(user_behaviour_df['last_30_days'].tolist(),
                                                                  columns=[f'day_{i+1}' for i in range(30)])],
                                  axis=1)
  user_behaviour_df['day_of_week'] = user_behaviour_df['date'].dt.dayofweek
  user_behaviour_df['week_of_year'] = user_behaviour_df['date'].dt.isocalendar().week
  user_behaviour_df = user_behaviour_df.drop(columns=['date', 'vehicle_id', 'last_30_days'])

  return user_behaviour_df

user_behaviour_df = generate_behaviour_mock_dataset(vehicle_df)

user_behaviour_df

Unnamed: 0,plugged_time_minutes,day_1,day_2,day_3,day_4,day_5,day_6,day_7,day_8,day_9,...,day_23,day_24,day_25,day_26,day_27,day_28,day_29,day_30,day_of_week,week_of_year
0,255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,52
1,126,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,52
2,209,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,52
3,134,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,52
4,86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36495,3,109.0,30.0,327.0,125.0,192.0,285.0,150.0,127.0,68.0,...,247.0,230.0,236.0,261.0,202.0,213.0,296.0,33.0,5,52
36496,5,304.0,309.0,192.0,166.0,127.0,329.0,153.0,263.0,270.0,...,169.0,82.0,112.0,157.0,296.0,138.0,119.0,117.0,5,52
36497,235,309.0,332.0,277.0,1.0,10.0,93.0,217.0,144.0,133.0,...,70.0,157.0,33.0,261.0,130.0,261.0,62.0,171.0,5,52
36498,275,331.0,44.0,291.0,338.0,330.0,255.0,135.0,93.0,62.0,...,36.0,191.0,196.0,39.0,48.0,346.0,87.0,305.0,5,52


We create a class with methods that return specific parts of the dataset. These methods are used to simplify the training input section.

In [5]:
class UserBehaviourDataset():
  """Dataset for discharge capacity."""

  def __init__(self, dataframe: pd.DataFrame = None):
    """Initialize."""
    self._data = dataframe

  def getTrainHyperparams(self):
    """Get hyperparameters for training."""
    return self._data.drop(columns=['plugged_time_minutes'])

  def getTestHyperparams(self):
    """Get hyperparameters for testing."""
    return self._data.drop(columns=['plugged_time_minutes'])

  def getTrainLabels(self):
    """Get labels for training."""
    return self._data['plugged_time_minutes']

dataset = UserBehaviourDataset(user_behaviour_df)

We define the Neural Network that will work as a regression model. We tell it to use the previously generated dataset and we simplify the input separation using the previous DischargeCapacityDataset helper class.

In [8]:
class RecurrentRegressionWindowModel():
  """Recurrent regression model."""

  def __init__(self):
    """Initialize the model."""
    self.input_dim = 32
    self.num_epochs = 2
    self._build()

  def _build(self):
    """Build the model."""
    self._model = Sequential()
    self._model.add(LSTM(units=64, activation='relu', return_sequences=True, input_shape=(1, self.input_dim)))
    self._model.add(Dense(128, activation='relu'))
    self._model.add(BatchNormalization())
    self._model.add(LSTM(units=32, activation='relu', return_sequences=True))
    self._model.add(Dense(64, activation='relu'))
    self._model.add(BatchNormalization())
    self._model.add(LSTM(units=16, activation='relu', return_sequences=True))
    self._model.add(Dense(32, activation='relu'))
    self._model.add(BatchNormalization())
    self._model.add(LSTM(units=16, activation='relu'))
    self._model.add(Dense(1))
    self._model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

  def train(self, dataset: UserBehaviourDataset):
    """Train the model."""
    train_data = dataset.getTrainHyperparams().values.reshape(-1, 1, self.input_dim).astype('float32')
    train_labels = dataset.getTrainLabels().values.reshape(-1, 1).astype('float32')
    self._model.fit(
      train_data, train_labels,
      epochs=self.num_epochs, batch_size=1, verbose=1,
      validation_split=0.2 # 20% of the data will be used for validation
    )

  def predict(self, dataset):
    """Predict."""
    return self._model.predict(
      dataset.getTestHyperparams().values.reshape(-1, 1, self.input_dim)
    ).flatten().tolist()

We can finally create and train the model

In [9]:
user_behaviour_model = RecurrentRegressionWindowModel()

user_behaviour_model.train(dataset)

Epoch 1/2
Epoch 2/2
