In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [2]:

# Load the monthly report CSV (path is relative to the notebook's working directory).
file_name = '../DATASETS/RNNs-datasets/data_reports_monthly.csv'
database = pd.read_csv(file_name)

# Replace '-' with NaN so the rows can be discarded by dropna below.
database = database.replace('-', np.nan)

# Keep only rows where every column used by the model is present.
count_columns = ['Trips Per Day', 'Unique Vehicles', 'Unique Drivers']
database = database.dropna(subset=count_columns + ['License Class'])

# Remove comma characters from the count columns, then cast them to float.
for count_column in count_columns:
    database[count_column] = database[count_column].replace({',': ''}, regex=True).astype(float)

print(database['Month/Year'].head())

0    2024-10
1    2024-10
2    2024-10
3    2024-10
4    2024-10
Name: Month/Year, dtype: object


In [3]:

# Fit a label encoder on the month column purely to inspect which integer
# code each distinct "Month/Year" value receives.
encoder = LabelEncoder()
encoder.fit(database['Month/Year'])

# Check the mappings: the position of a label in `classes_` is its integer code.
print("Correct Mappings:")
for code in range(len(encoder.classes_)):
    print(f"{code}: {encoder.classes_[code]}")
    

Correct Mappings:
0: 2010-01
1: 2010-02
2: 2010-03
3: 2010-04
4: 2010-05
5: 2010-06
6: 2010-07
7: 2010-08
8: 2010-09
9: 2010-10
10: 2010-11
11: 2010-12
12: 2011-01
13: 2011-02
14: 2011-03
15: 2011-04
16: 2011-05
17: 2011-06
18: 2011-07
19: 2011-08
20: 2011-09
21: 2011-10
22: 2011-11
23: 2011-12
24: 2012-01
25: 2012-02
26: 2012-03
27: 2012-04
28: 2012-05
29: 2012-06
30: 2012-07
31: 2012-08
32: 2012-09
33: 2012-10
34: 2012-11
35: 2012-12
36: 2013-01
37: 2013-02
38: 2013-03
39: 2013-04
40: 2013-05
41: 2013-06
42: 2013-07
43: 2013-08
44: 2013-09
45: 2013-10
46: 2013-11
47: 2013-12
48: 2014-01
49: 2014-02
50: 2014-03
51: 2014-04
52: 2014-05
53: 2014-06
54: 2014-07
55: 2014-08
56: 2014-09
57: 2014-10
58: 2014-11
59: 2014-12
60: 2015-01
61: 2015-02
62: 2015-03
63: 2015-04
64: 2015-05
65: 2015-06
66: 2015-07
67: 2015-08
68: 2015-09
69: 2015-10
70: 2015-11
71: 2015-12
72: 2016-01
73: 2016-02
74: 2016-03
75: 2016-04
76: 2016-05
77: 2016-06
78: 2016-07
79: 2016-08
80: 2016-09
81: 2016-10
82: 2016

In [5]:
# Each categorical column gets its own encoder. Previously a single encoder was
# refit for both columns, which discarded the License Class mapping and made
# the decoding in predict_month correct only because Month/Year was fitted last.
#
# NOTE: this cell overwrites the two columns in place. Run it once per fresh
# load of `database`; re-running it would fit the encoders on the integer codes
# instead of the original labels.
license_encoder = LabelEncoder()
database['License Class'] = license_encoder.fit_transform(database['License Class'])

# `encoder` remains the Month/Year encoder: predict_month calls
# encoder.inverse_transform to turn a predicted class index back into a label.
encoder = LabelEncoder()
database['Month/Year'] = encoder.fit_transform(database['Month/Year'])

# Features and target
feature_columns = ['Trips Per Day', 'Unique Vehicles', 'Unique Drivers', 'License Class']
features = database[feature_columns].values
target = database['Month/Year'].values

# Rescale every feature column with min-max scaling; the fitted scaler is reused at prediction time.
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)


In [6]:

# Create sequences
# Window length: number of consecutive rows in each input sequence. It is also
# used for the model's input shape and for input validation in predict_month.
timesteps = 7

def create_sequences(features, target, timesteps):
    """Build sliding windows over `features` paired with the label that follows each window.

    Window ``i`` covers rows ``i`` .. ``i + timesteps - 1`` of `features`, and its
    label is ``target[i + timesteps]``.

    Parameters
    ----------
    features : sequence supporting ``len`` and slicing
        Rows to be windowed.
    target : indexable
        Labels, indexed by the same row positions as `features`.
    timesteps : int
        Number of consecutive rows per window.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` holding ``len(features) - timesteps`` windows and labels;
        both are empty arrays when there are not enough rows for one window.
    """
    window_starts = range(len(features) - timesteps)
    windows = [features[start : start + timesteps] for start in window_starts]
    labels = [target[start + timesteps] for start in window_starts]
    return np.array(windows), np.array(labels)

# Slide a window of `timesteps` rows over the scaled features; each window is
# labelled with the target of the row that immediately follows it.
X, y = create_sequences(features_scaled, target, timesteps)

# One-hot encode target
# Size the one-hot vectors from the largest label code in `target`, not from the
# labels that survive windowing. The first `timesteps` rows never become targets,
# so a label occurring only there would make len(np.unique(y)) too small:
# to_categorical would then fail on the larger codes, and class indices would no
# longer be guaranteed to line up with the label encoder's codes.
num_classes = int(np.max(target)) + 1
y = to_categorical(y, num_classes=num_classes)

# Train-test split
# NOTE(review): the split shuffles overlapping windows, so neighbouring rows can
# land on both sides of the split. Consider a chronological split
# (shuffle=False) if the test set is meant to be unseen data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# RNN model: one recurrent layer followed by a small dense classification head.
n_features = X_train.shape[2]  # size of the last axis of the training windows

model = Sequential()
# The recurrent layer returns only its final output (return_sequences=False).
model.add(SimpleRNN(64, activation='relu', input_shape=(timesteps, n_features), return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
# Softmax output with one unit per one-hot class.
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 64)                4416      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 177)               5841      
                                                                 
Total params: 12337 (48.19 KB)
Trainable params: 12337 (48.19 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# Train for 20 epochs; 20% of the training windows are held out for validation
# and the returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=20,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [9]:
# Score the model on the held-out test windows; the result is unpacked as the
# loss followed by the accuracy metric requested in model.compile.
test_scores = model.evaluate(X_test, y_test, verbose=1)
loss, accuracy = test_scores
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

Test Loss: 6.2016, Test Accuracy: 0.0000


In [10]:
def predict_month(features):
    """Predict the "Month/Year" label for one window of unscaled feature rows.

    Relies on the module-level `timesteps`, `scaler`, `model` and `encoder`.
    `encoder` must be the one fitted on "Month/Year", because it decodes the
    predicted class index back into a label.

    Parameters
    ----------
    features : array-like of shape (timesteps, n_features)
        One window of rows with the same column layout the scaler was fitted
        on. Values are scaled here, so pass them unscaled.

    Returns
    -------
    The original label that `encoder` maps to the highest-scoring class.

    Raises
    ------
    ValueError
        If `features` cannot be converted to a numeric array or is not a 2-D
        array of shape (timesteps, n_features).
    """
    features = np.asarray(features, dtype=float)

    # Validate against what the fitted scaler expects. The earlier check compared
    # the column count with itself, so a wrong feature count was never caught,
    # and 1-D input raised IndexError instead of ValueError.
    n_features = scaler.n_features_in_
    if features.shape != (timesteps, n_features):
        raise ValueError(
            f"Input must have shape ({timesteps}, {n_features}); got {features.shape}."
        )

    # Scale with the training-time scaler, then add the leading batch axis the model expects.
    features_scaled = scaler.transform(features)
    features_reshaped = features_scaled.reshape((1, timesteps, n_features))

    # Predict using the trained model; the batch holds a single window.
    prediction = model.predict(features_reshaped)
    predicted_class = int(np.argmax(prediction[0]))

    # Decode the predicted class back to the original label
    return encoder.inverse_transform([predicted_class])[0]

In [11]:
# Example input: 7 timesteps, each with 4 features
# Column order matches the training feature matrix:
# [Trips Per Day, Unique Vehicles, Unique Drivers, License Class].
# The values are passed to the scaler as-is, so the last column has to be the
# integer code of the License Class (as used for training), not the original label.
example_features = [
    [50, 10, 8, 3],  # Timestep 1
    [52, 12, 9, 4],  # Timestep 2
    [48, 11, 7, 3],  # Timestep 3
    [49, 9, 8, 2],   # Timestep 4
    [51, 10, 9, 3],  # Timestep 5
    [50, 11, 8, 3],  # Timestep 6
    [49, 10, 8, 3]   # Timestep 7
]

# Predict the "Month/Year"
predicted_month = predict_month(example_features)
print(f"Predicted Month/Year: {predicted_month}")

Predicted Month/Year: 2020-04
