In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [4]:

# Read the crew-demand dataset from the project's data directory.
df = pd.read_csv("../data/output_with_combined_patterns.csv")

# Key columns are stored as pandas categoricals; the cast happens after
# parsing, so each category keeps the dtype the CSV reader inferred for it.
categorical_cols = ['task_ID', 'Gate_number', 'Floor_No', 'shift_no']
df = df.astype({col: 'category' for col in categorical_cols})


In [None]:


# Number of consecutive scaled demand values in one input window; the value
# right after a window is that window's prediction target.
seq_length = 10

sequences = []  # (group key, window of seq_length scaled values)
target = []     # scaled value immediately following each window
scalers = {}    # group key -> MinMaxScaler fitted on that group's demand
for name, group in df.groupby(categorical_cols, observed=False):  # Pass observed=False
    # A group needs more than seq_length rows to yield even one
    # (window, target) pair. Skipping shorter groups avoids fitting scalers
    # that are never used and keeps an empty group from reaching
    # MinMaxScaler.fit_transform, which rejects zero-sample input.
    if len(group) <= seq_length:
        continue

    # Each group is scaled on its own to the range [-1, 1].
    # NOTE(review): the scaler sees the group's whole series, including values
    # that a later split may assign to the test set - confirm this is acceptable.
    demand_values = group['crew_demand'].values.reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_demand = scaler.fit_transform(demand_values).flatten()
    scalers[name] = scaler  # kept so predictions can be mapped back to original units

    # Slide a window of seq_length values over the series with stride 1.
    for i in range(len(scaled_demand) - seq_length):
        sequences.append((name, scaled_demand[i:i + seq_length]))  # group key travels with its window
        target.append(scaled_demand[i + seq_length])

In [7]:

# Separate the stored (group key, window) pairs into model inputs and keys.
X = np.array([window for _, window in sequences])
y = np.array(target)
groups = [key for key, _ in sequences]

# 80/20 random split with a fixed seed, stratified on the group key.
# NOTE(review): windows are drawn with stride 1, so neighbouring windows of a
# group overlap; a random split can place overlapping windows on both sides.
(
    X_train, X_test,
    y_train, y_test,
    groups_train, groups_test,
) = train_test_split(
    X,
    y,
    groups,
    test_size=0.2,
    random_state=42,
    stratify=groups,
)

# Add a trailing feature axis of size 1: (samples, timesteps) -> (samples, timesteps, 1).
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]


In [9]:

# Seed the Python, NumPy and backend random generators before any layer
# weights are created, so weight initialisation and the shuffling done during
# training repeat across kernel restarts. The value matches the random_state
# used for the train/test split.
# NOTE(review): some accelerator kernels may still be non-deterministic.
SEED = 42
keras.utils.set_random_seed(SEED)

# Build the LSTM model: two stacked LSTM layers followed by a single-unit
# regression head.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1], X_train.shape[2])),  # (timesteps, features)
    # return_sequences=True hands the full output sequence to the next LSTM.
    keras.layers.LSTM(64, activation='tanh', return_sequences=True),
    keras.layers.LSTM(32, activation='tanh'),
    keras.layers.Dense(1)  # one scaled demand value per input window
])

In [10]:

# Configure training: Adam optimiser, mean-squared-error loss, and mean
# absolute error reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae'],
)


In [11]:

# Fit on the training windows; 10% of them are held back for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

# Score the test windows. Values are still in scaled units, not original demand.
test_scores = model.evaluate(X_test, y_test, verbose=0)
loss, mae = test_scores
print(f"Mean Absolute Error on Scaled Test Set: {mae}")


Epoch 1/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.1881 - mae: 0.3461 - val_loss: 0.0896 - val_mae: 0.2350
Epoch 2/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0840 - mae: 0.2275 - val_loss: 0.0809 - val_mae: 0.2233
Epoch 3/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0760 - mae: 0.2168 - val_loss: 0.0754 - val_mae: 0.2145
Epoch 4/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0731 - mae: 0.2124 - val_loss: 0.0747 - val_mae: 0.2154
Epoch 5/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0712 - mae: 0.2097 - val_loss: 0.0730 - val_mae: 0.2123
Epoch 6/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0702 - mae: 0.2087 - val_loss: 0.0729 - val_mae: 0.2103
Epoch 7/50
[1m2145/2145[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [27]:

# Run the network over every test window; outputs are in scaled units.
predictions_scaled = model.predict(X_test)

predictions = []      # truncated predictions in original units
y_test_original = []  # true targets in original units
groups_used = []      # group key of each test sample, in the same order

# Map every sample back to original units with the scaler of its own group.
for scaled_pred, scaled_true, group in zip(predictions_scaled, y_test, groups_test):
    scaler = scalers[group]
    prediction = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).flatten()
    # Drop the fractional part, i.e. round toward zero (ceil for negatives,
    # floor otherwise).
    # NOTE(review): rounding toward zero shrinks magnitudes; confirm this is
    # preferred over rounding to the nearest integer.
    prediction_adjusted = np.trunc(prediction)
    actual = scaler.inverse_transform(scaled_true.reshape(-1, 1)).flatten()
    predictions.append(prediction_adjusted[0])
    y_test_original.append(actual[0])
    groups_used.append(group)


[1m596/596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [25]:

# Error metrics on the de-scaled test targets and predictions.
mse = mean_squared_error(y_test_original, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_original, predictions)

print(f"Root Mean Squared Error on Test Set: {rmse}")
print(f"Mean Absolute Error on Test Set: {mae}")


Root Mean Squared Error on Test Set: 0.8027274530945072
Mean Absolute Error on Test Set: 0.6293421478450479


In [26]:

# Tabulate real and predicted values next to their groups. Printing the frame
# is safe for long results because pandas elides the middle rows.
results_df = pd.DataFrame({'Group': groups_used, 'Actual': y_test_original, 'Predicted': predictions})
print(results_df)

# Concise per-row listing. It is capped because one line per test sample
# floods the cell output; raise max_rows_shown to inspect more rows.
max_rows_shown = 20
print("\n--- Concise Output ---")
for row_group, row_actual, row_predicted in zip(
    groups_used[:max_rows_shown],
    y_test_original[:max_rows_shown],
    predictions[:max_rows_shown],
):
    print(f"Group: {row_group}, Actual: {row_actual:.2f}, Predicted: {row_predicted:.2f}")

# Say how much was left out so the truncation is never silent.
rows_hidden = len(y_test_original) - max_rows_shown
if rows_hidden > 0:
    print(f"... {rows_hidden} more rows not shown")


                   Group  Actual  Predicted
0      (T-004, 17, 2, 3)     1.0  -0.226723
1      (T-001, 39, 2, 4)    -2.0  -2.033513
2      (T-004, 37, 2, 1)     1.0  -0.268679
3      (T-010, 35, 2, 4)     1.0   1.911587
4      (T-001, 31, 2, 3)     4.0   2.933181
...                  ...     ...        ...
19056  (T-017, 36, 2, 2)    -2.0  -2.011551
19057  (T-015, 28, 2, 2)     1.0   1.525736
19058  (T-008, 15, 2, 1)     3.0   3.101595
19059  (T-005, 18, 2, 1)    -2.0  -1.767323
19060  (T-007, 28, 2, 2)     0.0  -1.060567

[19061 rows x 3 columns]

--- Concise Output ---
Group: ('T-004', 17, 2, 3), Actual: 1.00, Predicted: -0.23
Group: ('T-001', 39, 2, 4), Actual: -2.00, Predicted: -2.03
Group: ('T-004', 37, 2, 1), Actual: 1.00, Predicted: -0.27
Group: ('T-010', 35, 2, 4), Actual: 1.00, Predicted: 1.91
Group: ('T-001', 31, 2, 3), Actual: 4.00, Predicted: 2.93
Group: ('T-019', 40, 2, 1), Actual: 0.00, Predicted: -0.35
Group: ('T-018', 28, 2, 1), Actual: 2.00, Predicted: 2.71
Group: ('T-

In [23]:

# Example: predict the value that follows the last test window.
group = groups_test[-1]
scaler = scalers[group]

# Slice instead of index so the window keeps a leading batch axis of size 1.
last_sequence = X_test[-1:]
predicted_value_scaled = model.predict(last_sequence)

# Back to original units with the group's own scaler, then drop the
# fractional part (round toward zero).
predicted_value = scaler.inverse_transform(predicted_value_scaled).flatten()
predicted_value_adjusted = np.trunc(predicted_value)

print("\n--- Single Prediction Example ---")
print(f"Group: {group}, Predicted next value: {predicted_value_adjusted[0]:.2f}")
# The true value comes from the de-scaled test targets built in an earlier cell.
print(f"Real next value: {y_test_original[-1]:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

--- Single Prediction Example ---
Group: ('T-007', 28, 2, 2), Predicted next value: -1.00
Real next value: 0.00
