In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [2]:

# Read the crew-demand dataset (CSV path is relative to the notebook's working directory)
data_path = "../data/output_with_combined_patterns.csv"
df = pd.read_csv(data_path)


In [3]:

# Columns whose value combination identifies one demand series
categorical_cols = ['task_ID', 'Gate_number', 'Floor_No', 'shift_no']

# Cast every key column to pandas' categorical dtype in a single pass
df = df.astype({key_col: 'category' for key_col in categorical_cols})

# Containers filled by the windowing loop in the next cell
sequences, target = [], []
scalers = {}  # group key -> scaler fitted on that group's demand values


In [4]:
# Build fixed-length sliding windows of scaled crew demand, one series per group.
#
# The containers are (re)created in this cell so that it is idempotent: running
# it a second time rebuilds the data instead of appending a duplicate copy of
# every window to lists left over from the previous run.
sequences = []  # (group key, window of seq_length scaled values)
target = []     # scaled value that immediately follows each window
scalers = {}    # group key -> fitted scaler, needed later to undo the scaling

seq_length = 10  # number of past observations per input window (loop-invariant)

for name, group in df.groupby(categorical_cols, observed=False):  # Pass observed=False
    # With observed=False the grouper may surface category combinations that
    # have no rows (depends on the pandas version); MinMaxScaler cannot be fit
    # on zero samples, so such groups are skipped.
    if group.empty:
        continue

    demand_values = group['crew_demand'].values.reshape(-1, 1)

    # Each group is scaled independently to [-1, 1].
    # NOTE(review): the scaler is fit on the group's full series, i.e. before the
    # train/test split, so test-period extremes influence the scaling.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_demand = scaler.fit_transform(demand_values).flatten()
    scalers[name] = scaler  # Store the scaler for this group

    # Window i covers positions [i, i + seq_length) and is paired with the value
    # at position i + seq_length. Groups with seq_length or fewer rows yield no
    # windows because the range is empty.
    for i in range(len(scaled_demand) - seq_length):
        sequences.append((name, scaled_demand[i:i + seq_length]))  # keep the group key with the window
        target.append(scaled_demand[i + seq_length])

# Prepare input and output data
X = np.array([seq[1] for seq in sequences])
y = np.array(target)
groups = [seq[0] for seq in sequences]

In [5]:
# Hold out 20% of the windows for testing, stratified by group key so each
# group appears in both partitions; the group keys are split alongside X and y.
# NOTE(review): windows of one series overlap, so a shuffled split places
# near-identical windows on both sides - consider a chronological split.
split_parts = train_test_split(
    X,
    y,
    groups,
    test_size=0.2,
    random_state=42,
    stratify=groups,
)
X_train, X_test, y_train, y_test, groups_train, groups_test = split_parts

# Append a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1)
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]


In [6]:

# Build the LSTM model

# Seed the Python, NumPy and backend random generators before any layer is
# created so that weight initialisation and batch shuffling repeat across runs.
# 42 matches the random_state used for the train/test split.
keras.utils.set_random_seed(42)

model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1], X_train.shape[2])),  # (timesteps, features)
    # First recurrent layer returns the full sequence so the second LSTM can consume it
    keras.layers.LSTM(64, activation='tanh', return_sequences=True),
    # Second recurrent layer returns only its final output
    keras.layers.LSTM(32, activation='tanh'),
    # Single linear unit: the predicted next (scaled) demand value
    keras.layers.Dense(1)
])

# Mean squared error is optimised; mean absolute error is tracked as a metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])


In [7]:

# Fit the network; validation_split=0.1 reserves a tenth of the training
# samples for per-epoch validation
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=32,
    epochs=50,
    verbose=1,
)

# Score the held-out test windows (values are still in scaled units)
test_scores = model.evaluate(X_test, y_test, verbose=0)
loss, mae = test_scores
print(f"Mean Absolute Error on Scaled Test Set: {mae}")

# Raw (scaled) model outputs for every test window
predictions_scaled = model.predict(X_test)

# Containers for the values mapped back to the original scale
predictions, y_test_original, groups_used = [], [], []


Epoch 1/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 13s 5ms/step - loss: 0.1896 - mae: 0.3483 - val_loss: 0.0905 - val_mae: 0.2347
Epoch 2/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 11s 5ms/step - loss: 0.0844 - mae: 0.2268 - val_loss: 0.0811 - val_mae: 0.2233
Epoch 3/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 11s 5ms/step - loss: 0.0771 - mae: 0.2172 - val_loss: 0.0824 - val_mae: 0.2287
Epoch 4/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 10s 5ms/step - loss: 0.0752 - mae: 0.2160 - val_loss: 0.0753 - val_mae: 0.2161
Epoch 5/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 10s 5ms/step - loss: 0.0731 - mae: 0.2135 - val_loss: 0.0736 - val_mae: 0.2150
Epoch 6/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ 11s 5ms/step - loss: 0.0720 - mae: 0.2115 - val_loss: 0.0775 - val_mae: 0.2182
Epoch 7/50
2145/2145 ━━━━━━━━━━━━━━━━━━━━ [output truncated]

In [8]:

# Map every test prediction and its true target back to the original demand
# scale, using the scaler that was fitted for that sample's group.
#
# The result lists are (re)created in this cell so that it is idempotent:
# running it again rebuilds them instead of appending a second copy of every
# row, which would otherwise distort the metrics and the exported CSV.
predictions = []
y_test_original = []
groups_used = []

for i, group in enumerate(groups_test):
    scaler = scalers[group]

    # inverse_transform expects a 2-D column, hence the reshape
    prediction_scaled = predictions_scaled[i].reshape(-1, 1)
    prediction = scaler.inverse_transform(prediction_scaled).flatten()

    # Drop the fractional part (truncate toward zero): same result as taking
    # ceil for negative values and floor for non-negative ones.
    # NOTE(review): values are truncated rather than rounded and are not clipped,
    # so a negative demand can be reported - confirm this is intended.
    prediction_adjusted = np.trunc(prediction)

    actual = scaler.inverse_transform(y_test[i].reshape(-1, 1)).flatten()

    predictions.append(prediction_adjusted[0])
    y_test_original.append(actual[0])
    groups_used.append(group)


In [9]:

# Error of the truncated predictions against the actual values, in original demand units
mse = mean_squared_error(y_test_original, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_original, predictions)

print(f"Root Mean Squared Error on Test Set: {rmse}")
print(f"Mean Absolute Error on Test Set: {mae}")


Root Mean Squared Error on Test Set: 0.9843750715643068
Mean Absolute Error on Test Set: 0.7640732385499187


In [10]:

# Assemble the export table: one row per test sample, holding the four
# components of its group key followed by the actual and predicted demand.
key_columns = ('Task_ID', 'Gate_number', 'Floor_No', 'Shift_no')
frame_data = {
    column: [key[position] for key in groups_used]
    for position, column in enumerate(key_columns)
}
frame_data['Actual'] = y_test_original
frame_data['Predicted'] = predictions

predictions_df = pd.DataFrame(frame_data)


In [11]:

# Write the predictions table to disk. The destination is defined once so the
# file that is written and the path that is reported cannot drift apart.
output_path = "../data/predictions_output.csv"
predictions_df.to_csv(output_path, index=False)
print(f"Predictions saved to {output_path}")


Predictions saved to ../data/predictions_output.csv


In [12]:

# Demonstrate one forecast, using the final window of the test set
last_group = groups_test[-1]
last_scaler = scalers[last_group]

# Add a leading batch axis: (timesteps, 1) -> (1, timesteps, 1)
last_sequence = X_test[-1][np.newaxis, ...]

predicted_value_scaled = model.predict(last_sequence)

# Undo the group's scaling, then truncate toward zero (ceil for negatives, floor otherwise)
predicted_value = last_scaler.inverse_transform(predicted_value_scaled).flatten()
predicted_value_adjusted = np.trunc(predicted_value)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


In [13]:

# Report the group key together with its truncated next-step forecast
print("\n--- Single Prediction Example ---")
next_value = predicted_value_adjusted[0]
print(f"Group: {last_group}, Predicted next value: {next_value:.2f}")


--- Single Prediction Example ---
Group: ('T-007', 28, 2, 2), Predicted next value: -1.00
