In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

np.random.seed(42)
tf.random.set_seed(42)

n_samples = 5000
data = {
    "timestamp": pd.date_range("2025-01-01", periods=n_samples, freq="min"),
    "train_id": np.random.randint(1, 20, n_samples),
    "section_id": np.random.randint(1, 10, n_samples),
    "speed": np.random.randint(0, 120, n_samples),
    "occupancy": np.random.randint(0, 3, n_samples),
    "signal_status": np.random.choice([0,1,2], n_samples),
    "delay": np.random.randint(0, 30, n_samples),
    "distance_left": np.random.randint(100, 5000, n_samples)
}
df = pd.DataFrame(data)
df["time_to_clear"] = df["distance_left"] / (df["speed"] + 1)

df["congestion"] = np.where(
    (df["occupancy"] >= 2) |
    (df["speed"] < 30) |
    (df["signal_status"] == 0) |
    (df["delay"] > 15),
    1, 0
)

def create_sequences(data, sequence_length=10):
    features = ["speed", "occupancy", "signal_status", "delay", "time_to_clear"]
    X, y = [], []
    for i in range(sequence_length, len(data)):
        X.append(data[features].iloc[i-sequence_length:i].values)
        y.append(data["congestion"].iloc[i])
    return np.array(X), np.array(y)

sequence_length = 10
X_seq, y_seq = create_sequences(df, sequence_length)

X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])

X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

X_train = X_train_scaled.reshape(X_train.shape)
X_test = X_test_scaled.reshape(X_test.shape)

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(sequence_length, 5)),
    Dropout(0.3),
    LSTM(32, return_sequences=False),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=0
)

y_pred_proba = model.predict(X_test, verbose=0)
y_pred = (y_pred_proba > 0.5).astype(int).flatten()

print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

def predict_congestion(speed, occupancy, signal_status, delay, distance_left):
    time_to_clear = distance_left / (speed + 1)

    input_data = np.array([[speed, occupancy, signal_status, delay, time_to_clear]])
    input_data = np.repeat(input_data, sequence_length, axis=0).reshape(1, sequence_length, 5)
    input_data = scaler.transform(input_data.reshape(-1, 5)).reshape(1, sequence_length, 5)

    pred_proba = model.predict(input_data, verbose=0)[0][0]
    pred = (pred_proba > 0.5).astype(int)

    return pred, pred_proba

print("\nTesting different scenarios:")

test_cases = [
    {"name": "High congestion", "values": [10, 2, 0, 25, 1000]},
    {"name": "Red signal slow", "values": [15, 1, 0, 10, 2000]},
    {"name": "Multiple trains", "values": [40, 2, 1, 5, 800]},
    {"name": "High delay", "values": [50, 1, 2, 20, 1500]},
    {"name": "Free flow", "values": [80, 0, 2, 2, 500]}
]

for case in test_cases:
    speed, occupancy, signal, delay, distance = case["values"]
    pred, prob = predict_congestion(speed, occupancy, signal, delay, distance)

    print(f"\n{case['name']} (speed={speed}, occupancy={occupancy}, signal={signal}, delay={delay}, distance={distance}):")
    print(f"Prediction: {'CONGESTION' if pred == 1 else 'NO CONGESTION'}")
    print(f"Probability: {prob:.4f}")

print("\n" + "="*50)
print("CUSTOM INPUT:")
print("Enter values (speed, occupancy, signal_status, delay, distance_left):")

try:
    speed = int(input("Speed (km/h): "))
    occupancy = int(input("Occupancy (0-3): "))
    signal_status = int(input("Signal (0=Red, 1=Yellow, 2=Green): "))
    delay = int(input("Delay (minutes): "))
    distance_left = int(input("Distance (meters): "))

    prediction, probability = predict_congestion(speed, occupancy, signal_status, delay, distance_left)

    print(f"\nYour input prediction:")
    print(f"Result: {'CONGESTION' if prediction == 1 else 'NO CONGESTION'}")
    print(f"Probability: {probability:.4f}")

except:
    print("Using default example: low speed, high occupancy scenario")
    pred, prob = predict_congestion(5, 2, 0, 30, 4000)
    print(f"Result: {'CONGESTION' if pred == 1 else 'NO CONGESTION'}")
    print(f"Probability: {prob:.4f}")


  super().__init__(**kwargs)


Model Accuracy: 0.8226452905811623

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       177
           1       0.82      1.00      0.90       821

    accuracy                           0.82       998
   macro avg       0.41      0.50      0.45       998
weighted avg       0.68      0.82      0.74       998


Testing different scenarios:

High congestion (speed=10, occupancy=2, signal=0, delay=25, distance=1000):
Prediction: CONGESTION
Probability: 0.8889

Red signal slow (speed=15, occupancy=1, signal=0, delay=10, distance=2000):
Prediction: CONGESTION
Probability: 0.8231


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Multiple trains (speed=40, occupancy=2, signal=1, delay=5, distance=800):
Prediction: CONGESTION
Probability: 0.8199

High delay (speed=50, occupancy=1, signal=2, delay=20, distance=1500):
Prediction: CONGESTION
Probability: 0.8305

Free flow (speed=80, occupancy=0, signal=2, delay=2, distance=500):
Prediction: CONGESTION
Probability: 0.8882

CUSTOM INPUT:
Enter values (speed, occupancy, signal_status, delay, distance_left):
Speed (km/h): 3
Occupancy (0-3): 2
Signal (0=Red, 1=Yellow, 2=Green): 1
Delay (minutes): 30
Distance (meters): 400

Your input prediction:
Result: CONGESTION
Probability: 0.8961
