<a href="https://colab.research.google.com/github/SOUMYADEEP-rgb/traffic-prediction-lstm/blob/main/traffic_jam_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# TRAFFIC CONGESTION PREDICTOR

In [None]:
import pandas as pd

# Published Google Sheet, exported as CSV through the gviz endpoint.
sheet_url = "https://docs.google.com/spreadsheets/d/1mSKkTIpItKbyvOpucce5jatukPVpc0yKySfdpFHw97E/gviz/tq?tqx=out:csv"

# Needs network access; every later cell works on this frame.
df = pd.read_csv(sheet_url)

# Convert the timestamp column to datetimes so the .dt accessor is available.
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Calendar features derived from the timestamp (dayofweek: Monday=0 ... Sunday=6).
df["hour"] = df["timestamp"].dt.hour
df["dayofweek"] = df["timestamp"].dt.dayofweek

print(df.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the cell output.
df.info()


            timestamp       location      lat      lon  current_speed  \
0 2025-09-06 11:05:53      Esplanade  22.5726  88.3639             23   
1 2025-09-06 11:05:54      Salt Lake  22.5867  88.4173             27   
2 2025-09-06 11:05:54  Howrah Bridge  22.5850  88.3468             15   
3 2025-09-06 11:05:55      EM Bypass  22.4989  88.4122             25   
4 2025-09-06 11:05:55        Airport  22.6394  88.4467             19   

   free_flow_speed  confidence  hour  dayofweek  
0               23         1.0    11          5  
1               27         1.0    11          5  
2               23         1.0    11          5  
3               25         1.0    11          5  
4               19         1.0    11          5  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4925 entries, 0 to 4924
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   timestamp        4925 non-null   datetime64[

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
# Nominal free-flow speed per monitored location. It replaces the
# free_flow_speed column that came with the sheet.
# NOTE(review): units presumably match current_speed — confirm against the data source.
free_flow_dict = {
    "Esplanade": 40,
    "Salt Lake": 45,
    "Howrah Bridge": 35,
    "EM Bypass": 50,
    "Airport": 40
}

# Series.map() yields NaN for any location missing from the table. A NaN would
# pass straight through MinMaxScaler into the model inputs and would be
# labelled "no jam" below (NaN comparisons are False), so fail loudly instead.
free_flow_speed = df["location"].map(free_flow_dict)
unmapped_rows = free_flow_speed.isna()
if unmapped_rows.any():
    unknown_locations = sorted(df.loc[unmapped_rows, "location"].astype(str).unique())
    raise ValueError(
        f"No free-flow speed configured for location(s): {unknown_locations}"
    )
df["free_flow_speed"] = free_flow_speed


# Integer id per location so it can be used as a numeric model feature.
le = LabelEncoder()
df["location_encoded"] = le.fit_transform(df["location"].astype(str))


# Model inputs, in the column order the fitted scaler_X expects.
FEATURE_COLUMNS = [
    "lat",
    "lon",
    "current_speed",
    "free_flow_speed",
    "confidence",
    "hour",
    "dayofweek",
    "location_encoded",
]

# NOTE(review): both scalers are fitted on every row, including rows that end
# up in the validation split used by model.fit — consider fitting on the
# training rows only to avoid leaking validation statistics.
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(df[FEATURE_COLUMNS])

# Separate scaler for the regression target so predictions can be mapped back
# to real speeds with scaler_y.inverse_transform.
scaler_y = MinMaxScaler()
y_speed_scaled = scaler_y.fit_transform(df[["current_speed"]])

# A row counts as a jam when the observed speed is below half the free-flow speed.
df["jam"] = (df["current_speed"] < 0.5 * df["free_flow_speed"]).astype(int)

# Double brackets keep the target 2-D: shape (n_rows, 1), like y_speed_scaled.
y_jam = df[["jam"]].values


In [None]:
# Number of consecutive rows in each input window fed to the model.
SEQ_LEN = 96
import numpy as np

def create_sequences(X, y_speed, y_jam, seq_len):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` holds rows ``X[i : i + seq_len]``; its targets are the speed
    and jam values at row ``i + seq_len``, i.e. the row right after the window.

    Args:
        X: Array-like of feature rows, indexed by row along axis 0.
        y_speed: Array-like of speed targets aligned row-for-row with ``X``.
        y_jam: Array-like of jam targets aligned row-for-row with ``X``.
        seq_len: Number of rows per window; must be at least 1.

    Returns:
        Tuple ``(X_seq, y_speed_seq, y_jam_seq)`` of NumPy arrays holding
        ``max(len(X) - seq_len, 0)`` samples. ``X_seq`` has shape
        ``(n_windows, seq_len, *X.shape[1:])``. When there are too few rows
        for a single window the arrays are empty but keep their trailing
        dimensions, so shape lookups such as ``X_seq.shape[2]`` still work.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
        IndexError: If a target array is shorter than ``X``.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

    # Accept lists / DataFrames as well as ndarrays.
    X = np.asarray(X)
    y_speed = np.asarray(y_speed)
    y_jam = np.asarray(y_jam)

    n_windows = max(len(X) - seq_len, 0)
    if n_windows == 0:
        # np.array([]) would collapse to shape (0,) and lose the window and
        # feature dimensions that callers read from the result.
        return (
            np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y_speed.shape[1:], dtype=y_speed.dtype),
            np.empty((0,) + y_jam.shape[1:], dtype=y_jam.dtype),
        )

    # Each target index is the row that immediately follows its window.
    target_rows = range(seq_len, seq_len + n_windows)
    X_seq = np.array([X[row - seq_len:row] for row in target_rows])
    y_speed_seq = np.array([y_speed[row] for row in target_rows])
    y_jam_seq = np.array([y_jam[row] for row in target_rows])
    return X_seq, y_speed_seq, y_jam_seq

# Window the scaled features and both targets into arrays of SEQ_LEN-row samples.
X_seq, y_speed_seq, y_jam_seq = create_sequences(X_scaled, y_speed_scaled, y_jam, SEQ_LEN)



In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so weight initialisation and training randomness repeat across
# Restart & Run All.
# NOTE(review): some GPU kernels can still be non-deterministic.
SEED = 42
set_random_seed(SEED)

# Shared encoder: two stacked LSTMs over windows of shape (SEQ_LEN, n_features).
inp = Input(shape=(SEQ_LEN, X_seq.shape[2]))
x = LSTM(128, return_sequences=True)(inp)  # keep every timestep for the next LSTM
x = Dropout(0.3)(x)
x = LSTM(64)(x)  # final hidden state only

# Regression head: next scaled speed value (linear output).
speed_out = Dense(32, activation="relu")(x)
speed_out = Dense(1, name="speed_output")(speed_out)

# Classification head: jam probability (sigmoid output).
jam_out = Dense(32, activation="relu")(x)
jam_out = Dense(1, activation="sigmoid", name="jam_output")(jam_out)

# The output layer names are the keys used for the loss/metric dicts below and
# for the target dict passed to model.fit.
model = Model(inputs=inp, outputs=[speed_out, jam_out])
model.compile(
    optimizer="adam",
    loss={"speed_output": "mse", "jam_output": "binary_crossentropy"},
    metrics={"speed_output": ["mae"], "jam_output": ["accuracy"]}
)

# -----------------------------
# Train model
# -----------------------------
# Stop once val_loss has not improved for 20 epochs and restore the weights
# from the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# validation_split holds out the last 20% of the windows (taken before
# shuffling), so validation covers the most recent part of the series.
# NOTE(review): windows next to the split boundary share rows with training windows.
history = model.fit(
    X_seq,
    {"speed_output": y_speed_seq, "jam_output": y_jam_seq},
    epochs=100,
    batch_size=256,
    validation_split=0.2,
    # Keras documents `callbacks` as a list of Callback instances.
    callbacks=[early_stop],
)



Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 757ms/step - jam_output_accuracy: 0.4667 - jam_output_loss: 0.6927 - loss: 0.9123 - speed_output_loss: 0.2188 - speed_output_mae: 0.3744 - val_jam_output_accuracy: 0.4513 - val_jam_output_loss: 0.7696 - val_loss: 0.8315 - val_speed_output_loss: 0.0696 - val_speed_output_mae: 0.2079
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 641ms/step - jam_output_accuracy: 0.6297 - jam_output_loss: 0.6510 - loss: 0.7043 - speed_output_loss: 0.0532 - speed_output_mae: 0.1874 - val_jam_output_accuracy: 0.4513 - val_jam_output_loss: 0.7138 - val_loss: 0.7636 - val_speed_output_loss: 0.0524 - val_speed_output_mae: 0.1932
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 728ms/step - jam_output_accuracy: 0.6150 - jam_output_loss: 0.6467 - loss: 0.6992 - speed_output_loss: 0.0519 - speed_output_mae: 0.1884 - val_jam_output_accuracy: 0.5435 - val_jam_output_loss: 0.7017 - 

In [None]:
# Sanity check: list the columns present on df at this point.
print(df.columns)


Index(['timestamp', 'location', 'lat', 'lon', 'current_speed',
       'free_flow_speed', 'confidence', 'hour', 'dayofweek',
       'location_encoded', 'jam'],
      dtype='object')


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

# Spot-check the model on the most recent windows (the tail of X_seq, which is
# where validation_split takes its hold-out from).
# NOTE(review): metrics over this few samples are anecdotal; score the whole
# hold-out before drawing conclusions.
N_EVAL = 10

pred_speed_scaled, pred_jam = model.predict(X_seq[-N_EVAL:])
# Undo the target scaling so speeds are reported in their original units.
pred_speed = scaler_y.inverse_transform(pred_speed_scaled).flatten()
# Threshold the sigmoid output at 0.5 to obtain a hard jam / no-jam label.
pred_jam_class = (pred_jam.flatten() > 0.5).astype(int)

actual_speed = scaler_y.inverse_transform(y_speed_seq[-N_EVAL:]).flatten()
actual_jam = y_jam_seq[-N_EVAL:].flatten()

print("Predicted speeds:", pred_speed)
print("Actual speeds:", actual_speed)
print("Predicted jam (1=Jam, 0=No Jam):", pred_jam_class)
print("Actual jam:", actual_jam)

# zero_division=0 reports 0 instead of warning when a class is absent from the sample.
print("Accuracy (Jam Prediction):", accuracy_score(actual_jam, pred_jam_class))
print("Precision:", precision_score(actual_jam, pred_jam_class, zero_division=0))
print("Recall:", recall_score(actual_jam, pred_jam_class, zero_division=0))
print("F1 Score:", f1_score(actual_jam, pred_jam_class, zero_division=0))

# Speed "accuracy" is 100 minus the mean absolute percentage error. A reading
# of exactly 0 would divide by zero and turn the mean into inf/nan, so such
# rows are left out of the average.
nonzero_speed = actual_speed != 0
if nonzero_speed.any():
    pct_error = (
        np.abs(pred_speed[nonzero_speed] - actual_speed[nonzero_speed])
        / actual_speed[nonzero_speed]
        * 100
    )
    speed_accuracy = np.mean(100 - pct_error)
else:
    # No usable reference speed in the sample: the percentage is undefined.
    speed_accuracy = float("nan")
print("Speed Prediction Accuracy (%):", speed_accuracy)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
Predicted speeds: [22.539366 24.696314 13.867456 18.438265 16.648424 21.856827 24.609806
 13.866454 16.907593 16.209465]
Actual speeds: [23. 23. 13. 20. 15. 23. 26. 14. 19. 16.]
Predicted jam (1=Jam, 0=No Jam): [0 0 1 1 1 0 0 1 1 1]
Actual jam: [0 0 1 1 1 0 0 1 1 1]
Accuracy (Jam Prediction): 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Speed Prediction Accuracy (%): 94.15581096747782
