<a href="https://colab.research.google.com/github/SOUMYADEEP-rgb/traffic-prediction-lstm/blob/main/traffic_jam_ai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# TRAFFIC CONGESTION PREDICTOR

In [None]:
import pandas as pd


# CSV export endpoint of the Google Sheet that holds the traffic samples.
sheet_url = "https://docs.google.com/spreadsheets/d/1iNJkeHnLNd50F6UzowSA9c94uxj8jIb8jLoH1Ij52_Q/gviz/tq?tqx=out:csv"
df = pd.read_csv(sheet_url)

# Parse the timestamp strings so the .dt accessor can derive calendar features.
df["timestamp"] = pd.to_datetime(df["timestamp"])

df["hour"] = df["timestamp"].dt.hour
df["dayofweek"] = df["timestamp"].dt.dayofweek  # pandas convention: Monday=0 ... Sunday=6

print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()


            timestamp       location      lat      lon  current_speed  \
0 2025-11-04 13:35:08      Esplanade  22.5726  88.3639             14   
1 2025-11-04 13:35:09      Salt Lake  22.5867  88.4173             26   
2 2025-11-04 13:35:09  Howrah Bridge  22.5850  88.3468             15   
3 2025-11-04 13:35:09      EM Bypass  22.4989  88.4122             25   
4 2025-11-04 13:35:10        Airport  22.6394  88.4467             19   

   free_flow_speed  confidence  hour  dayofweek  
0               22    0.567922    13          1  
1               26    1.000000    13          1  
2               23    0.998934    13          1  
3               25    1.000000    13          1  
4               19    1.000000    13          1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 115 entries, 0 to 114
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   timestamp        115 non-null    datetime64[ns

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
# Per-location free-flow speed used as the reference for the jam label below.
# These values overwrite the free_flow_speed column loaded from the sheet.
free_flow_dict = {
    "Esplanade": 40,
    "Salt Lake": 45,
    "Howrah Bridge": 35,
    "EM Bypass": 50,
    "Airport": 40
}

df["free_flow_speed"] = df["location"].map(free_flow_dict)

# Series.map leaves NaN for any location that is missing from free_flow_dict.
# MinMaxScaler keeps NaN in its output and the jam comparison would silently
# evaluate to 0, so fail loudly here instead of training on corrupted rows.
unmapped_locations = df.loc[df["free_flow_speed"].isna(), "location"].unique()
if len(unmapped_locations) > 0:
    raise ValueError(
        f"No free-flow speed configured for location(s): {list(unmapped_locations)}"
    )

# Integer id for each location name, usable as a numeric model input.
le = LabelEncoder()
df["location_encoded"] = le.fit_transform(df["location"].astype(str))

# Columns fed to the model, in the order the fitted scaler expects them.
FEATURE_COLUMNS = [
    "lat",
    "lon",
    "current_speed",
    "free_flow_speed",
    "confidence",
    "hour",
    "dayofweek",
    "location_encoded",
]

scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(df[FEATURE_COLUMNS])

# Separate scaler for the speed target so that predictions can be mapped back
# to the original scale independently of the input features.
scaler_y = MinMaxScaler()
y_speed_scaled = scaler_y.fit_transform(df[["current_speed"]])

# A row counts as a jam when its speed is below this fraction of the
# location's free-flow speed.
JAM_SPEED_RATIO = 0.5
df["jam"] = (df["current_speed"] < JAM_SPEED_RATIO * df["free_flow_speed"]).astype(int)

# Column vector of 0/1 jam labels, one row per sample.
y_jam = df[["jam"]].values


In [None]:
# Number of consecutive rows that make up one input window for the model.
SEQ_LEN = 12
import numpy as np

def create_sequences(X, y_speed, y_jam, seq_len):
    """Slice aligned arrays into sliding windows paired with next-row targets.

    Every window covers ``seq_len`` consecutive rows of ``X``; its targets are
    the ``y_speed`` and ``y_jam`` entries of the row right after the window.

    Args:
        X: Sliceable sequence of feature rows.
        y_speed: Speed targets, indexed like ``X``.
        y_jam: Jam targets, indexed like ``X``.
        seq_len: Number of rows in each window.

    Returns:
        Tuple ``(X_seq, y_speed_seq, y_jam_seq)`` of NumPy arrays, each holding
        ``len(X) - seq_len`` entries (empty when that count is not positive).
    """
    window_starts = range(len(X) - seq_len)

    windows = [X[start:start + seq_len] for start in window_starts]
    speed_targets = [y_speed[start + seq_len] for start in window_starts]
    jam_targets = [y_jam[start + seq_len] for start in window_starts]

    return np.array(windows), np.array(speed_targets), np.array(jam_targets)

# Build the SEQ_LEN-row input windows together with their next-row speed and jam targets.
X_seq, y_speed_seq, y_jam_seq = create_sequences(X_scaled, y_speed_scaled, y_jam, SEQ_LEN)



In [None]:
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialisation, dropout masks and batch shuffling start from the same state
# on every "Restart & Run All".
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Shared recurrent trunk: two stacked LSTMs over windows of
# (SEQ_LEN timesteps, n_features) with dropout in between.
inp = Input(shape=(SEQ_LEN, X_seq.shape[2]))
x = LSTM(128, return_sequences=True)(inp)
x = Dropout(0.3)(x)
x = LSTM(64)(x)

# Regression head: predicts the (scaled) speed of the row after the window.
speed_out = Dense(32, activation="relu")(x)
speed_out = Dense(1, name="speed_output")(speed_out)

# Classification head: probability that the row after the window is a jam.
jam_out = Dense(32, activation="relu")(x)
jam_out = Dense(1, activation="sigmoid", name="jam_output")(jam_out)

# The loss/metric dictionaries are keyed by the names of the two output layers.
model = Model(inputs=inp, outputs=[speed_out, jam_out])
model.compile(
    optimizer="adam",
    loss={"speed_output": "mse", "jam_output": "binary_crossentropy"},
    metrics={"speed_output": ["mae"], "jam_output": ["accuracy"]}
)

# -----------------------------
# 4. Train model
# -----------------------------
# Stop once val_loss has gone 20 epochs without improving and roll the model
# back to the weights of its best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

history = model.fit(
    X_seq,
    {"speed_output": y_speed_seq, "jam_output": y_jam_seq},
    epochs=60,
    batch_size=16,
    validation_split=0.2,
    # Keras documents `callbacks` as a list of Callback instances, so wrap the
    # single callback instead of passing it bare.
    callbacks=[early_stop],
)



Epoch 1/60
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 95ms/step - jam_output_accuracy: 0.4470 - jam_output_loss: 0.6939 - loss: 1.1674 - speed_output_loss: 0.4620 - speed_output_mae: 0.5981 - val_jam_output_accuracy: 0.3810 - val_jam_output_loss: 0.7107 - val_loss: 0.7999 - val_speed_output_loss: 0.0953 - val_speed_output_mae: 0.2495
Epoch 2/60
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - jam_output_accuracy: 0.5404 - jam_output_loss: 0.6909 - loss: 0.8170 - speed_output_loss: 0.1313 - speed_output_mae: 0.3214 - val_jam_output_accuracy: 0.3810 - val_jam_output_loss: 0.7197 - val_loss: 0.8447 - val_speed_output_loss: 0.1347 - val_speed_output_mae: 0.2874
Epoch 3/60
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - jam_output_accuracy: 0.5528 - jam_output_loss: 0.6853 - loss: 0.7894 - speed_output_loss: 0.1012 - speed_output_mae: 0.2886 - val_jam_output_accuracy: 0.3810 - val_jam_output_loss: 0.7334 - val_loss: 0.824

In [None]:
# Sanity check: list the columns currently present on df.
print(df.columns)


Index(['timestamp', 'location', 'lat', 'lon', 'current_speed',
       'free_flow_speed', 'confidence', 'hour', 'dayofweek',
       'location_encoded', 'jam'],
      dtype='object')


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

# Number of trailing windows inspected in this spot check.
N_EVAL = 10

# NOTE(review): these are the most recent windows; with validation_split=0.2
# they presumably fall inside the slice Keras held out for validation (and
# early stopping), so treat the scores as a spot check rather than a test-set
# result -- confirm against the current row count.
pred_speed_scaled, pred_jam = model.predict(X_seq[-N_EVAL:])
pred_speed = scaler_y.inverse_transform(pred_speed_scaled).flatten()
pred_jam_class = (pred_jam.flatten() > 0.5).astype(int)

# Round before the integer cast: the scale -> inverse-scale round trip can
# land a hair below the original integer speed, and a bare astype(int) would
# truncate that down by one.
actual_speed = np.rint(scaler_y.inverse_transform(y_speed_seq[-N_EVAL:])).flatten().astype(int)
actual_jam = y_jam_seq[-N_EVAL:].flatten()

print("Predicted speeds:", pred_speed)
print("Actual speeds:", actual_speed)
print("Predicted jam (1=Jam, 0=No Jam):", pred_jam_class)
print("Actual jam:", actual_jam)

print("Accuracy (Jam Prediction):", accuracy_score(actual_jam, pred_jam_class))
print("Precision:", precision_score(actual_jam, pred_jam_class, zero_division=0))
print("Recall:", recall_score(actual_jam, pred_jam_class, zero_division=0))
print("F1 Score:", f1_score(actual_jam, pred_jam_class, zero_division=0))

# "Accuracy" here is 100 minus the mean absolute percentage error. The
# percentage error is undefined when the actual speed is 0, so those samples
# are left out instead of turning the mean into inf/nan.
nonzero_speed = actual_speed != 0
if nonzero_speed.any():
    pct_error = (
        np.abs(pred_speed[nonzero_speed] - actual_speed[nonzero_speed])
        / actual_speed[nonzero_speed]
        * 100
    )
    speed_accuracy = np.mean(100 - pct_error)
else:
    speed_accuracy = float("nan")
print("Speed Prediction Accuracy (%):", speed_accuracy)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
Predicted speeds: [20.877165 23.912687 20.491293 21.396004 20.27023  20.895454 23.871016
 20.59675  21.422287 20.292604]
Actual speeds: [23 26 16 20 19 23 26 14 18 19]
Predicted jam (1=Jam, 0=No Jam): [0 0 1 1 1 1 0 1 1 1]
Actual jam: [0 0 1 1 1 0 0 1 1 1]
Accuracy (Jam Prediction): 0.9
Precision: 0.8571428571428571
Recall: 1.0
F1 Score: 0.9230769230769231
Speed Prediction Accuracy (%): 85.0732007542016


In [None]:
# Save your trained model
# (the file name is kept as-is because the download cell refers to it)
model.save("traffic_model.h5")
print("✅ Model saved as traffic_model.h5")

# (Optional but recommended)
# Save your scalers and label encoder for later use in Streamlit
import joblib
import pickle

joblib.dump(scaler_X, "scaler_X.save")
joblib.dump(scaler_y, "scaler_y.save")

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("label_encoder.pkl", "wb") as encoder_file:
    pickle.dump(le, encoder_file)

print("✅ Scalers and label encoder saved.")




✅ Model saved as traffic_model.h5
✅ Scalers and label encoder saved.


In [None]:
from google.colab import files

# Trigger a browser download for each saved artefact, in the order they were written.
for artifact_name in (
    "traffic_model.h5",
    "scaler_X.save",
    "scaler_y.save",
    "label_encoder.pkl",
):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>