In [1]:
%matplotlib inline
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import GRU, Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [2]:
# Load dataset. header=1 takes the column names from the second line of the
# CSV; the line above it is skipped.
df = pd.read_csv("SData_Oct2006.csv", header=1)

# Drop exact duplicate rows and rows that are completely empty. Reassigning
# (rather than inplace=True) avoids mutating a frame in place.
df = df.drop_duplicates().dropna(how='all')

# Traffic-reading columns are the ones named "V<number>" (readings every 15 min).
v_columns = [col for col in df.columns if col.startswith('V') and col[1:].isdigit()]
if not v_columns:
    raise ValueError("No V<number> traffic columns found in SData_Oct2006.csv")

# Fill gaps in the non-reading columns only: text columns get a placeholder,
# numeric columns get 0. Reading columns are deliberately left untouched here:
# zero-filling them would turn missing measurements into fake "no traffic"
# samples and make the NaN filter further down a no-op.
reading_columns = set(v_columns)
for col in df.columns:
    if col in reading_columns:
        continue
    if df[col].dtype == 'object':
        df[col] = df[col].fillna('unknown')
    else:
        df[col] = df[col].fillna(0)

# Force the readings to numeric; anything unparseable becomes NaN instead of
# leaving an object column that np.isnan cannot handle.
df[v_columns] = df[v_columns].apply(pd.to_numeric, errors='coerce')
traffic_df = df[v_columns]

# Flatten all values row by row into one long series, drop the missing
# readings, and shape the result as a single-feature column (n_samples, 1).
traffic_series = traffic_df.to_numpy(dtype=float).ravel()
traffic_series = traffic_series[~np.isnan(traffic_series)]  # remove NaNs
traffic_series = traffic_series.reshape(-1, 1)

print(f"Flattened traffic data points: {len(traffic_series)}")

Flattened traffic data points: 402432


In [3]:
# Normalize data.
# Fit the scaler on the leading 80% of the series only (the same fraction the
# chronological train/test split further down assigns to training) so that the
# min/max of the held-out tail never leaks into preprocessing. The full series
# is then transformed with those training-derived bounds, so values in the
# tail may fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler_fit_size = max(1, int(len(traffic_series) * 0.8))
scaler.fit(traffic_series[:scaler_fit_size])
scaled_data = scaler.transform(traffic_series)

# Create sequences
def create_sequences(data, seq_length=10):
    """Slice a series into overlapping input windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``.

    Args:
        data: Array-like indexed by time step along its first axis.
        seq_length: Number of consecutive steps per input window (>= 0).

    Returns:
        Tuple ``(x, y)`` of NumPy arrays. With
        ``n = max(len(data) - seq_length, 0)``, ``x`` has shape
        ``(n, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n, *data.shape[1:])``. When the input is too short for a single
        window both arrays are empty but keep those trailing dimensions.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of `window_index` holds positions i .. i + seq_length - 1, so a
    # single fancy-indexing call gathers every window without a Python loop.
    window_index = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    x = data[window_index]

    # Copy so the targets do not alias the caller's array.
    y = data[seq_length:seq_length + n_windows].copy()
    return x, y

# Number of consecutive time steps in each input window.
SEQ_LEN = 10
X, y = create_sequences(scaled_data, seq_length=SEQ_LEN)

# Chronological 80/20 split: the leading windows train, the trailing ones test.
train_size = int(0.8 * len(X))
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

In [None]:
# Build and train GRU model
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that initialisation and batch shuffling, and therefore the reported losses,
# repeat from run to run.
set_random_seed(42)

# One GRU layer (50 units) reads a window of SEQ_LEN single-feature steps and
# returns only its final state; a single linear unit maps that state to the
# next scaled reading.
model = Sequential([
    Input(shape=(SEQ_LEN, 1)),
    GRU(50, return_sequences=False),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# NOTE(review): the test windows are also used as validation data, so val_loss
# is the test loss. That is fine while nothing is tuned on it; carve a separate
# validation slice out of the training data before adding early stopping or
# model selection.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/20
10061/10061 ━━━━━━━━━━━━━━━━━━━━ 20s 2ms/step - loss: 0.0014 - val_loss: 5.7987e-04
Epoch 2/20
10061/10061 ━━━━━━━━━━━━━━━━━━━━ 20s 2ms/step - loss: 0.0010 - val_loss: 5.7854e-04
Epoch 3/20
10061/10061 ━━━━━━━━━━━━━━━━━━━━ 20s 2ms/step - loss: 9.7910e-04 - val_loss: 5.6159e-04
Epoch 4/20
10061/10061 ━━━━━━━━━━━━━━━━━━━━ 20s 2ms/step - loss: 9.7782e-04 - val_loss: 5.6764e-04
Epoch 5/20
10061/10061 ━━━━━━━━━━━━━━━━━━━━ 20s 2ms/step - loss: 9.7163e-04 - val_loss: 5.6947e-04
Epoch 6/20
 4927/10061 ━━━━━━━━━━━━━━━━━━━━ 9s 2ms/step - loss: 9.4164e-04

In [None]:
# Training vs. validation loss per epoch, drawn on an explicit Axes object.
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('GRU Model Training vs Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)
plt.show()

In [None]:
# Run the model on the test windows, then undo the scaling on both the
# predictions and the true targets so they can be compared in original units.
predicted = model.predict(X_test)
actual_rescaled = scaler.inverse_transform(y_test)
predicted_rescaled = scaler.inverse_transform(predicted)

# Overlay the actual and predicted series on one Axes.
pred_fig, pred_ax = plt.subplots(figsize=(12, 6))
pred_ax.plot(actual_rescaled, label='Actual Traffic Flow')
pred_ax.plot(predicted_rescaled, label='Predicted Traffic Flow')
pred_ax.set_title('GRU Traffic Flow Prediction (Flattened V Columns)')
pred_ax.set_xlabel('Time Step')
pred_ax.set_ylabel('Traffic Volume')
pred_ax.legend()
pred_ax.grid(True)
plt.show()

In [None]:
# Error metrics on the rescaled (original-unit) test predictions.
# RMSE is the square root of scikit-learn's mean squared error.
rmse = math.sqrt(
    mean_squared_error(y_true=actual_rescaled, y_pred=predicted_rescaled)
)
mae = mean_absolute_error(y_true=actual_rescaled, y_pred=predicted_rescaled)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")