My Traffic Model for Smart-Cities

In [None]:
import datetime
import uuid

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.layers import Dense, Input, LSTM
from tensorflow.keras.models import Sequential

In [None]:
# Seed NumPy's global random generator so the np.random draws used for data
# generation repeat on every fresh run. This does not seed TensorFlow or XGBoost.
np.random.seed(42)

Step 1: Data Generation

In [None]:
def traffic_data(n_days=30, zones=5):
  '''Generate a synthetic hourly traffic dataset for a number of city zones.

  Random values come from NumPy's global generator (np.random), so seed it
  beforehand for reproducible output.

  Parameters
  ----------
  n_days : int
      Number of days to simulate; each day contributes 24 hourly timestamps.
  zones : int
      Number of zones, named 'Zone_1' ... 'Zone_<zones>'.

  Returns
  -------
  pandas.DataFrame
      One row per (timestamp, zone) with columns timestamp, zone,
      vehicle_count, avg_speed, congestion_score, weather, event, road_type,
      rush_hour and day_of_week.
  '''
  dates = pd.date_range(start='2025-01-01', periods=n_days*24, freq='h')
  zones_list = [f'Zone_{i}' for i in range(1, zones+1)]
  n = len(dates) * zones

  # Base traffic features. The order of the np.random calls in this function is
  # kept stable so a given seed always produces the same draws.
  df = pd.DataFrame({
      'timestamp': dates.repeat(zones),
      'zone': np.tile(zones_list, len(dates)),
      'vehicle_count': np.random.poisson(lam=100, size=n) + np.sin(np.arange(n) / 24)*50,
      'avg_speed': np.random.normal(60, 10, n).clip(20, 80),
  })

  # Contextual features.
  df['weather'] = np.random.choice(['clear', 'rain', 'fog'], size=n, p=[0.7, 0.2, 0.1])
  df['event'] = np.random.choice([0, 1], size=n, p=[0.9, 0.1])

  # Road types repeat in a fixed five-zone pattern so every zone gets a value,
  # including zones beyond Zone_5 (which previously mapped to NaN).
  road_cycle = ['highway', 'arterial', 'local', 'arterial', 'highway']
  road_by_zone = {name: road_cycle[i % len(road_cycle)] for i, name in enumerate(zones_list)}
  df['road_type'] = df['zone'].map(road_by_zone)

  df['rush_hour'] = df['timestamp'].dt.hour.isin([7, 8, 17, 18]).astype(int)
  df['day_of_week'] = df['timestamp'].dt.day_name()

  # Adjust speed and volume for weather, events and rush hour, then clip.
  df.loc[df["weather"] == "rain", "avg_speed"] *= 0.8
  df.loc[df["weather"] == "fog", "avg_speed"] *= 0.7
  df.loc[df["event"] == 1, "vehicle_count"] *= 1.5
  df.loc[df["rush_hour"] == 1, "vehicle_count"] *= 1.2
  df["avg_speed"] = df["avg_speed"].clip(20, 80)
  df["vehicle_count"] = df["vehicle_count"].clip(0, 500)

  # Congestion is derived from the FINAL speed/volume so that weather, events
  # and rush hour actually influence it: high relative volume combined with low
  # relative speed gives a score near 1.
  relative_volume = df['vehicle_count'] / df['vehicle_count'].max()
  relative_slowness = 1 - df['avg_speed'] / df['avg_speed'].max()
  df['congestion_score'] = (relative_volume * relative_slowness).clip(0, 1)

  # Keep the original column order.
  return df[['timestamp', 'zone', 'vehicle_count', 'avg_speed', 'congestion_score',
             'weather', 'event', 'road_type', 'rush_hour', 'day_of_week']]

# Build the dataset with the function defaults (n_days=30, zones=5).
traffic_df = traffic_data()


In [None]:
# Preview the first rows of the generated data.
traffic_df.head()

Unnamed: 0,timestamp,zone,vehicle_count,avg_speed,congestion_score,weather,event,road_type,rush_hour,day_of_week
0,2025-01-01,Zone_1,96.0,52.128259,0.186526,clear,0,highway,0,Wednesday
1,2025-01-01,Zone_2,109.082731,62.033595,0.136622,clear,0,arterial,0,Wednesday
2,2025-01-01,Zone_3,92.161846,53.453947,0.084694,rain,0,local,0,Wednesday
3,2025-01-01,Zone_4,109.233737,69.625029,0.079004,clear,0,arterial,0,Wednesday
4,2025-01-01,Zone_5,119.294807,47.738206,0.268296,clear,0,highway,0,Wednesday


Preprocessing

In [None]:
# preprocessing the data for modeling
def preprocess_traffic_data(df, rolling_window=24):
  '''Encode categorical columns and add per-zone lag/rolling features.

  The input frame is not modified; a processed copy is returned.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns timestamp, zone, vehicle_count,
      congestion_score, weather, road_type and day_of_week. Index labels must
      be unique, because the new features are aligned back onto the frame by
      index.
  rolling_window : int
      Number of consecutive observations per zone used for
      vehicle_count_rolling_mean.

  Returns
  -------
  pandas.DataFrame
      Copy of df with weather_encoded, road_type_encoded, day_of_week_encoded,
      vehicle_count_lag1, congestion_lag1 and vehicle_count_rolling_mean added,
      and every row containing a null value removed (this drops the warm-up
      rows at the start of each zone).
  '''
  df = df.copy()

  # Integer-encode the categorical columns. The fitted encoders are not
  # returned, so the code-to-label mapping is not available to callers.
  df['weather_encoded'] = LabelEncoder().fit_transform(df['weather'])
  df['road_type_encoded'] = LabelEncoder().fit_transform(df['road_type'])
  df['day_of_week_encoded'] = LabelEncoder().fit_transform(df['day_of_week'])

  # Compute lagged/rolling features on a time-ordered view so "previous row"
  # really means "previous timestamp" within a zone, whatever order the rows
  # arrive in. The stable sort keeps already-ordered input untouched, and
  # assigning back to df aligns on the index, so df's row order is preserved.
  ordered = df.sort_values('timestamp', kind='stable')
  by_zone = ordered.groupby('zone', sort=False)
  df['vehicle_count_lag1'] = by_zone['vehicle_count'].shift(1)
  df['congestion_lag1'] = by_zone['congestion_score'].shift(1)
  df['vehicle_count_rolling_mean'] = by_zone['vehicle_count'].transform(
      lambda counts: counts.rolling(window=rolling_window).mean())

  # Drop rows with null values (lag/rolling warm-up rows and any other nulls).
  return df.dropna()

# NOTE: this rebinds traffic_df to the preprocessed frame. Re-running this cell
# on its own preprocesses the already-processed data again, and dropna() then
# trims further warm-up rows from each zone; re-run from data generation instead.
traffic_df = preprocess_traffic_data(traffic_df)

Exploratory Data Analysis (EDA)

In [None]:
!pip install -U kaleido



In [None]:
def plot_traffic_data(df):
  '''Render the exploratory plots for the traffic data and save them to disk.

  Writes three files into the current working directory:
  Traffic_Volume.png (vehicle count over time per zone),
  Congestion_by_Weather.png (box plot of congestion score per weather type)
  and Geospatial_Heatmap.html (interactive scatter of mean congestion per
  zone at simulated coordinates).

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns timestamp, vehicle_count, zone, weather and
      congestion_score. The frame is not modified.

  Raises
  ------
  KeyError
      If df contains a zone with no entry in the simulated coordinate table.
  '''
  # Time-series plot
  fig_volume, ax_volume = plt.subplots(figsize=(14, 8))
  sns.lineplot(data=df, x='timestamp', y='vehicle_count', hue='zone', ax=ax_volume)
  ax_volume.set_title('Vahicle Count on zone over time')
  ax_volume.set_xlabel('Timestamp')
  ax_volume.set_ylabel('Vehicle Count')
  fig_volume.savefig('Traffic_Volume.png')
  plt.close(fig_volume)

  # Congestion by weather
  fig_weather, ax_weather = plt.subplots(figsize=(10, 6))
  sns.boxplot(data=df, x='weather', y='congestion_score', ax=ax_weather)
  ax_weather.set_title('Congestion Score by Weather')
  fig_weather.savefig('Congestion_by_Weather.png')
  plt.close(fig_weather)

  # Geospatial heatmap (simulated coordinates). Coordinates are attached only
  # to the per-zone summary, never to the caller's frame, so no helper columns
  # leak into later cells or the saved CSV.
  zones_coords = {
      'Zone_1': (1, 1), 'Zone_2': (1, 3), 'Zone_3': (2, 2),
      'Zone_4': (3, 1), 'Zone_5': (3, 3),
  }
  congestion_avg = df.groupby('zone')['congestion_score'].mean().reset_index()
  congestion_avg['x'] = congestion_avg['zone'].map(lambda z: zones_coords[z][0])
  congestion_avg['y'] = congestion_avg['zone'].map(lambda z: zones_coords[z][1])

  fig = px.scatter(congestion_avg, x='x', y='y', size='congestion_score', color='congestion_score',
             hover_data=['zone'], title='Average Congestion Score by Zone')
  fig.update_layout(width=800, height=600)
  # write_html saves the figure to a file; to_html only returns the markup as a
  # string and treats its first positional argument as a config object.
  fig.write_html('Geospatial_Heatmap.html')

# Generate the EDA plots for the preprocessed data; the matplotlib figures are
# saved to PNG files and closed rather than displayed inline.
plot_traffic_data(traffic_df)





Predictive Modeling

In [None]:
def arima_forecast(df, zone='Zone_1', steps=24):
  '''Fit an ARIMA(5, 1, 0) model to one zone's vehicle counts and forecast ahead.

  Also saves a plot of the last 48 observations together with the forecast
  to arima_forecast.png.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns zone, timestamp and vehicle_count.
  zone : str
      Zone whose series is modelled.
  steps : int
      Number of future observations to forecast.

  Returns
  -------
  The out-of-sample forecast produced by the fitted model for `steps` periods.
  '''
  series = df.loc[df['zone'] == zone, ['timestamp', 'vehicle_count']].set_index('timestamp')
  fit = ARIMA(series, order=(5, 1, 0)).fit()
  # The keyword is `steps`; the previous `step=24` was an unknown keyword that
  # statsmodels ignored with a warning, so the requested horizon was not applied.
  forecast = fit.forecast(steps=steps)

  fig, ax = plt.subplots(figsize=(12, 6))
  ax.plot(series.iloc[-48:], label='History')
  ax.plot(forecast, label='Forecast', color='red')
  ax.set_title(f'ARIMA Traffic Forecast for {zone}')
  ax.set_xlabel('Timestamp')
  ax.set_ylabel('Vehiclecount')
  ax.legend()
  fig.savefig('arima_forecast.png')
  plt.close(fig)

  return forecast

def lstm_forecast(df, zone='Zone_1', window=5, epochs=50):
  '''Train a two-layer LSTM to predict one zone's next vehicle count.

  Each sample is a window of `window` consecutive rows of the features
  vehicle_count, weather_encoded, rush_hour and vehicle_count_lag1; the
  target is the vehicle_count of the row that follows the window. The last
  20% of samples (chronologically) are held out for testing. A plot of actual
  vs. predicted test values is saved to lstm_forecast.png.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the column zone and the four feature columns listed above.
  zone : str
      Zone whose series is modelled.
  window : int
      Number of past rows fed to the network per sample.
  epochs : int
      Number of training epochs.

  Returns
  -------
  (y_test, y_pred) : tuple of numpy.ndarray
      Actual and predicted vehicle counts for the test samples, in original
      (unscaled) units.

  Raises
  ------
  ValueError
      If the zone has too few rows to build at least two samples.
  '''
  feature_cols = ['vehicle_count', 'weather_encoded', 'rush_hour', 'vehicle_count_lag1']
  values = df.loc[df['zone'] == zone, feature_cols].to_numpy(dtype=float)
  n_features = values.shape[1]

  if len(values) < window + 2:
    raise ValueError(
        f'Need at least {window + 2} rows for zone {zone!r} to build a '
        f'train/test split, got {len(values)}')

  # Build the sliding windows on raw values; column 0 (vehicle_count) is the target.
  X = np.stack([values[i - window:i] for i in range(window, len(values))])
  y = values[window:, 0]

  # Chronological split (no shuffling) so the test set lies strictly after training.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

  # Fit the scaler on training rows only, so the test period's min/max do not
  # leak into training.
  scaler = MinMaxScaler()
  scaler.fit(X_train.reshape(-1, n_features))

  def scale_windows(windows):
    # The scaler works on 2-D (rows, features); flatten the windows and restore the shape.
    return scaler.transform(windows.reshape(-1, n_features)).reshape(windows.shape)

  X_train_scaled = scale_windows(X_train)
  X_test_scaled = scale_windows(X_test)

  # MinMaxScaler applies x * scale_ + min_ per column; reuse the target
  # column's parameters to scale y and to undo the scaling on predictions.
  target_scale, target_offset = scaler.scale_[0], scaler.min_[0]
  y_train_scaled = y_train * target_scale + target_offset

  # An explicit Input layer replaces the deprecated `input_shape` layer argument.
  model = Sequential([
      Input(shape=(window, n_features)),
      LSTM(50, activation='relu', return_sequences=True),
      LSTM(50, activation='relu'),
      Dense(1),
  ])
  model.compile(optimizer='adam', loss='mse')
  model.fit(X_train_scaled, y_train_scaled, epochs=epochs, batch_size=32, verbose=0)

  y_pred_scaled = model.predict(X_test_scaled)
  y_pred = (y_pred_scaled[:, 0] - target_offset) / target_scale

  fig, ax = plt.subplots(figsize=(12, 6))
  ax.plot(y_test, label='Actual')
  ax.plot(y_pred, label='Predicted', color='red')
  ax.set_title(f'LSTM Traffic Forecast for {zone}')
  ax.set_xlabel('Timestamp')
  ax.set_ylabel('Vahiclecount')
  ax.legend()
  fig.savefig('lstm_forecast.png')
  plt.close(fig)

  return y_test, y_pred

def xgboost_model(df):
  '''Train an XGBoost regressor to predict congestion_score.

  Uses a random 80/20 train/test split (random_state=42) and saves a
  feature-importance chart to xgboost_feature_importance.png.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain congestion_score and every column listed in `features`
      below.

  Returns
  -------
  (y_test, y_pred)
      Actual congestion scores of the test rows and the model's predictions.
  '''
  features = ["vehicle_count", "avg_speed", "weather_encoded", "event", "rush_hour",
                "road_type_encoded", "day_of_week_encoded", "vehicle_count_lag1",
                "congestion_lag1", "vehicle_count_rolling_mean"]

  X = df[features]
  y = df['congestion_score']

  # NOTE(review): the split is random rather than chronological, so test rows
  # are interleaved in time with training rows - confirm this is intended.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  # `n_estimators` was previously misspelled as `n_estimatore`, which XGBoost
  # reported as an unused parameter, so the requested tree count was not applied.
  model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.01, random_state=42)
  model.fit(X_train, y_train)

  y_pred = model.predict(X_test)

  # plot_importance draws on its own axes and returns them.
  ax = xgb.plot_importance(model, max_num_features=10)
  ax.set_title('XGBoost Feature Importance')
  ax.figure.savefig('xgboost_feature_importance.png')
  plt.close(ax.figure)

  return y_test, y_pred

# Run the three models with their default zone ('Zone_1' for ARIMA and LSTM).
# The results are kept as module-level names; evaluate_models() reads the
# lstm_* and xgb_* ones directly.
arima_pred = arima_forecast(traffic_df)
lstm_actual, lstm_pred = lstm_forecast(traffic_df)
xgb_actual, xgb_pred = xgboost_model(traffic_df)

def evaluate_models():
  '''Compute MSE/RMSE for the LSTM and XGBoost results and chart them.

  Reads the module-level variables lstm_actual, lstm_pred, xgb_actual and
  xgb_pred, so the model cells must have been run first. Saves a bar chart to
  model_performance.png.

  Note that the two models predict different targets (the LSTM predicts
  vehicle_count, XGBoost predicts congestion_score), so their errors are in
  different units and the bars are not directly comparable.

  Returns
  -------
  pandas.DataFrame
      One row per model with the columns Model, MSE and RMSE.
  '''
  lstm_mse = mean_squared_error(lstm_actual, lstm_pred)
  xgb_mse = mean_squared_error(xgb_actual, xgb_pred)

  metrics = pd.DataFrame({
      "Model": ['LSTM', 'XGBoost'],
      "MSE": [lstm_mse, xgb_mse],
      'RMSE': [np.sqrt(lstm_mse), np.sqrt(xgb_mse)]
  })

  # Draw on explicitly created axes: DataFrame.plot() without `ax` opens its
  # own figure, which left an empty 800x600 figure behind.
  fig, ax = plt.subplots(figsize=(8, 6))
  metrics.set_index('Model')[['MSE', 'RMSE']].plot(kind='bar', ax=ax)
  ax.set_title('Model Performance Comparison')
  ax.set_ylabel('Score')
  fig.savefig('model_performance.png')
  plt.close(fig)

  return metrics


# Compare LSTM and XGBoost errors using the lstm_*/xgb_* results assigned above.
evaluate_models()


def create_dashboard(df):
  '''Build an interactive per-zone vehicle-count chart with a zone selector.

  Adds one line trace per zone and a dropdown that shows either all zones or
  a single zone. The figure is written to traffic_dashboard.html and then
  displayed.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns zone, timestamp and vehicle_count.
  '''
  zones = df['zone'].unique()
  fig = go.Figure()

  for zone in zones:
    zone_data = df[df['zone'] == zone]
    fig.add_trace(go.Scatter(x=zone_data['timestamp'], y=zone_data['vehicle_count'], mode='lines', name=zone))

  # One button showing every trace, then one button per zone showing only the
  # trace at that zone's position (traces were added in `zones` order).
  buttons = [{'label': 'all zone', 'method': 'update', 'args': [{'visible': [True]*len(zones)}]}]
  buttons += [
      {"label": f"{zone}", "method": "update",
       "args": [{"visible": [i == j for i in range(len(zones))]}]}
      for j, zone in enumerate(zones)
  ]

  # Layout, export and display happen once, after all traces exist. They were
  # previously inside the loop, so the file was rewritten and the figure shown
  # once per zone.
  fig.update_layout(
      title='Interactive Traffic Volume Dashboard',
      xaxis_title='Timestamp',
      yaxis_title='Vehicle Count',
      updatemenus=[{
          'buttons': buttons,
          "direction": "down",
          "showactive": True
      }]
  )
  fig.write_html('traffic_dashboard.html')
  fig.show()

# Build the interactive dashboard, write traffic_dashboard.html and display it.
create_dashboard(traffic_df)


No frequency information was provided, so inferred frequency h will be used.


No frequency information was provided, so inferred frequency h will be used.


No frequency information was provided, so inferred frequency h will be used.


Unknown keyword arguments: dict_keys(['step']).Passing unknown keyword arguments will raise a TypeError beginning in version 0.15.


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step



Parameters: { "n_estimatore" } are not used.




<Figure size 800x600 with 0 Axes>

Save the Dataset

In [None]:
# Persist the current traffic_df as CSV in the working directory, omitting the index.
traffic_df.to_csv('traffic_data.csv', index=False)