In [3]:
import pandas as pd

# Load JSON data into a DataFrame
df = pd.read_json('merged_sorted_intersections.json')

FEATURE ENGINEERING AND EXRTACTION

In [4]:
# Convert timestamps to datetime if they aren't already
df['timeStamp'] = pd.to_datetime(df['timeStamp'])

# Extract components
df['hour'] = df['timeStamp'].dt.hour
df['minute'] = df['timeStamp'].dt.minute
df['second'] = df['timeStamp'].dt.second


In [5]:
def categorize_time_of_day(hour):
    if 5 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 17:
        return 'Afternoon'
    elif 17 <= hour < 21:
        return 'Evening'
    else:
        return 'Night'

df['part_of_day'] = df['hour'].apply(categorize_time_of_day)


In [6]:
df['car_count_diff'] = df['num_cars'].diff()


In [7]:
df['ema_cars'] = df['num_cars'].ewm(span=10, adjust=False).mean()


In [8]:
df['weekday'] = df['timeStamp'].dt.weekday
df['is_weekend'] = df['weekday'].apply(lambda x: 1 if x >= 5 else 0)


In [9]:
# # Assuming you have a list or set of public holidays
# public_holidays = set(['2022-01-01', '2022-12-25'])  # example dates
# df['is_public_holiday'] = df['timestamp'].dt.date.astype(str).isin(public_holidays).astype(int)


In [10]:
df.head(40)

Unnamed: 0,trafficID,num_cars,weatherStamp:,timeStamp,dateStamp,greenTime,hour,minute,second,part_of_day,car_count_diff,ema_cars,weekday,is_weekend
0,A1,82,30,2024-07-29 10:16:15,2024-07-29,41,10,16,15,Morning,,82.0,0,0
1,B1,33,30,2024-07-29 10:16:15,2024-07-29,17,10,16,15,Morning,-49.0,73.090909,0,0
2,C1,71,30,2024-07-29 10:16:15,2024-07-29,36,10,16,15,Morning,38.0,72.710744,0,0
3,D1,73,30,2024-07-29 10:16:15,2024-07-29,37,10,16,15,Morning,2.0,72.763336,0,0
4,B2,157,30,2024-07-29 10:16:35,2024-07-29,50,10,16,35,Morning,84.0,88.079093,0,0
5,C2,12,30,2024-07-29 10:16:53,2024-07-29,6,10,16,53,Morning,-145.0,74.246531,0,0
6,D2,50,30,2024-07-29 10:16:55,2024-07-29,32,10,16,55,Morning,38.0,69.83807,0,0
7,A2,45,30,2024-07-29 10:16:59,2024-07-29,23,10,16,59,Morning,-5.0,65.322058,0,0
8,C3,46,30,2024-07-29 10:17:02,2024-07-29,23,10,17,2,Morning,1.0,61.808956,0,0
9,A3,89,30,2024-07-29 10:17:24,2024-07-29,46,10,17,24,Morning,43.0,66.752782,0,0


In [17]:
import numpy as np

# Weekly cycle (assuming data in hours and full weeks)
period = 7 * 24  # 7 days * 24 hours
df['sin_time'] = np.sin(2 * np.pi * df['timeStamp'].dt.hour / period)
df['cos_time'] = np.cos(2 * np.pi * df['timeStamp'].dt.hour / period)


In [12]:
len(df)

333

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [25]:
# Define your feature columns and target column
feature_cols = ['num_cars', 'sin_time', 'cos_time', 'is_weekend', 'ema_cars', 'car_count_diff']
target_col = 'greenTime'  # This should be the duration of green light or similar


In [26]:
# Split the data into features and target
X = df[feature_cols]
y = df[target_col]


In [27]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [28]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [29]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)  # Output layer: Predicting the duration
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [35]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2)

Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 153.1934 - mae: 10.4341 - val_loss: 181.5887 - val_mae: 10.7435
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 180.9221 - mae: 10.9700 - val_loss: 181.5783 - val_mae: 10.7557
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 143.1301 - mae: 10.1096 - val_loss: 181.5841 - val_mae: 10.7487
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 164.2040 - mae: 10.6865 - val_loss: 181.5760 - val_mae: 10.7588
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 146.8997 - mae: 10.1543 - val_loss: 181.5813 - val_mae: 10.7520
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 174.5314 - mae: 10.9161 - val_loss: 181.5771 - val_mae: 10.7572
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [36]:
# Evaluate the model on the test set
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 184.6483 - mae: 10.9203 
Test Loss: 179.49143981933594, Test MAE: 10.769583702087402
