In [30]:
import pandas as pd
import numpy as np


# Load the raw dataset, parse the 'date' column with dayfirst=True and
# drop rows that are exact duplicates of an earlier row.
df = (
    pd.read_csv("chennai_final_dataset.csv")
      .assign(date=lambda raw: pd.to_datetime(raw['date'], dayfirst=True))
      .drop_duplicates()
)

# Replace missing event metadata with explicit labels so that later
# comparisons and one-hot encoding see a real category instead of NaN.
for column, placeholder in (
    ('event', 'None'),
    ('type', 'None'),
    ('traffic_impact', 'Low'),
):
    df[column] = df[column].fillna(placeholder)



# Collapse the cleaned rows to one record per (date, Junction).
# Vehicle counts and the rain / precipitation / snow-depth columns are summed,
# temperature is averaged, and the event descriptors use pandas' 'first'
# aggregation within each group.
# NOTE(review): snow depth is summed like the rain columns; confirm that a
# per-group sum (rather than a mean or max) is what downstream code expects.
daily_aggregations = {'Vehicles': 'sum', 'temperature': 'mean'}
daily_aggregations.update(
    {column: 'sum' for column in ('rain (mm)', 'precipitation (mm)', 'snow_depth (m)')}
)
daily_aggregations.update(
    {column: 'first' for column in ('event', 'type', 'traffic_impact')}
)

rows_by_day_and_junction = df.groupby(['date', 'Junction'], observed=False)
daily_df = rows_by_day_and_junction.agg(daily_aggregations).reset_index()


# Calendar features derived from the 'date' column.
date_parts = daily_df['date'].dt
daily_df['day_of_week'] = date_parts.dayofweek
daily_df['month'] = date_parts.month
# isocalendar() returns columns in pandas' nullable UInt32 extension dtype.
# Cast to a plain integer so this column has the same kind of dtype as the
# other calendar features and the frame converts to a numeric NumPy array
# instead of an object array.
daily_df['week_of_year'] = date_parts.isocalendar().week.astype(int)
# dayofweek is 0 for Monday, so values 5 and 6 are Saturday and Sunday.
daily_df['is_weekend'] = (daily_df['day_of_week'] >= 5).astype(int)


# 1 when the row carries any event label other than the 'None' placeholder.
daily_df['is_event_day'] = (daily_df['event'] != 'None').astype(int)


# Order rows by date within each junction so the row-offset operations
# below (shift / rolling) walk through each junction's history in order.
# NOTE(review): shift and rolling count rows, not calendar days; this assumes
# one row per junction per day with no missing dates. Confirm.
daily_df = daily_df.sort_values(['Junction', 'date'])

# One grouped view of the vehicle counts, reused for every history feature.
vehicles_by_junction = daily_df.groupby('Junction', observed=False)['Vehicles']

# Lag features: the count one row back and seven rows back in the same junction.
for lag in (1, 7):
    daily_df[f'vehicles_lag_{lag}'] = vehicles_by_junction.shift(lag)

# Rolling means over the *preceding* rows only. The series is shifted by one
# row before the window is applied so the current row's own Vehicles value is
# excluded. A plain rolling(n) includes the current row, so a feature built
# that way would contain the value the lag features treat as not yet known.
for window in (3, 7):
    daily_df[f'vehicles_roll_{window}'] = vehicles_by_junction.transform(
        lambda counts, size=window: counts.shift(1).rolling(size).mean()
    )


# One-hot encode the categorical event descriptors. drop_first=True omits
# the first level of each encoded column, so k categories become k-1
# indicator columns.
categorical_columns = ['event', 'type', 'traffic_impact']
encoded_df = pd.get_dummies(daily_df, columns=categorical_columns, drop_first=True)


# Keep an independent copy under the name used for the finished feature table.
preprocessed_df = encoded_df.copy()

# Preview the first rows as the cell output.
preprocessed_df.head()


Unnamed: 0,date,Junction,Vehicles,temperature,rain (mm),precipitation (mm),snow_depth (m),day_of_week,month,week_of_year,...,event_Tamil New Year Eve Shopping,type_Festival,type_Festival Activity,type_Major Festival,type_None,type_Public Holiday,type_Religious Event,type_Return Travel,traffic_impact_Low,traffic_impact_Medium
0,2015-11-01,1,327,25.9875,0.8625,0.8625,0,6,11,44,...,False,False,False,False,True,False,False,False,True,False
3,2015-11-02,1,546,25.941667,0.1875,0.1875,0,0,11,45,...,False,False,False,False,True,False,False,False,True,False
6,2015-11-03,1,544,26.8625,0.291667,0.291667,0,1,11,45,...,False,False,False,False,True,False,False,False,True,False
9,2015-11-04,1,498,26.7125,0.3625,0.3625,0,2,11,45,...,False,False,False,False,True,False,False,False,True,False
12,2015-11-05,1,464,26.7875,0.0375,0.0375,0,3,11,45,...,False,False,False,False,True,False,False,False,True,False
