In [1]:
# Load libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from datetime import datetime, timedelta

In [2]:
# Load the dataset
df = pd.read_csv('Data/Traffic_Data_5.csv')

In [3]:
# Ensure the 'Date' column is in datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

In [4]:
# Add features based on the date
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year
df['Day_of_Week'] = df['Date'].dt.dayofweek  # 0=Monday, 6=Sunday

In [5]:
# Check for NaN values
print("NaN values in the dataset:")
print(df.isnull().sum())

NaN values in the dataset:
Date                                  10061
Road/Intersection Name                10146
Traffic Volume                        10013
Average Speed                         10031
Travel Time Index                     10145
Congestion Level                      10049
Road Capacity Utilization             10086
Incident Reports                      10009
Environmental Impact                  10218
Public Transport Usage                10000
Pedestrian and Cyclist Count           9847
Weather Conditions                    10160
Roadwork and Construction Activity    10115
Day                                   10061
Month                                 10061
Year                                  10061
Day_of_Week                           10061
dtype: int64


In [6]:
df.fillna(method='ffill', inplace=True)  # Forward fill

  df.fillna(method='ffill', inplace=True)  # Forward fill


In [30]:
print(df['Road/Intersection Name'].nunique())
print(df['Road/Intersection Name'].unique())

283
[nan 'CST Road ' 'Road no. 01, TPS V' 'Vakola Pipeline Road'
 'St Anthony Road' 'Mahalaxmi Mandir Road' 'Rajendra Kamble Road'
 "Sant Mary's Road" 'Chetana College Road' 'New English School Marg'
 'Samaj Mandir Hall Marg' 'J L Shirshekar Marg' 'Road no. 07, TPS III'
 'Road No. 06, TPS III' 'Road No. 04, TPS III' 'Road no. 11, TPS V'
 'Ram Mandir Road' 'Khar Pipeline Road' 'Aaram Society Road'
 'Side strips of Jawaharlal Nehru Road' 'Vijay Nagar Road'
 'Parshiwadi Road' 'Manipada Road' 'Shardadevi Marg'
 'Road no. 08, TPS III' 'Takshila Marg' 'Dnyaneshwar Mandir Marg'
 'Abbasaheb Shinde Marg' 'Sunder Nagar Road no. 01'
 'Sunder Nagar Road no. 02' 'Sunder Nagar Road no. 03'
 'Ramanath Parkar Marg' 'Khernagar Road no. 01' 'Khernagar Road no. 02'
 'Khernagar Road no. 03' 'Khernagar Road no. 04' 'Khernagar Road no. 06'
 'Khernagar Road no. 07' 'Khernagar Road no. 08' 'Bhai Tambe Marg'
 'V.M. Bhargav Road' 'Sidestrip of Juhu Road' 'SS of Kanubhai Desai Marg'
 '11th Road, Khar West ' 'Gur

In [7]:
# Encode the 'Road/Intersection Name'
encoder = OneHotEncoder(sparse_output=False)
encoded_roads = encoder.fit_transform(df[['Road/Intersection Name']])
road_names = encoder.get_feature_names_out(['Road/Intersection Name'])

In [8]:
# Create a new DataFrame with encoded roads
encoded_df = pd.DataFrame(encoded_roads, columns=road_names)

In [9]:
# Concatenate the encoded roads with the original DataFrame
df = pd.concat([df, encoded_df], axis=1)


In [10]:
# Define features and target variable
features = ['Day', 'Month', 'Year', 'Day_of_Week'] + list(road_names)
target = 'Traffic Volume'

In [11]:
# Split the dataset into training and testing sets
X = df[features]
y = df[target]

In [12]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
# Train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

In [14]:
# Evaluate the model
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f'RMSE: {rmse}')

RMSE: 0.19250978649993128




In [26]:
baseline_traffic_volume = 20000000000  # Example baseline traffic volume

In [19]:
# Prediction function to return percentages
def predict_traffic_percentage(road_name, future_dates):
    # Prepare the input DataFrame
    future_dates = pd.to_datetime(future_dates)  # Ensure dates are datetime objects
    predictions = []

    for date in future_dates:
        day = date.day
        month = date.month
        year = date.year
        day_of_week = date.dayofweek

        # Create input array
        input_data = np.zeros(len(road_names))
        road_index = encoder.transform([[road_name]]).nonzero()[0][0]
        input_data[road_index] = 1

        # Create the input DataFrame
        input_df = pd.DataFrame([np.concatenate([[day, month, year, day_of_week], input_data])],
                                columns=features)

        # Make prediction
        traffic_prediction = model.predict(input_df)[0]

        # Calculate percentage of baseline traffic volume
        percentage_traffic = (traffic_prediction / baseline_traffic_volume) * 100
        predictions.append((date, percentage_traffic))

    return predictions


In [37]:
# Example usage
road_name_input = "CST Road "
future_dates_input = [datetime.now() + timedelta(days=i) for i in range(1, 15)]  # Next 30 days
predictions = predict_traffic_percentage(road_name_input, future_dates_input)



In [38]:
# Display predictions as percentages
for date, prediction in predictions:
    print(f"Predicted traffic volume for {road_name_input} on {date.date()}: {prediction:.2f}%")

Predicted traffic volume for CST Road  on 2024-10-16: 11.20%
Predicted traffic volume for CST Road  on 2024-10-17: 11.20%
Predicted traffic volume for CST Road  on 2024-10-18: 11.20%
Predicted traffic volume for CST Road  on 2024-10-19: 11.20%
Predicted traffic volume for CST Road  on 2024-10-20: 11.20%
Predicted traffic volume for CST Road  on 2024-10-21: 11.20%
Predicted traffic volume for CST Road  on 2024-10-22: 11.20%
Predicted traffic volume for CST Road  on 2024-10-23: 11.20%
Predicted traffic volume for CST Road  on 2024-10-24: 11.20%
Predicted traffic volume for CST Road  on 2024-10-25: 11.20%
Predicted traffic volume for CST Road  on 2024-10-26: 11.20%
Predicted traffic volume for CST Road  on 2024-10-27: 11.20%
Predicted traffic volume for CST Road  on 2024-10-28: 11.20%
Predicted traffic volume for CST Road  on 2024-10-29: 11.20%
