ABOUT THE PROJECT

The main goal of this project is to predict traffic flow using the ARIMA model. The steps include importing necessary libraries, loading and preprocessing the dataset, performing exploratory data analysis (EDA), checking for stationarity, implementing the ARIMA model, evaluating the model, and making predictions based on user inputs.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the traffic records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='traffic.csv')

# Preview the first five rows to sanity-check the loaded columns
df.head(n=5)

Data Preprocessing

1. Converting DateTime to a datetime object allows for easier time-series manipulation.
2. Setting DateTime as the index helps in time-series analysis.
3. Checking and handling missing values ensures data quality.

In [None]:
# Convert the DateTime column to datetime type so it can act as a time index
df['DateTime'] = pd.to_datetime(df['DateTime'])

# Set DateTime as the index for time-based slicing and plotting
df.set_index('DateTime', inplace=True)

# Report the number of missing values per column. The counts are printed
# explicitly: a bare expression in the middle of a cell is evaluated but
# never displayed, so the check would otherwise be invisible.
print(df.isnull().sum())

# Drop any rows that contain missing values
df.dropna(inplace=True)

Exploratory Data Analysis (EDA)

This section visualizes the traffic data for each junction to understand its distribution and trends.

In [None]:
# Plot the traffic data for each junction, one subplot per junction
junctions = df['Junction'].unique()

# Lay the subplots out two per row with as many rows as needed. A fixed
# 2x2 grid makes plt.subplot raise a ValueError as soon as a fifth junction
# is present in the data.
n_cols = 2
n_rows = max(1, (len(junctions) + n_cols - 1) // n_cols)

# 5 inches of height per row keeps the 15x10 figure for a two-row grid
plt.figure(figsize=(15, 5 * n_rows))

for i, junction in enumerate(junctions, 1):
    plt.subplot(n_rows, n_cols, i)
    junction_data = df[df['Junction'] == junction]['Vehicles']
    plt.plot(junction_data)
    plt.title(f'Traffic at Junction {junction}')
    plt.xlabel('DateTime')
    plt.ylabel('Number of Vehicles')

plt.tight_layout()
plt.show()

Stationarity Check

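Checking for stationarity is crucial for time-series modeling. ARIMA assumes the series is stationary after differencing, so the differencing order d must be chosen to remove trends from a non-stationary series.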

In [None]:
def check_stationarity(timeseries):
    """Run the Augmented Dickey-Fuller test on a series and print the outcome.

    Parameters:
    timeseries: Series of observations, passed unchanged to ``adfuller``.

    Prints the ADF statistic, the p-value and the critical values, then a
    verdict: the series is reported as not stationary when the p-value is
    above 0.05 and as stationary otherwise. Nothing is returned.
    """
    adf_result = adfuller(timeseries)
    p_value = adf_result[1]

    print('ADF Statistic:', adf_result[0])
    print('p-value:', p_value)
    print('Critical Values:', adf_result[4])

    verdict = (
        "The series is not stationary."
        if p_value > 0.05
        else "The series is stationary."
    )
    print(verdict)

# Run the stationarity test on the vehicle counts of every junction
for junction in junctions:
    print(f'Junction {junction}:')
    # Select the 'Vehicles' column for the rows belonging to this junction
    junction_data = df.loc[df['Junction'] == junction, 'Vehicles']
    check_stationarity(junction_data)
    print()  # blank line between junction reports

ARIMA Model Implementation

1. arima_model function fits the ARIMA model on the training data and forecasts the test data.
2. The dataset is split into training and testing sets.
3. The ARIMA model is fitted with specified order parameters (p, d, q).
4. Forecasted values are plotted against actual values to visualize performance.
5. Mean Squared Error (MSE) is calculated to quantify the prediction accuracy.


In [None]:
# Fit an ARIMA model on the training series and forecast the test horizon
def arima_model(train, test, order):
    """Fit ARIMA on ``train`` and forecast one step per element of ``test``.

    Parameters:
    train: Training series handed to ``ARIMA``.
    test: Held-out series; only its length is used, as the forecast horizon.
    order: ARIMA order passed through as the ``order`` argument.

    Returns:
    The result of the fitted model's ``forecast`` call for ``len(test)`` steps.
    """
    fitted = ARIMA(train, order=order).fit()
    return fitted.forecast(steps=len(test))

# Fraction of each junction's series used for training; the remaining tail
# (the last 10% of the data) is held out for testing.
split_ratio = 0.9

# ARIMA (p, d, q) order - this is an example, these parameters need tuning
order = (5, 1, 0)

for junction in junctions:
    print(f'Junction {junction}:')
    junction_data = df[df['Junction'] == junction]['Vehicles']

    # Chronological split by position: first part trains, the tail tests
    train_size = int(len(junction_data) * split_ratio)
    train = junction_data.iloc[:train_size]
    test = junction_data.iloc[train_size:]

    forecast = arima_model(train, test, order)

    # Compare and plot by position rather than by index label.
    # NOTE(review): the forecast's index labels may not match the test
    # index when the training index carries no frequency - confirm.
    forecast_values = np.asarray(forecast)

    # Quantify the prediction accuracy on the held-out data
    mse = mean_squared_error(test.values, forecast_values)
    print(f'Mean Squared Error: {mse:.2f}')
    print()

    # Plot forecasted values against the actual held-out values
    plt.figure(figsize=(10, 6))
    plt.plot(test.index, test.values, label='Actual')
    plt.plot(test.index, forecast_values, label='Forecast')
    plt.title(f'ARIMA Forecast vs Actual for Junction {junction}')
    plt.xlabel('DateTime')
    plt.ylabel('Number of Vehicles')
    plt.legend()
    plt.show()

Forecasting

1. forecast_future function fits the ARIMA model on the training data and forecasts future traffic.
2. The forecasted values are plotted alongside the historical data to visualize future predictions.

In [None]:
def forecast_future(train, order, steps):
    """Fit ARIMA on ``train`` and forecast ``steps`` periods past its end.

    Parameters:
    train: Series handed to ``ARIMA`` for fitting.
    order: ARIMA order passed through as the ``order`` argument.
    steps: Number of periods requested from the fitted model's ``forecast``.

    Returns:
    The result of the fitted model's ``forecast`` call.
    """
    fitted_model = ARIMA(train, order=order).fit()
    predictions = fitted_model.forecast(steps=steps)
    return predictions

# Number of future periods to forecast for each junction
forecast_steps = 24

# ARIMA (p, d, q) order - example values, tune as needed
order = (5, 1, 0)

# Spacing between forecast timestamps (the data is treated as hourly here)
one_hour = pd.Timedelta(hours=1)

for junction in junctions:
    print(f'Junction {junction}:')
    junction_data = df[df['Junction'] == junction]['Vehicles']

    # Fit on the junction's full history and forecast beyond its end
    future_forecast = forecast_future(junction_data, order, forecast_steps)

    # Timestamps for the forecast: start one hour after the last observation
    # so the first forecast point does not overlap the historical data
    future_index = pd.date_range(
        start=junction_data.index[-1] + one_hour,
        periods=forecast_steps,
        freq=one_hour,
    )

    # One figure per junction: history followed by the forecast. The forecast
    # is plotted by position against the explicitly built future timestamps.
    plt.figure(figsize=(10, 6))
    plt.plot(junction_data, label='Historical Data')
    plt.plot(future_index, np.asarray(future_forecast), label='Future Forecast')
    plt.title(f'Future Traffic Forecast for Junction {junction}')
    plt.xlabel('DateTime')
    plt.ylabel('Number of Vehicles')
    plt.legend()
    plt.show()



In [None]:
from datetime import datetime

def predict_traffic_for_input(junction, start_date, periods):
    """
    Predict traffic for a given junction and start date for a specified number of periods.

    An ARIMA(5, 1, 0) model is fitted on the 'Vehicles' values of the given
    junction, read from the module-level ``df``. Predictions are requested
    for an hourly window of ``periods`` timestamps beginning at ``start_date``.

    Parameters:
    junction (int): Junction number.
    start_date (str): Start date in the format 'YYYY-MM-DD'.
    periods (int): Number of periods to predict; must be at least 1.

    Returns:
    pd.Series: Forecasted traffic values.

    Raises:
    ValueError: If ``periods`` is less than 1, or if ``df`` contains no rows
        for ``junction``.
    """
    # Validate the cheap argument first, before any data access or model fit.
    # Without this, an empty date range fails later with an IndexError.
    if periods < 1:
        raise ValueError(f'periods must be at least 1, got {periods}')

    # Filter data for the given junction
    junction_data = df[df['Junction'] == junction]['Vehicles']
    if junction_data.empty:
        raise ValueError(f'No data available for junction {junction}')

    # Fit the ARIMA model
    order = (5, 1, 0)  # Tune these parameters as needed
    model_fit = ARIMA(junction_data, order=order).fit()

    # Generate the hourly date range for the forecast window
    start_date = pd.to_datetime(start_date)
    future_index = pd.date_range(start=start_date, periods=periods, freq='H')

    # Make predictions from the first to the last timestamp of the window
    forecast = model_fit.predict(start=future_index[0], end=future_index[-1])

    return forecast

# Example usage: request a 24-period forecast for junction 1
junction, start_date, periods = 1, '2023-01-01', 24

forecast = predict_traffic_for_input(
    junction=junction,
    start_date=start_date,
    periods=periods,
)

# Draw the forecast on a fresh figure and label it
plt.figure(figsize=(10, 6))
plt.plot(forecast, label='Forecast')
plt.gca().set(
    title=f'Traffic Forecast for Junction {junction}',
    xlabel='DateTime',
    ylabel='Number of Vehicles',
)
plt.legend()
plt.show()

# Also show the raw forecast values
print(forecast)


In [None]:
# Example usage: request a 24-period forecast for junction 2
junction, start_date, periods = 2, '2023-06-05', 24

forecast = predict_traffic_for_input(
    junction=junction,
    start_date=start_date,
    periods=periods,
)

# Draw the forecast on a fresh figure and label it
plt.figure(figsize=(10, 6))
plt.plot(forecast, label='Forecast')
plt.gca().set(
    title=f'Traffic Forecast for Junction {junction}',
    xlabel='DateTime',
    ylabel='Number of Vehicles',
)
plt.legend()
plt.show()

# Also show the raw forecast values
print(forecast)
