# Import libraries

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,GRU, Dense ,Dropout
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Loading the dataset

In [None]:
# Absolute path of the raw traffic-volume CSV that the rest of the notebook works on.
dataset_path = '/content/traffic_volume.csv'
df = pd.read_csv(dataset_path)

# Exploratory Data Analysis (EDA)

In [None]:
# Show the DataFrame as the cell output for a first look at the loaded rows.
df

In [None]:
# Print the column names, dtypes and non-null counts of the DataFrame.
df.info()

In [None]:
# Summary statistics for the DataFrame's columns.
df.describe()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Columns that take part in the correlation analysis and the box plots.
numeric_columns = [
    'air_pollution_index',
    'humidity',
    'wind_speed',
    'wind_direction',
    'visibility_in_miles',
    'dew_point',
    'temperature',
    'rain_p_h',
    'snow_p_h',
    'clouds_all',
    'traffic_volume',
]
df_num = df[numeric_columns]

# Pairwise correlation between the selected columns (shown as the cell output).
df_num.corr()

# Data Visualization

In [None]:
# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(10, 10))
sns.heatmap(df_num.corr(), annot=True)
# No plt.legend() call: the heatmap adds no labelled artists (its colour bar is
# the key), so a legend would only emit a warning.
plt.show()

In [None]:
# One box per numeric column, drawn side by side to expose outliers.
plt.figure(figsize=(25, 5))
# Pass the frame by keyword so it is always taken as wide-form `data`,
# whatever the positional parameter order of the installed seaborn version.
sns.boxplot(data=df_num)
# No plt.legend() call: nothing in this plot carries a label, so a legend
# would only emit a warning.
plt.show()

# Data Preprocessing

In [None]:
# Parse the timestamp strings, order the rows by time, then make the
# timestamp the index of the DataFrame.
df['date_time'] = pd.to_datetime(df['date_time'])
df = df.sort_values(by='date_time').set_index('date_time')

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the categorical columns.
# Each column gets its own fitted encoder, kept in `label_encoders`, so the
# integer codes can later be mapped back with
# `label_encoders[column].inverse_transform(...)`. Re-fitting a single shared
# encoder would keep only the mapping of the last column.
label_encoders = {}
for column in ('is_holiday', 'weather_type', 'weather_description'):
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le
# After the loop `le` is the encoder fitted on 'weather_description'.

In [None]:
def remove_outliers(df, column):
    """Return the rows of *df* whose *column* value lies within the IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both inclusive),
    where Q1/Q3 are the 25th/75th percentiles of ``df[column]``. The input
    frame is not modified; a filtered frame is returned.
    """
    values = df[column]
    first_quartile, third_quartile = values.quantile([0.25, 0.75])
    spread = third_quartile - first_quartile
    keep = values.between(first_quartile - 1.5 * spread,
                          third_quartile + 1.5 * spread)
    return df[keep]
# Keep only the rows whose rain_p_h value lies inside the IQR fences.
# NOTE(review): if at least 75% of the rain_p_h values are identical, the IQR
# is 0 and every row with a different value is dropped - confirm this is wanted.
df = remove_outliers(df, column='rain_p_h')

In [None]:
# Show the DataFrame again to inspect it after the rain_p_h outlier filter.
df

# Time Series Visualization

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the traffic volume series using a period of
# 24 observations, plotted as one figure.
traffic_series = df['traffic_volume']
decomposition = seasonal_decompose(traffic_series, model='additive', period=24)
decomposition.plot()
plt.show()

In [None]:
# Traffic volume against the date_time index, drawn as a single line plot.
ax = sns.lineplot(data=df, x='date_time', y='traffic_volume')
ax.set_title('traffic volume Over Time')
ax.set_xlabel('date of time')
ax.set_ylabel('traffic volume')
plt.show()

# Train-Test Split

In [None]:
# Positional 70/30 split: the first 70% of the rows form the training set and
# the remaining rows the test set (no shuffling, row order is preserved).
split = int(len(df) * 0.7)
train_data, test_data = df.iloc[:split], df.iloc[split:]

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every column.
# The scaler is fitted on the training split only and then re-used, unchanged,
# on the test split. Fitting it a second time on the test data would put the
# two splits on different scales and leak test-set statistics into evaluation.
# Consequence: scaled test values may fall slightly outside [0, 1].
Scaler = MinMaxScaler()
scaled_training_set = Scaler.fit_transform(train_data)
scaled_testing_set = Scaler.transform(test_data)

In [None]:
WINDOW_SIZE = 30  # number of consecutive past observations in each input sample


def make_windows(scaled_values, target_col, window=WINDOW_SIZE):
    """Turn one column of a scaled 2-D array into supervised sliding windows.

    Args:
        scaled_values: 2-D array of shape (n_rows, n_columns).
        target_col: Positional index of the column to forecast.
        window: Number of preceding rows used to predict the next one.

    Returns:
        A tuple ``(X, y)``. ``X`` has shape ``(n_rows - window, window, 1)``
        and ``y`` has shape ``(n_rows - window,)``; ``y[k]`` is the value that
        immediately follows the window ``X[k]``.

    Raises:
        ValueError: If the array does not contain more than ``window`` rows.
    """
    series = np.asarray(scaled_values)[:, target_col]
    if len(series) <= window:
        raise ValueError(
            f"need more than {window} rows to build a window, got {len(series)}"
        )
    # Bounds come from the data itself, so every available row is used and a
    # shorter or longer dataset cannot trigger an IndexError or be silently cut.
    X = np.array([series[end - window:end] for end in range(window, len(series))])
    y = series[window:].copy()
    # Recurrent layers expect (samples, time steps, features).
    return X.reshape(X.shape[0], window, 1), y


# Look the target column up by name instead of assuming it is column 0 of the
# scaled array (the scaler keeps the column order of train_data).
target_col = train_data.columns.get_loc('traffic_volume')

X_train, y_train = make_windows(scaled_training_set, target_col)
X_test, y_test = make_windows(scaled_testing_set, target_col)

# Model Selection

In [None]:
# Stacked LSTM regressor: five LSTM layers (50-100-200-100-50 units), each
# followed by 20% dropout, ending in a single-unit dense output.
# NOTE(review): the first four LSTM layers use 'relu' while the last keeps the
# layer default - confirm the mix is intentional.
lstm_stack = [
    LSTM(units=50, activation='relu', return_sequences=True,
         input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
]
for hidden_units in (100, 200, 100):
    lstm_stack.extend([
        LSTM(units=hidden_units, activation='relu', return_sequences=True),
        Dropout(0.2),
    ])
# The last recurrent layer returns only its final step, feeding the dense head.
lstm_stack.extend([
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])
model = Sequential(lstm_stack)
model.summary()

In [None]:
# Train the LSTM with Adam on mean squared error, then predict the test windows.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
)
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=32,
)
LSYM_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Error metrics of the LSTM predictions against the scaled test targets.
mse = mean_squared_error(y_test, LSYM_pred)
mae = mean_absolute_error(y_test, LSYM_pred)
rmse = np.sqrt(mse)

for label, value in (
    ('Mean Squared Error (MSE):', mse),
    ('Root Mean Squared Error (RMSE):', rmse),
    ('Mean Absolute Error (MAE):', mae),
):
    print(label, value)

In [None]:
# Overlay the LSTM predictions on the actual (scaled) test targets.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label='Actual')
ax.plot(LSYM_pred, label='Predicted')
ax.set_title('Actual vs Predicted Values (LSTM)')
ax.set_xlabel('Time')
ax.set_ylabel('traffic volume')
ax.legend()
plt.show()

In [None]:
# Stacked GRU regressor: five GRU layers (50-100-150-100-50 units), each
# followed by 20% dropout, ending in a single-unit dense output.
gru_stack = [
    GRU(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1), activation='tanh'),
    Dropout(0.2),
]
for hidden_units in (100, 150, 100):
    gru_stack.extend([
        GRU(units=hidden_units, return_sequences=True),
        Dropout(0.2),
    ])
# The last recurrent layer returns only its final step, feeding the dense head.
gru_stack.extend([
    GRU(units=50, activation='tanh'),
    Dropout(0.2),
    Dense(units=1),
])
GRU_model = Sequential(gru_stack)
GRU_model.summary()

In [None]:
# Train the GRU with Adam on mean squared error, then predict the test windows.
GRU_model.compile(
    optimizer='adam',
    loss='mean_squared_error',
)
# Despite its name, this variable holds the return value of fit(), not predictions.
GRU_model_prdiction = GRU_model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=150,
)
GRU_predicted = GRU_model.predict(X_test)

In [None]:
# Error metrics of the GRU predictions against the scaled test targets.
mse = mean_squared_error(y_test, GRU_predicted)
mae = mean_absolute_error(y_test, GRU_predicted)
rmse = np.sqrt(mse)

for label, value in (("MAE:", mae), ("MSE:", mse), ("RMSE:", rmse)):
    print(label, value)

In [None]:
# Overlay the GRU predictions on the actual (scaled) test targets.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label='Actual')
ax.plot(GRU_predicted, label='Predicted')
ax.set_title('Actual vs Predicted Values (GRU)')
ax.set_xlabel('Time')
ax.set_ylabel('traffic volume')
ax.legend()
plt.show()