In [None]:
!pip install Flask

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
!pip install pymongo

In [None]:
# Download the Kaggle traffic time-series dataset (this cell only downloads; the CSV is
# read into a DataFrame in the next cell).
# NOTE(review): `opendatasets` is not pip-installed in any cell visible here, and it
# presumably needs Kaggle credentials to fetch the files — confirm both before relying
# on Restart & Run All.
import opendatasets as od
dataset = 'https://www.kaggle.com/datasets/stealthtechnologies/traffic-time-series-dataset/data'
od.download(dataset)

In [None]:
# Load the downloaded CSV into `df`. `Timestamp` is parsed as datetime at read time
# because later cells derive Hour / DayOfWeek / Month from it through the `.dt` accessor.
df = pd.read_csv('/content/traffic-time-series-dataset/traffic_dataset_with_trend.csv', parse_dates=['Timestamp'])

# **Database Integration**

In [None]:
import os
from getpass import getpass

from pymongo import MongoClient
import gridfs

# Never embed the MongoDB username/password in the notebook: anything committed or
# shared leaks the credential. The connection string is taken from the MONGODB_URI
# environment variable, falling back to an interactive (non-echoing) prompt.
# NOTE: the password that used to be hardcoded here must be treated as compromised
# and rotated on the cluster.
client = MongoClient(os.environ.get('MONGODB_URI') or getpass('MongoDB connection URI: '))

# Database that backs the GridFS bucket used by the file upload/download cells.
database_name = 'Cluster0'
db = client[database_name]
fs = gridfs.GridFS(db)

In [None]:
# Handle to the 'Cluster0' collection of the current `db`.
# NOTE(review): `collection` is rebound to db['TrafficTime'] in the next cell before any
# visible use, so this cell looks redundant — confirm before removing.
collection = db['Cluster0']

In [None]:
import os
from getpass import getpass

from pymongo import MongoClient

# The connection string carries the account password, so it must not live in the
# notebook source. Read it from MONGODB_URI, or prompt for it without echoing.
# NOTE: the credential previously hardcoded here should be rotated.
uri = os.environ.get('MONGODB_URI') or getpass('MongoDB connection URI: ')

client = MongoClient(uri)

# From here on `db` points at the 'Traffic' database. The GridFS handle `fs` created
# earlier keeps referring to the database it was constructed with.
db = client['Traffic']

# Access a collection
collection = db['TrafficTime']

In [None]:
# Connectivity smoke test: fetch a single document from the current `collection`
# and print it (prints None when the collection has no documents).
result = collection.find_one()
print(result)

In [None]:
# Local path of the file uploaded to GridFS below; it is the same CSV that was read into `df`.
file_path = '/content/traffic-time-series-dataset/traffic_dataset_with_trend.csv'

In [None]:
# Upload the file at `file_path` to GridFS and keep the returned id for later retrieval.
# NOTE(review): the source is a .csv but it is stored under the GridFS filename
# 'file.zip' — the name looks like a leftover; confirm no consumer depends on it
# before renaming.
with open(file_path, 'rb') as file:
    file_id = fs.put(file, filename='file.zip')
    print(f"File stored with file ID: {file_id}")

In [None]:
# Round-trip check: look the upload up by id and report its stored filename and byte length.
stored_file = fs.get(file_id)
print(f"Retrieved file: {stored_file.filename}, Size: {stored_file.length} bytes")

In [None]:
# Download the stored GridFS file back to local disk.
# NOTE(review): the bytes written are the uploaded CSV, yet the destination is named
# 'detect.zip' — the extension does not match the content; confirm the intended name.
output_file_path = '/content/detect.zip'

retrieved_file = fs.get(file_id)

# The whole file is read into memory and written out in one call.
with open(output_file_path, 'wb') as file:
    file.write(retrieved_file.read())

print(f"File retrieved and saved as: {output_file_path}")

### **EDA**

In [None]:
# Preview the first rows to sanity-check that the CSV loaded with the expected columns.
df.head()

In [None]:
# Column dtypes, non-null counts and memory footprint in one summary.
df.info()

In [None]:
# Summary statistics, to spot odd ranges or outliers before scaling.
df.describe()

In [None]:
# Missing values per column; nothing downstream imputes or drops rows, so these
# counts matter for every later step.
df.isnull().sum()

In [None]:
# Exact column labels; later cells index 'Timestamp', 'Weather', 'Events' and
# 'Traffic Volume' by name.
df.columns

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Per-column dtypes (overlaps with the df.info() output above).
df.dtypes

In [None]:
# Distinct-value count per column; low counts point at categorical columns.
df.nunique()

In [None]:
# Extract time features
# Adds three integer calendar columns derived from the parsed Timestamp. The columns are
# written onto `df` in place, so cells above that displayed `df` show the frame
# without them. pandas numbers DayOfWeek with Monday = 0.
df['Hour'] = df['Timestamp'].dt.hour
df['DayOfWeek'] = df['Timestamp'].dt.dayofweek
df['Month'] = df['Timestamp'].dt.month

In [None]:
# One histogram per column of `df` (now including the derived time features) on a 15x10 canvas.
df.hist(figsize=(15, 10))

In [None]:
# Select only numeric columns for correlation calculation
# (columns such as Timestamp and Weather are non-numeric and are excluded here).
numeric_df = df.select_dtypes(include=['number'])

# Calculate the correlation matrix
# DataFrame.corr() defaults to pairwise Pearson correlation.
corr_matrix = numeric_df.corr()

# Bare expression so the notebook renders the matrix as a table.
corr_matrix

In [None]:
# Same matrix rendered as a colour-graded table using the 'coolwarm' colormap.
corr_matrix.style.background_gradient(cmap='coolwarm')

In [None]:
# Which calendar months the data covers (Month was derived from Timestamp above).
df['Month'].unique()

In [None]:
# Weather categories present; each becomes one column of the one-hot encoding below.
df['Weather'].unique()

In [None]:
# Distinct values of the Events column.
# NOTE(review): Events is inspected here but is not part of the feature matrix built
# later in the cells visible here — confirm whether that omission is intentional.
df['Events'].unique()

In [None]:
# Hours of day that actually occur in the data (Hour was derived from Timestamp above).
df['Hour'].unique()

In [None]:
# One-hot encode weather data
# scikit-learn renamed OneHotEncoder's `sparse` argument to `sparse_output` in 1.2 and
# removed the old name in 1.4, so `sparse=False` raises TypeError on current releases.
# Try the current keyword first and fall back for older installs; either way the
# encoder returns a dense ndarray, which the np.hstack in a later cell relies on.
try:
    weather_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # scikit-learn < 1.2 only understands the legacy keyword.
    weather_encoder = OneHotEncoder(sparse=False)
weather_encoded = weather_encoder.fit_transform(df[['Weather']])

### **Preprocessing**

In [None]:
# Display the fitted encoder object (its repr shows the configured parameters).
weather_encoder

In [None]:
# Display the encoded weather matrix: one row per row of `df`, one column per weather category.
weather_encoded

In [None]:
# Assemble the model input matrix column-wise: the three calendar features first
# (Hour, DayOfWeek, Month), followed by the one-hot weather columns.
features = np.concatenate(
    [df[['Hour', 'DayOfWeek', 'Month']].values, weather_encoded],
    axis=1,
)
features

In [None]:
# Traffic volume target
# Taken as a 1-D NumPy array; it is reshaped to a single column when it is scaled below.
target = df['Traffic Volume'].values
target

In [None]:
# Min-max scale the inputs. The fitted scaler is kept because new inputs must be
# transformed with the same per-column ranges.
feature_scaler = MinMaxScaler().fit(features)
scaled_features = feature_scaler.transform(features)

# Min-max scale the target with its own scaler, which is reused later to map
# predictions back to the original units. The scaler needs 2-D input, hence the
# single-column reshape.
# NOTE(review): both scalers are fitted on the full dataset, before any train/test
# split — confirm that seeing the test-period range is acceptable here.
target_scaler = MinMaxScaler().fit(target.reshape(-1, 1))
scaled_target = target_scaler.transform(target.reshape(-1, 1))

### **Split Data**

In [None]:
# Prepare sequences
def create_sequences(features, target, n_past):
    """Build sliding-window samples for sequence models.

    Sample ``k`` pairs the ``n_past`` consecutive feature rows
    ``features[k : k + n_past]`` with the target value that immediately follows
    them, ``target[k + n_past]``.

    Args:
        features: array-like of shape (n_rows, ...) with one row per time step.
        target: array-like with the same number of rows as ``features``.
        n_past: number of past time steps per window; must be >= 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_rows - n_past, n_past, ...)``
        and ``y`` has shape ``(n_rows - n_past, ...)``. When there are not more
        than ``n_past`` rows, both arrays are empty but keep their trailing
        dimensions so downstream ``X.shape[1]`` / ``X.shape[2]`` lookups still work.

    Raises:
        ValueError: if ``n_past`` is smaller than 1 or the inputs differ in length.
    """
    # A non-positive window would silently build empty or wrapped-around windows
    # (negative indices into `target`), so reject it up front.
    if n_past < 1:
        raise ValueError(f"n_past must be >= 1, got {n_past}")

    features = np.asarray(features)
    target = np.asarray(target)
    if len(features) != len(target):
        raise ValueError(
            f"features and target must have the same length, "
            f"got {len(features)} and {len(target)}"
        )

    n_rows = len(features)
    if n_rows <= n_past:
        # Not enough history for a single window: return correctly shaped empties.
        empty_X = np.empty((0, n_past) + features.shape[1:], dtype=features.dtype)
        empty_y = np.empty((0,) + target.shape[1:], dtype=target.dtype)
        return empty_X, empty_y

    X = np.stack([features[end - n_past:end] for end in range(n_past, n_rows)])
    # The target aligned with the window ending at `end` is target[end]; copy so the
    # result does not alias the caller's array.
    y = target[n_past:].copy()
    return X, y

In [None]:
# Create X and y here
# Each sample is a window of the previous `n_past` rows of scaled features; its label is
# the scaled target of the row right after the window. The first `n_past` rows therefore
# have no sample of their own.
n_past = 60
X, y = create_sequences(scaled_features, scaled_target, n_past)

### **Model Selection**

In [None]:
# Stacked LSTM regressor: three 50-unit LSTM layers, each followed by 20% dropout,
# finishing in a single-unit Dense output for the scaled traffic volume.
model = Sequential([
    # Input shape is (timesteps, features), read off the windowed array X.
    LSTM(50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    # Intermediate layer still returns the full sequence for the next LSTM.
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    # Last recurrent layer returns only its final output, feeding the Dense head.
    LSTM(50),
    Dropout(0.2),
    Dense(1),
])

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

In [None]:
# Train the model
# Fits on the full windowed arrays for 50 epochs; validation_split=0.2 asks Keras to hold
# out 20% of (X, y) for the per-epoch validation loss. `history` keeps the returned
# training history.
# NOTE(review): Keras documents validation_split as taking the *last* fraction of the
# samples, before shuffling. If so, the validation rows are the same rows the next cell
# carves out as X_test (test_size=0.2, shuffle=False), so the reported "test" metrics are
# measured on the validation data rather than on a separate hold-out — confirm.
# NOTE(review): no random seed is set anywhere visible, so results vary between runs.
history = model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

In [None]:
# Predict on the test set (split data accordingly)
# Chronological 80/20 split (shuffle=False keeps the time order). The split happens
# after training: the model above was fitted by passing all of X with validation_split=0.2,
# not by passing X_train.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform to original scale
# Predictions and labels are both mapped back with the target scaler so the metrics and
# plots below are in traffic-volume units. y_train / y_test are overwritten with their
# unscaled versions here.
train_predict = target_scaler.inverse_transform(train_predict)
y_train = target_scaler.inverse_transform(y_train)
test_predict = target_scaler.inverse_transform(test_predict)
y_test = target_scaler.inverse_transform(y_test)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Calculate performance metrics
# All inputs were inverse-transformed in the previous cell, so MAE and RMSE are in the
# original traffic-volume units. RMSE is computed as the square root of the MSE.
train_mae = mean_absolute_error(y_train, train_predict)
train_rmse = np.sqrt(mean_squared_error(y_train, train_predict))
test_mae = mean_absolute_error(y_test, test_predict)
test_rmse = np.sqrt(mean_squared_error(y_test, test_predict))
# R-squared is reported for the test portion only.
test_r2 = r2_score(y_test, test_predict)

print(f'Training MAE: {train_mae:.2f}')
print(f'Training RMSE: {train_rmse:.2f}')
print(f'Test MAE: {test_mae:.2f}')
print(f'Test RMSE: {test_rmse:.2f}')
print(f'Test R-squared: {test_r2:.2f}')

In [None]:
# Plotting
import matplotlib.pyplot as plt

# Both figures compare true and predicted traffic volume in original units. The
# x-axis is the sample's position within its split, because only y-values are
# passed to plt.plot. Titles and axis labels are set so each figure stands on its own.

# Plotting train predictions
plt.figure(figsize=(15, 6))
plt.plot(y_train, label='True Values (Train)')
plt.plot(train_predict, label='Predicted Values (Train)')
plt.title('Traffic Volume: True vs Predicted (Train)')
plt.xlabel('Sample index')
plt.ylabel('Traffic Volume')
plt.legend()
plt.show()

# Plotting test predictions
plt.figure(figsize=(15, 6))
plt.plot(y_test, label='True Values (Test)')
plt.plot(test_predict, label='Predicted Values (Test)')
plt.title('Traffic Volume: True vs Predicted (Test)')
plt.xlabel('Sample index')
plt.ylabel('Traffic Volume')
plt.legend()
plt.show()

In [None]:
# Save the model
# Writes the trained network to the working directory as an HDF5 (.h5) file.
# NOTE(review): recent Keras releases treat .h5 as a legacy format and recommend
# `.keras` — confirm which loader consumes this file before changing the extension.
# NOTE(review): the fitted feature/target scalers and the weather encoder are not saved
# in the cells visible here; inference code will need them to reproduce the preprocessing.
model.save('lstm_traffic_model.h5')