In [19]:
import math
import random as rand
import datetime as dt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [21]:
class RandomTrafficEvent:
    """Draws random traffic events from a table of records loaded from CSV.

    The CSV is read once at construction and kept on ``self.df``.
    """

    def __init__(self, csv_file):
        """
        Loads the traffic records.

        Args:
            csv_file: Passed straight to ``pandas.read_csv``.
        """
        self.df = pd.read_csv(csv_file)

    def generate_event(self):
        """
        Picks one random row and returns its Type, Location and High_Accuracy.

        All three values come from the same row, so they describe one actual
        record. Values are returned exactly as stored, so a field that is
        missing in the chosen row comes back as NaN.

        Returns:
            dict: A dictionary with the keys 'Type', 'Location' and
            'High_Accuracy'.

        Raises:
            IndexError: If the dataframe has no rows.
        """
        row_count = len(self.df)
        if row_count == 0:
            raise IndexError("Cannot generate an event from an empty dataframe")

        # Draw a single row *position* and read every field from it. Drawing
        # each column independently would mix values from unrelated rows, and
        # positional access keeps this working for any index, not only the
        # default 0..n-1 one.
        position = rand.randrange(row_count)

        return {
            column: self.df[column].iloc[position]
            for column in ('Type', 'Location', 'High_Accuracy')
        }

In [22]:
# Source file with the traffic records.
csv_file = 'data_mmda_traffic_spatial.csv'

# Loading happens inside RandomTrafficEvent; keep the instance around and
# expose its dataframe under a short name for the cells below.
random_traffic_event = RandomTrafficEvent(csv_file)
df = random_traffic_event.df

# Number of missing values in each column.
df.isna().sum()

# Create a new DataFrame with only the columns of interest


Date               0
Time             122
City             187
Location          23
Latitude           0
Longitude          0
High_Accuracy      0
Direction        857
Type              57
Lanes_Blocked    687
Involved         432
Tweet              0
Source             0
dtype: int64

In [29]:
# Features used to predict 'High_Accuracy'.
X = df[['Type', 'Location']]

# Label-encode every feature with its own encoder. The fitted encoders are
# kept in `encoders` so new data can later be mapped to the same codes.
enc = LabelEncoder()
encoders = {}
X_encoded = pd.DataFrame()

for feature in X.columns:
    encoders[feature] = LabelEncoder()
    # Both features contain missing values (see the null counts above). Give
    # them an explicit label instead of handing NaN mixed with strings to the
    # encoder.
    X_encoded[feature] = encoders[feature].fit_transform(X[feature].fillna('Unknown'))

# Encode the target variable 'High_Accuracy'
y_encoded = enc.fit_transform(df['High_Accuracy'])

# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# Random forest *regressor* (not a classifier), seeded so reruns give the same
# model.
# NOTE(review): if 'High_Accuracy' is a yes/no flag, a classifier and a
# classification metric may be a better fit -- confirm against the data.
model = RandomForestRegressor(random_state=42)

# The encoded features are already integers, so they are passed as-is; casting
# them to strings first only forces scikit-learn to parse them back to numbers.
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model's performance using mean squared error, and show it.
mse = mean_squared_error(y_test, y_pred)
mse


In [31]:
# Make predictions on new data
new_data = pd.DataFrame({'Type': ['Accident', 'Road Closure'], 'Location': ['Makati', 'Quezon City']})
new_data_encoded = pd.DataFrame()

for feature in new_data.columns:
    new_data_encoded[feature] = encoders[feature].inverse_transform(new_data[feature])

new_predictions = model.predict(new_data_encoded.astype(str))
print("New Data Predictions:", new_predictions)


ValueError: y contains previously unseen labels: ['Accident' 'Road Closure']