In [9]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Restore the previously trained classifier from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
MODEL_PATH = 'Random_Forest_Model.pkl'
loaded_model = joblib.load(MODEL_PATH)

In [13]:
#load and clean labels
def load_labels(file_path):
    """
    Load integer labels from a whitespace-delimited text file.

    The file is read with pandas (no header row). Every token then has any
    '[' or ']' characters and surrounding whitespace removed, and the cleaned
    tokens are converted to integers.

    Parameters
    ----------
    file_path : str or path-like
        Path of the label file, e.g. a single line such as ``[1 2 3]``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with the labels in row-major file order.

    Raises
    ------
    ValueError
        If a cleaned token cannot be converted to an integer.
    """
    # sep=r'\s+' is the supported replacement for the deprecated
    # delim_whitespace=True option, which emits a FutureWarning on recent pandas.
    df = pd.read_csv(file_path, header=None, sep=r'\s+')

    # Tokens next to the brackets (e.g. "[1" or "7]") are read as strings, so
    # normalise every column to str before stripping brackets and whitespace.
    df = df.apply(
        lambda col: col.astype(str)
        .str.replace('[', '', regex=False)
        .str.replace(']', '', regex=False)
        .str.strip()
    )

    # Flatten the (rows x columns) table into one vector of integer labels.
    labels = df.values.flatten().astype(int)
    return labels



#parse data files
def parse_pems_data(file_path):
    """
    Parse a PEMS data file where each non-empty line holds one matrix written
    in MATLAB syntax (``[a b c; d e f]``): rows are separated by ';' and
    values within a row by whitespace.

    Blank lines and empty rows (for example one produced by a trailing ';')
    are skipped so that they do not create ragged or empty matrices.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to parse.

    Returns
    -------
    numpy.ndarray
        Array built from the per-line matrices. When every line has the same
        number of rows and columns the result is 3-D with shape
        (n_lines, n_rows, n_cols).

    Raises
    ------
    ValueError
        If a token cannot be converted to float.
    """
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            # Drop the MATLAB brackets; only the ';' and whitespace separators matter.
            line = line.strip().replace('[', '').replace(']', '')
            if not line:
                # Blank line (e.g. an extra newline at the end of the file).
                continue
            # Ignore empty rows such as the one left behind by a trailing ';'.
            rows = [row for row in line.split(';') if row.strip()]
            matrix = np.array([[float(x) for x in row.split()] for row in rows])
            data.append(matrix)
    return np.array(data)

In [31]:
# Lookup from the integer class labels (1-7) to weekday names, used to print predictions.
# NOTE(review): assumes label 1 means Sunday — confirm against the dataset's label convention.
day_names = dict(enumerate(
    ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
    start=1,
))

In [21]:
# Prototype example: the full test set plus three single-day "guess" files,
# each paired with its label file.
test_file, test_label_file = "PEMS_test", "PEMS_testlabels"
first_guess_file, first_guess_label_file = "First_Day_Guess_test.txt", "First_Day_Guess_label.txt"
second_guess_file, second_guess_label_file = "Second_Day_Guess_test.txt", "Second_Day_Guess_label.txt"
third_guess_file, third_guess_label_file = "Third_Day_Guess_test.txt", "Third_Day_Guess_label.txt"

# Parse the data matrices, in the same order as the files are listed above.
(test_data,
 first_guess_test_data,
 second_guess_test_data,
 third_guess_test_data) = [
    parse_pems_data(data_path)
    for data_path in (test_file, first_guess_file, second_guess_file, third_guess_file)
]

# Load the matching label vectors.
(test_labels,
 first_guess_test_labels,
 second_guess_test_labels,
 third_guess_test_labels) = [
    load_labels(label_path)
    for label_path in (test_label_file, first_guess_label_file,
                       second_guess_label_file, third_guess_label_file)
]

# Report the shape of every array so data/label counts can be compared by eye.
for caption, array in (
    ("Test data shape:", test_data),
    ("Test labels shape:", test_labels),
    ("First guess data shape:", first_guess_test_data),
    ("First guess label shape:", first_guess_test_labels),
    ("Second guess data shape:", second_guess_test_data),
    ("Second guess label shape:", second_guess_test_labels),
    ("Third guess data shape:", third_guess_test_data),
    ("Third guess label shape:", third_guess_test_labels),
):
    print(caption, array.shape)

Test data shape: (18, 963, 144)
Test labels shape: (18,)
First guess data shape: (1, 963, 144)
First guess label shape: (1,)
Second guess data shape: (1, 963, 144)
Second guess label shape: (1,)
Third guess data shape: (1, 963, 144)
Third guess label shape: (1,)


  df = pd.read_csv(file_path, header=None, delim_whitespace=True)
  df = pd.read_csv(file_path, header=None, delim_whitespace=True)
  df = pd.read_csv(file_path, header=None, delim_whitespace=True)
  df = pd.read_csv(file_path, header=None, delim_whitespace=True)


In [17]:
# Predictions on the full test set.
# Each sample's matrix is flattened into a single feature row before prediction.
n_test_samples = test_data.shape[0]
test_pred = loaded_model.predict(test_data.reshape(n_test_samples, -1))

test_accuracy = accuracy_score(test_labels, test_pred)
test_report = classification_report(test_labels, test_pred)
print("Baseline Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

Baseline Accuracy: 0.9444444444444444

Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         3
           3       0.75      1.00      0.86         3
           4       1.00      0.75      0.86         4
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         2

    accuracy                           0.94        18
   macro avg       0.96      0.96      0.96        18
weighted avg       0.96      0.94      0.94        18



In [23]:
# First day guess: flatten the sample matrix to one feature row, predict, then score.
first_guess_features = first_guess_test_data.reshape(first_guess_test_data.shape[0], -1)
first_guess_test_pred = loaded_model.predict(first_guess_features)

first_guess_accuracy = accuracy_score(first_guess_test_labels, first_guess_test_pred)
first_guess_report = classification_report(first_guess_test_labels, first_guess_test_pred)
print("Baseline Accuracy:", first_guess_accuracy)
print("\nClassification Report:\n", first_guess_report)

Baseline Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           6       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [45]:
# Translate the first-guess prediction (an integer label) into a weekday name.
first_value = first_guess_test_pred[0]
first_day_name = day_names[first_value]
print('What day is it: ' + first_day_name)

What day is it: Friday


In [25]:
# Second day guess: flatten the sample matrix to one feature row, predict, then score.
second_guess_features = second_guess_test_data.reshape(second_guess_test_data.shape[0], -1)
second_guess_test_pred = loaded_model.predict(second_guess_features)

second_guess_accuracy = accuracy_score(second_guess_test_labels, second_guess_test_pred)
second_guess_report = classification_report(second_guess_test_labels, second_guess_test_pred)
print("Baseline Accuracy:", second_guess_accuracy)
print("\nClassification Report:\n", second_guess_report)

Baseline Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           7       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [43]:
# Translate the second-guess prediction (an integer label) into a weekday name.
second_value = second_guess_test_pred[0]
second_day_name = day_names[second_value]
print('What day is it: ' + second_day_name)

What day is it: Saturday


In [27]:
# Third day guess: flatten the sample matrix to one feature row, predict, then score.
third_guess_features = third_guess_test_data.reshape(third_guess_test_data.shape[0], -1)
third_guess_test_pred = loaded_model.predict(third_guess_features)

third_guess_accuracy = accuracy_score(third_guess_test_labels, third_guess_test_pred)
third_guess_report = classification_report(third_guess_test_labels, third_guess_test_pred)
print("Baseline Accuracy:", third_guess_accuracy)
print("\nClassification Report:\n", third_guess_report)

Baseline Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           3       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [41]:
# Translate the third-guess prediction (an integer label) into a weekday name.
third_value = third_guess_test_pred[0]
third_day_name = day_names[third_value]
print('What day is it: ' + third_day_name)

What day is it: Tuesday
