# CSE150A Milestone 3: Gaussian HMM for Weather Prediction

## Importing and Preprocessing Data

In [None]:
import pandas as pd
import numpy as np
from hmmlearn import hmm
from hmmlearn.hmm import GaussianHMM
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.stats import mode
import matplotlib.pyplot as plt
import seaborn as sns

# Load the weather dataset; the relative path is resolved against the current
# working directory. Later cells read the 'date', 'weather', 'precipitation',
# 'temp_max', 'temp_min' and 'wind' columns from this frame.
df = pd.read_csv('seattle-weather.csv')

In [None]:
#discretize weather
def discretize_weather(w):
    """Translate a weather name into its integer category code.

    The codes are drizzle=0, rain=1, sun=2, snow=3 and fog=4. Any value
    that matches none of these names yields ``None``.
    """
    known_names = ('drizzle', 'rain', 'sun', 'snow', 'fog')
    # The position of a name in the tuple is its category code.
    for code, name in enumerate(known_names):
        if w == name:
            return code
    return None

# Encode each weather name as its integer code and store the result as ints
# in one step, so the new column never holds a non-integer dtype.
df['weather_cat'] = df['weather'].apply(discretize_weather).astype(int)

# Show every column's dtype to confirm the encoding produced an integer column.
print(df.dtypes)

In [None]:
# Preview the first rows of the frame, including the new weather_cat column.
df.head()

In [None]:
# The model does not use the 'date' and 'weather' columns (the label now lives
# in weather_cat), so remove them, discard rows with missing values, and preview.
df = df.drop(columns=['date', 'weather']).dropna()
df.head()

In [None]:
# Sanity check: list the distinct category codes present. Codes 0-4 are
# expected; unique() reports them in order of first appearance, not sorted.
print(df['weather_cat'].unique())

# Reverse lookup from category code back to the weather name.
weather_map = dict(enumerate(['drizzle', 'rain', 'sun', 'snow', 'fog']))

## Model Training

In [None]:
# Build the observation matrix (one row per dataset row) and the label vector.
X = df[['precipitation', 'temp_max', 'temp_min', 'wind']].values  # observations
Y = df['weather_cat'].values

# Split train/test WITHOUT shuffling. An HMM learns transitions between
# consecutive observations, so the default shuffle of train_test_split would
# destroy the sequence structure. This keeps the file's row order
# (assumed to be chronological -- confirm against the CSV).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

# Define and train the Gaussian HMM
n_states = 5
model = GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=1000, random_state=42)
model.fit(X_train)

# Code written collaboratively and taken from ChatGPT-4o:
# Prompt: I asked ChatGPT why the accuracy for the GaussianHMM was so bad before, and it suggested
# not using the discrete mapping we had initially, which was interfering with our accuracy.
# Decode the most likely hidden state for every training observation
predicted_states_train = model.predict(X_train)

print("Transition matrix:\n", model.transmat_)  # Probability of moving between states
print("\nMeans of each state:\n", model.means_)  # Average values for each feature in each state
print("\nCovariances:\n", model.covars_)  # Variability in each state

# Map each hidden state to the most common label among the training samples
# decoded into that state. np.bincount(...).argmax() is used instead of
# scipy.stats.mode(...).mode[0] because newer SciPy returns a scalar mode,
# which cannot be indexed. Ties resolve to the smallest label either way.
# A state that receives no training samples falls back to the overall most
# common training label, so every state has a mapping and later lookups
# cannot raise KeyError.
fallback_label = int(np.bincount(Y_train).argmax())
state_to_label = {}
for state in range(n_states):
    mask = predicted_states_train == state  # Samples assigned to this state
    if mask.any():
        state_to_label[state] = int(np.bincount(Y_train[mask]).argmax())
    else:
        state_to_label[state] = fallback_label

# Convert hidden states to predicted weather labels
mapped_predictions = np.array([state_to_label[state] for state in predicted_states_train])

In [None]:
# Score the mapped training predictions against the true training labels.
accuracy_train = accuracy_score(Y_train, mapped_predictions)
conf_matrix_train = confusion_matrix(Y_train, mapped_predictions)

# Report both metrics, confusion matrix first.
for heading, value in (
    ("Confusion Matrix:\n", conf_matrix_train),
    ("\nAccuracy Score:", accuracy_train),
):
    print(heading, value)

In [None]:
# Graph for training data
# Weather names for the true training labels (kept for any later use).
wlabels_train = [weather_map[label] for label in Y_train]

plt.figure(figsize=(20, 10))
# Plot the *mapped* predictions rather than the raw hidden-state indices.
# State indices are arbitrary and do not correspond to weather codes, so
# only the mapped labels are comparable to the actual labels. Both series
# use the integer weather codes, so they share one y-axis.
plt.plot(mapped_predictions, label="Predicted Weather Labels")
plt.plot(Y_train, label="Actual Weather Labels", linestyle="dashed")
# Show weather names instead of numeric codes on the y-axis.
plt.yticks(list(weather_map.keys()), list(weather_map.values()))
plt.title("Actual vs. Predicted Weather for Training Data")
plt.legend()
plt.show()

## Model Testing

In [None]:
# Now use the model on the test set: decode a hidden state per test observation
predicted_states_test = model.predict(X_test)

# A hidden state that never appeared when decoding the training data may have
# no entry in state_to_label. Fall back to the most common training label
# instead of raising KeyError on such a state.
default_label = int(np.bincount(Y_train).argmax())
mapped_predictions_test = np.array(
    [state_to_label.get(state, default_label) for state in predicted_states_test]
)

# Test data summary
conf_matrix_test = confusion_matrix(Y_test, mapped_predictions_test)
accuracy_test = accuracy_score(Y_test, mapped_predictions_test)

print("Confusion Matrix:\n", conf_matrix_test)
print("\nAccuracy Score:", accuracy_test)

In [None]:
# Graph for test data
# Weather names for the true test labels (kept for any later use).
wlabels_test = [weather_map[label] for label in Y_test]

plt.figure(figsize=(20, 10))
# Plot the *mapped* predictions rather than the raw hidden-state indices.
# State indices are arbitrary and do not correspond to weather codes, so
# only the mapped labels are comparable to the actual labels. Both series
# use the integer weather codes, so they share one y-axis.
plt.plot(mapped_predictions_test, label="Predicted Weather Labels")
plt.plot(Y_test, label="Actual Weather Labels", linestyle="dashed")
# Show weather names instead of numeric codes on the y-axis.
plt.yticks(list(weather_map.keys()), list(weather_map.values()))
plt.title("Actual vs. Predicted Weather for Testing Data")
plt.legend()
plt.show()