# CSE150A Milestone 3: Multinomial HMM for Weather Prediction

## Importing and Preprocessing Data

In [None]:
import pandas as pd
import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the weather dataset; later cells read its precipitation, temp_max,
# temp_min, wind, weather and date columns.
df = pd.read_csv('seattle-weather.csv')

In [None]:
def discretize_precipitation(p):
    """Bucket a precipitation amount into an ordinal category code.

    Returns:
        0 ('none')  when ``p`` equals 0,
        1 ('light') when ``p`` is non-zero and at most 5,
        2 ('heavy') for anything else.
    """
    if p == 0:
        return 0
    return 1 if p <= 5 else 2

def discretize_temp(t):
    """Bucket a temperature into an ordinal category code.

    Returns:
        0 ('freezing') when ``t`` <= 0,
        1 ('cold')     when 0 < ``t`` <= 5,
        2 ('cool')     when 5 < ``t`` <= 15,
        3 ('warm')     for anything else.
    """
    # Inclusive upper bound of each bucket, in ascending order; the position
    # of the first bound that ``t`` does not exceed is the category code.
    upper_bounds = (0, 5, 15)
    for code, bound in enumerate(upper_bounds):
        if t <= bound:
            return code
    # Fell through every bound: the last ('warm') bucket.
    return len(upper_bounds)

def discretize_wind(w):
    """Bucket a wind value into an ordinal category code.

    Returns:
        0 ('calm')     when ``w`` <= 2,
        1 ('moderate') when 2 < ``w`` <= 5,
        2 ('strong')   for anything else.
    """
    return 0 if w <= 2 else (1 if w <= 5 else 2)

def discretize_weather(w):
    """Translate a weather label into its integer code.

    The code is the label's position in
    ('drizzle', 'rain', 'sun', 'snow', 'fog'), i.e. 0 through 4.
    Any other value yields None.
    """
    known_labels = ('drizzle', 'rain', 'sun', 'snow', 'fog')
    for code, label in enumerate(known_labels):
        if w == label:
            return code
    return None

# Derived column -> (raw source column, discretizer that produces its codes).
category_builders = {
    'precip_cat': ('precipitation', discretize_precipitation),
    'temp_max_cat': ('temp_max', discretize_temp),
    'temp_min_cat': ('temp_min', discretize_temp),
    'wind_cat': ('wind', discretize_wind),
    'weather_cat': ('weather', discretize_weather),
}

# First pass: add every category column next to the raw data.
for cat_col, (raw_col, discretize) in category_builders.items():
    df[cat_col] = df[raw_col].apply(discretize)

# Second pass: force each category column to an integer dtype.
for cat_col in category_builders:
    df[cat_col] = df[cat_col].astype(int)
print(df.dtypes)

In [None]:
# Preview the first rows to eyeball the new *_cat columns against the raw values.
df.head()

In [None]:
# Remove the raw measurement columns now that their discretized versions exist,
# along with the date and the textual weather label.
raw_columns = ['precipitation', 'temp_max', 'temp_min', 'wind', 'date', 'weather']
df.drop(columns=raw_columns, inplace=True)

# Discard any row that still has a missing value, then preview the result.
df = df.dropna()
df.head()

In [None]:
# Sanity check on the discretization: print the distinct codes found in each
# category column. The hidden state of the model is the weather.
# Expected code ranges: precip 0-2, temp_max 0-3, temp_min 0-3, wind 0-2,
# weather 0-4 (unique() reports them in order of first appearance, not sorted).
for column in ('precip_cat', 'temp_max_cat', 'temp_min_cat', 'wind_cat', 'weather_cat'):
    print(df[column].unique())

# Code -> label lookups for the evidence variables.
precip_map = dict(enumerate(('none', 'light', 'heavy')))
temp_map = dict(enumerate(('freezing', 'cold', 'cool', 'warm')))
wind_map = dict(enumerate(('calm', 'moderate', 'strong')))

# Code -> label lookup for the weather.
weather_map = dict(enumerate(('drizzle', 'rain', 'sun', 'snow', 'fog')))

## Model Training

In [None]:
# Code written collaboratively and taken from ChatGPT-4o

# Observation matrix: one row per record, one column per discretized evidence variable.
X = df[['precip_cat', 'temp_max_cat', 'temp_min_cat', 'wind_cat']].values
# Weather codes. They are NOT given to the HMM (fit() below only sees X);
# they are kept so the decoded states can be evaluated afterwards.
Y = df['weather_cat'].values

# Split WITHOUT shuffling. An HMM learns transitions between consecutive rows,
# so the train and test sets must each stay a contiguous run of observations;
# the default shuffle=True would scramble the sequence.
# Assumes df rows are still in the CSV's original (presumably chronological)
# order -- TODO confirm.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

# NOTE(review): in recent hmmlearn releases MultinomialHMM interprets each row
# of X as a vector of counts, while CategoricalHMM is the model for category
# codes -- confirm which emission model is intended for the installed version.
# random_state pins the random initialisation so Restart & Run All reproduces
# the same fitted parameters.
model = hmm.MultinomialHMM(n_components=5, n_iter=1000, random_state=42)
model.fit(X_train)

# Decode the most likely hidden-state sequence for the training set.
predicted_states_train = model.predict(X_train)

# Inspect the learned parameters (transition and emission probabilities).
print("Transition matrix:")
print(model.transmat_)

print("Emission matrix:")
print(model.emissionprob_)

In [None]:
# The HMM was fit without labels, so its hidden-state ids (0..n_components-1)
# are arbitrary and do not line up with the weather codes in Y_train.
# Align them first: each state is assigned the weather code it most often
# coincides with on the training data.
fallback_label = np.bincount(Y_train).argmax()  # used for states never decoded
state_to_label = np.full(model.n_components, fallback_label)
for state in range(model.n_components):
    labels_in_state = Y_train[predicted_states_train == state]
    if labels_in_state.size > 0:
        state_to_label[state] = np.bincount(labels_in_state).argmax()

# Translate the decoded state sequence into predicted weather codes.
predicted_labels_train = state_to_label[predicted_states_train]

# Confusion matrix for the training data (rows: true code, columns: predicted code).
conf_matrix_train = confusion_matrix(Y_train, predicted_labels_train)

# Accuracy score for the training data.
accuracy_train = accuracy_score(Y_train, predicted_labels_train)

print("Confusion Matrix:\n", conf_matrix_train)
print("\nAccuracy Score:", accuracy_train)

## Model Testing

In [None]:
# Decode hidden states for the held-out test set. Stored under its own name so
# the training-set predictions are not overwritten (re-running the training
# evaluation cell would otherwise compare arrays of different lengths).
predicted_states_test = model.predict(X_test)

# Hidden-state ids are arbitrary, so map each state to the weather code it most
# often coincides with. The mapping is learned from the TRAINING data only, so
# no test labels leak into the predictions.
train_states = model.predict(X_train)
fallback_label = np.bincount(Y_train).argmax()  # used for states never decoded
state_to_label = np.full(model.n_components, fallback_label)
for state in range(model.n_components):
    labels_in_state = Y_train[train_states == state]
    if labels_in_state.size > 0:
        state_to_label[state] = np.bincount(labels_in_state).argmax()

# Translate the decoded test states into predicted weather codes.
predicted_labels_test = state_to_label[predicted_states_test]

# Confusion matrix for the test data (rows: true code, columns: predicted code).
conf_matrix_test = confusion_matrix(Y_test, predicted_labels_test)

# Accuracy score for the test data.
accuracy_test = accuracy_score(Y_test, predicted_labels_test)

print("Confusion Matrix:\n", conf_matrix_test)
print("\nAccuracy Score:", accuracy_test)