# CSE150A Milestone 3: Multinomial HMM for Weather Prediction

## Importing and Preprocessing Data

In [None]:
import pandas as pd
import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split

# Load the raw weather observations. The path is relative to the working
# directory. Later cells read the columns 'precipitation', 'temp_max',
# 'temp_min', 'wind', 'date' and 'weather' from this frame.
df = pd.read_csv('seattle-weather.csv')

In [None]:
def discretize_precipitation(p):
    """Map a precipitation amount to an ordinal category code.

    Returns:
        0 ('none')  when ``p`` equals 0,
        1 ('light') for any other value that is at most 5,
        2 ('heavy') otherwise. A value that fails both comparisons
        (e.g. NaN) also ends up here.
    """
    if p == 0:
        return 0
    # Anything non-zero is either light (<= 5) or heavy.
    return 1 if p <= 5 else 2

def discretize_temp(t):
    """Map a temperature to an ordinal category code.

    The code is the index of the first upper bound that ``t`` does not
    exceed:

        0 ('freezing') for t <= 0
        1 ('cold')     for t <= 5
        2 ('cool')     for t <= 15
        3 ('warm')     for everything else. A value for which every
                       comparison is False (e.g. NaN) also ends up here.
    """
    for category, upper_bound in enumerate((0, 5, 15)):
        if t <= upper_bound:
            return category
    return 3

def discretize_wind(w):
    """Map a wind reading to an ordinal category code.

    Returns:
        0 ('calm')     for w <= 2,
        1 ('moderate') for w <= 5,
        2 ('strong')   otherwise. A value for which every comparison is
                       False (e.g. NaN) also ends up here.
    """
    for category, upper_bound in enumerate((2, 5)):
        if w <= upper_bound:
            return category
    return 2

# Raw measurement column -> (derived category column, binning function).
raw_to_binner = {
    'precipitation': ('precip_cat', discretize_precipitation),
    'temp_max': ('temp_max_cat', discretize_temp),
    'temp_min': ('temp_min_cat', discretize_temp),
    'wind': ('wind_cat', discretize_wind),
}

# Drop rows with a missing raw measurement BEFORE binning. Every comparison
# against NaN is False, so the discretize_* helpers would send a missing value
# to their final fall-through branch (the highest category). Once the raw
# columns are removed, a later dropna() can no longer tell that the value was
# missing. .copy() makes the filtered frame independent so the column
# assignments below never write into a view of the original frame.
df = df.dropna(subset=list(raw_to_binner)).copy()

# Add one integer-coded category column per raw measurement.
for raw_col, (cat_col, binner) in raw_to_binner.items():
    df[cat_col] = df[raw_col].apply(binner)

In [None]:
# Preview the first rows to check the newly added *_cat columns.
df.head()

In [None]:
# Keep only the categorical columns: remove the raw numeric measurements
# (precipitation, temp_max, temp_min, wind) and the date column, then discard
# any row that still contains a missing value.
raw_columns = ['precipitation', 'temp_max', 'temp_min', 'wind', 'date']
df = df.drop(columns=raw_columns).dropna()

In [None]:
# Preview the frame again after the raw columns have been dropped.
df.head()

In [None]:
# Hidden states of the model: the distinct weather labels, in order of first
# appearance in the data.
weather_states = df['weather'].unique().tolist()

# Human-readable names for the integer evidence codes produced by the
# discretize_* helpers (list position == code).
precip_map = dict(enumerate(['none', 'light', 'heavy']))
temp_map = dict(enumerate(['freezing', 'cold', 'cool', 'warm']))
wind_map = dict(enumerate(['calm', 'moderate', 'strong']))

In [None]:
# Display the list of distinct weather labels (the hidden states).
weather_states

## Model Training

In [None]:
# Code written collaboratively and taken from Chat GPT-4o

# Build the observation sequence: exactly ONE discrete symbol per row.
# Flattening the (n_rows, 4) feature matrix with reshape(-1, 1) would yield
# 4 * n_rows observations, which no longer lines up with the n_rows labels in
# Y (train_test_split rejects arrays of different lengths) and would interleave
# four unrelated variables in a single symbol stream. Instead, every distinct
# combination of the four category codes gets its own contiguous integer id.
feature_cols = ['precip_cat', 'temp_max_cat', 'temp_min_cat', 'wind_cat']
_, symbol_ids = np.unique(df[feature_cols].values, axis=0, return_inverse=True)
X = symbol_ids.reshape(-1, 1)  # shape (n_rows, 1): one symbol per time step
Y = df['weather'].values

df.drop(['weather'], axis = 1, inplace = True)

# Split training and test data WITHOUT shuffling: an HMM learns transitions
# between consecutive observations, so the rows must keep their original order.
# The training set is the first 80% of the sequence, the test set the rest.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=False
)

# In recent hmmlearn releases MultinomialHMM models count vectors, and the
# model for sequences of categorical symbols is CategoricalHMM. Prefer it when
# available and fall back to MultinomialHMM otherwise.
hmm_class = getattr(hmm, 'CategoricalHMM', hmm.MultinomialHMM)

# n_components=5 presumably matches the number of distinct weather labels --
# TODO confirm against weather_states. random_state makes the randomly
# initialised EM fit reproducible.
model = hmm_class(n_components=5, n_iter=1000, random_state=42)

# NOTE(review): the symbol alphabet is inferred from X_train; if X_test holds a
# symbol id larger than any seen in training, scoring it will need the
# alphabet size set explicitly -- verify before evaluating on X_test.
model.fit(X_train)

# Show the parameters learned by the fit: the state-to-state transition
# probabilities, then the per-state emission probabilities.
print("Transition matrix:", model.transmat_, sep="\n")

print("Emission matrix:", model.emissionprob_, sep="\n")