# CSE150A Milestone 3: Multinomial HMM for Weather Prediction

## Importing and Preprocessing Data

In [20]:
import pandas as pd
import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

df = pd.read_csv('seattle-weather.csv')

In [21]:
def discretize_precipitation(p):
    if p == 0: return 0 #'none' or no precipitation
    elif p <= 5: return 1 #'light' or light precipitation
    else: return 2 #'heavy' or heavy precipitation

def discretize_temp(t):
    if t <= 0: return 0 #'freezing' or below freezing temps
    elif t <= 5: return 1 #'cold' or very cold temps
    elif t <= 15: return 2 #'cool' or cool temps
    else: return 3 #'warm' or warm/hot temps

def discretize_wind(w):
    if w <= 2: return 0 #'calm' or calm winds
    elif w <= 5: return 1 #'moderate' or moderate winds
    else: return 2 #'strong' or strong winds

df['precip_cat'] = df['precipitation'].apply(discretize_precipitation)
df['temp_max_cat'] = df['temp_max'].apply(discretize_temp)
df['temp_min_cat'] = df['temp_min'].apply(discretize_temp)
df['wind_cat'] = df['wind'].apply(discretize_wind)

#make sure types are all ints
df['precip_cat'] = df['precip_cat'].astype(int)
df['temp_max_cat'] = df['temp_max_cat'].astype(int)
df['temp_min_cat'] = df['temp_min_cat'].astype(int)
df['wind_cat'] = df['wind_cat'].astype(int)
print(df.dtypes)

date              object
precipitation    float64
temp_max         float64
temp_min         float64
wind             float64
weather           object
precip_cat         int32
temp_max_cat       int32
temp_min_cat       int32
wind_cat           int32
dtype: object


In [13]:
df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,precip_cat,temp_max_cat,temp_min_cat,wind_cat
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle,0,2,1,1
1,2012-01-02,10.9,10.6,2.8,4.5,rain,2,2,1,1
2,2012-01-03,0.8,11.7,7.2,2.3,rain,1,2,2,1
3,2012-01-04,20.3,12.2,5.6,4.7,rain,2,2,2,1
4,2012-01-05,1.3,8.9,2.8,6.1,rain,1,2,1,2


In [22]:
#dropping the numerical values for precipitation, temp_max, temp_min, wind for easier processing
df.drop(['precipitation', 'temp_max', 'temp_min', 'wind', 'date'], axis=1, inplace=True)
df = df.dropna()
df.head()

Unnamed: 0,weather,precip_cat,temp_max_cat,temp_min_cat,wind_cat
0,drizzle,0,2,1,1
1,rain,2,2,1,1
2,rain,1,2,2,1
3,rain,2,2,2,1
4,rain,1,2,1,2


In [23]:
#getting the hidden states for this model (the weather)
weather_states = df.weather.unique().tolist()
#maps for evidence
precip_map = {0: 'none', 1: 'light', 2: 'heavy'}
temp_map = {0: 'freezing', 1: 'cold', 2: 'cool', 3: 'warm'}
wind_map = {0: 'calm', 1: 'moderate', 2: 'strong'}

In [24]:
weather_states

['drizzle', 'rain', 'sun', 'snow', 'fog']

## Model Training

In [25]:
#discrete valeus check
print(df['precip_cat'].unique())  # Should be [0, 1, 2]
print(df['temp_max_cat'].unique())  # Should be [0, 1, 2, 3]
print(df['temp_min_cat'].unique())  # Should be [0, 1, 2, 3]
print(df['wind_cat'].unique())  # Should be [0, 1, 2]

[0 2 1]
[2 1 0 3]
[1 2 0 3]
[1 2 0]


In [None]:
# Code written colaboratively and taken from Chat GPT-4o
#Get a single feature vector and reshape
X = df[['precip_cat', 'temp_max_cat', 'temp_min_cat', 'wind_cat']].values #observation vector
Y = df['weather'].values

df.drop(['weather'], axis = 1, inplace = True)

#split training and test data
X_train, X_test ,Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)

model = hmm.MultinomialHMM(n_components = 5, n_iter = 1000)
model.fit(X_train)

# Predict hidden states for the test set
predicted_states = model.predict(X_test)

# To see the learned parameters (emission and transition probabilities)
print("Transition matrix:")
print(model.transmat_)

print("Emission matrix:")
print(model.emissionprob_)

MultinomialHMM has undergone major changes. The previous version was implementing a CategoricalHMM (a special case of MultinomialHMM). This new implementation follows the standard definition for a Multinomial distribution (e.g. as in https://en.wikipedia.org/wiki/Multinomial_distribution). See these issues for details:
https://github.com/hmmlearn/hmmlearn/issues/335
https://github.com/hmmlearn/hmmlearn/issues/340


(1461, 4) (1461,)
Transition matrix:
[[6.15926024e-04 1.18620455e-02 8.95351840e-03 1.10973991e-01
  8.67594519e-01]
 [1.03239144e-02 3.65660883e-06 2.57050518e-04 6.04546310e-01
  3.84869068e-01]
 [1.42499130e-01 2.11023830e-01 4.68028631e-04 6.43482818e-01
  2.52619269e-03]
 [1.38623926e-01 6.36224606e-01 1.20615037e-01 1.15927720e-07
  1.04536315e-01]
 [6.57082872e-01 3.28314482e-01 1.01156917e-02 1.22939995e-03
  3.25755404e-03]]
Emission matrix:
[[0.12930743 0.41565063 0.29015356 0.16488838]
 [0.07776006 0.45095134 0.31251133 0.15877727]
 [0.12961676 0.41499535 0.29077474 0.16461315]
 [0.10096291 0.4348209  0.30320019 0.161016  ]
 [0.1131617  0.42659518 0.29715781 0.16308531]]
