In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hmmlearn import hmm, vhmm
import joblib
import seaborn as sns
import networkx as nx
import pprint as pprint

# Import data

In [2]:
X = pd.pandas.read_csv("E:/UQAM/Recherche/Code/data/predictions(in).csv",  names=None)
# ['cow L', 'cow T', 'cow W', 'cow H', 'Head L', 'Head T', 'Head W', 'Head H', 'snout L', 'snout R', 'snout W', 'snout H']
pprint.pp(X)

# X = np.concatenate([X_head, X_snout, X_cow], axis=1)
df = pd.DataFrame(X)

print(df.shape, X.shape)

           1  1049  972  833  592    3272    1011    297    494    1125  \
0          2  1051  977  828  584  3270.0  1010.0  299.0  496.0  1120.0   
1          3  1045  974  838  589  1116.0  1372.0  196.0  158.0  3270.0   
2          4  1045  978  838  583  1109.0  1372.0  204.0  158.0  3269.0   
3          5  1038  966  848  597  1105.0  1371.0  199.0  160.0  3268.0   
4          6  1040  968  818  598  3269.0  1011.0  300.0  491.0  1097.0   
...      ...   ...  ...  ...  ...     ...     ...    ...    ...     ...   
23087  23089  1981  977  354  482  1985.0   980.0  724.0  570.0  2008.0   
23088  23090  1982  970  356  484  2010.0  1254.0  147.0  171.0  1993.0   
23089  23091  1997  984  727  566  2011.0  1256.0  156.0  170.0  1987.0   
23090  23092  1075  668  696  818  2013.0  1249.0  158.0  182.0  1984.0   
23091  23093  1078  662  685  833  2013.0  1245.0  161.0  188.0  1988.0   

         1374    183    155  
0      1373.0  186.0  157.0  
1      1013.0  300.0  492.0  
2      10

# Train model

In [3]:
em_scores = list()
vi_scores = list()
em_models = list()
vi_models = list()

for n_components in range(2, 10):
    for idx in range(10):
        em_model = hmm.GaussianHMM(n_components=n_components, random_state=idx,covariance_type = "full", n_iter=52)
        em_model.fit(df)
        em_models.append(em_model)
        em_scores.append(em_model.score(df))
        # variational inference
        vi_model = vhmm.VariationalGaussianHMM(n_components=n_components, random_state=idx, covariance_type='full', n_iter=52)
        vi_model.fit(df)
        vi_models.append(vi_model)
        vi_scores.append(vi_model.score(df))
        # ll = em_model.monitor_.history[-1]
        """ print(f"Training EM({n_components}) Final Log Likelihood={ll} "
              f"Iterations={len(vi_model.monitor_.history)} ") """
        # print(f'Converged: {em_model.monitor_.converged}\t\t'f'Score: {scores[-1]}')

# get the best model
em_model = em_models[np.argmax(em_scores)]
vi_model = vi_models[np.argmax(vi_scores)]
# print(f'The best model had a score of {max(scores)} and 'f'{em_model.n_components} components')


em_Z = em_model.predict(X)
vi_Z = vi_model.predict(X)


ValueError: Input contains NaN.

In [None]:

joblib.dump(em_model, 'models/EM/hmm_jennie_model.pkl')
joblib.dump(vi_model, 'models/Vi/hmm_jennie_model.pkl')

print(em_model.means_)
print(len(em_Z)/30)
print(vi_model.means_)
#print(model.covars_)

In [None]:
transition_matrix = em_model.transmat_
plt.figure(figsize=(5, 4))
sns.heatmap(transition_matrix, annot=True, cmap="YlGnBu", cbar=False)
plt.title('State Transition Probabilities')
plt.xlabel('To State')
plt.ylabel('From State')
plt.show()

In [None]:
transition_matrix = vi_model.transmat_
plt.figure(figsize=(5, 4))
sns.heatmap(transition_matrix, annot=True, cmap="YlGnBu", cbar=False)
plt.title('State Transition Probabilities for all three boxes')
plt.xlabel('To State')
plt.ylabel('From State')
plt.show()

In [None]:

states = pd.DataFrame(em_Z)
states.to_csv("../INF889E Project/Trained data/EM/jennie_states.csv")

states = pd.DataFrame(vi_Z)
states.to_csv("../INF889E Project/Trained data/VI/jennie_states.csv")


In [None]:
# Create a directed graph using NetworkX
G = nx.DiGraph()
transition_matrix = em_model.transmat_
num_states = transition_matrix.shape[0]
dic_state = {
    1 : 'Resting',
    3 : 'Observing',
    2 : 'Flight',
    0 : 'Reminiscing'
}
for i in range(num_states):
    for j in range(num_states):
        prob = transition_matrix[i, j]
        if prob > 0:  # Only add edges for non-zero probabilities
            G.add_edge(f' {dic_state[i]}', f' {dic_state[j]}', weight=prob)


# Use a color palette from seaborn for visual appeal
colors = sns.color_palette("pastel", num_states)

# Visualize the graph with enhanced aesthetics
pos = nx.circular_layout(G)
edge_labels = {(n1, n2): f"{d['weight']:.5f}" for n1, n2, d in G.edges(data=True)}

plt.figure(figsize=(7, 7))
nx.draw_networkx_nodes(G, pos, node_size=1000, node_color=colors, node_shape="o", alpha=0.7)
nx.draw_networkx_edges(G, pos, width=2, edge_color="gray", arrowsize=20)
nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red', font_size=12)

plt.title('Hidden Markov Model Transition States -EM-', fontsize=18)
plt.axis('off')
plt.show()

In [None]:
# Create a directed graph using NetworkX
G = nx.DiGraph()
transition_matrix = vi_model.transmat_
num_states = transition_matrix.shape[0]
for i in range(num_states):
    for j in range(num_states):
        prob = transition_matrix[i, j]
        if prob > 0:  # Only add edges for non-zero probabilities
            G.add_edge(dic_state[i], dic_state[j], weight=prob)

# Use a color palette from seaborn for visual appeal
colors = sns.color_palette("pastel", num_states)

# Visualize the graph with enhanced aesthetics
pos = nx.circular_layout(G)
edge_labels = {(n1, n2): f"{d['weight']:.5f}" for n1, n2, d in G.edges(data=True)}

plt.figure(figsize=(7, 7))
nx.draw_networkx_nodes(G, pos, node_size=1000, node_color=colors, node_shape="o", alpha=0.7)
nx.draw_networkx_edges(G, pos, width=2, edge_color="gray", arrowsize=20)
nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red', font_size=12)

plt.title('Hidden Markov Model Transition States -VI-', fontsize=18)
plt.axis('off')
plt.show()

In [None]:
states_em = pd.unique(em_Z)
print(df['cow H'])
plt.subplot(2,2,1)
for i in states_em:
    x = np.linspace(1,len(df['cow H']),len(df['cow H']))[(em_Z == i)]
    y = df['cow H'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Left")
plt.subplot(2,2,2)
for i in states_em:
    x = np.linspace(1,len(df['cow L']),len(df['cow L']))[(em_Z == i)]
    y = df['cow L'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Top")
plt.subplot(2,2,3)
for i in states_em:
    x = np.linspace(1,len(df['cow T']),len(df['cow T']))[(em_Z == i)]
    y = df['cow T'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Width")
plt.subplot(2,2,4)
for i in states_em:
    x = np.linspace(1,len(df['cow W']),len(df['cow W']))[(em_Z == i)]
    y = df['cow W'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Height")


plt.tight_layout()
# plt.legend(labels=state_names, loc="lower right", ncol=1)
plt.suptitle('Cow Coordinates (Jenny), EM gaussian')

In [None]:
states_em = pd.unique(em_Z)
plt.subplot(2,2,1)
for i in states_em:
    x = np.linspace(1,len(df['head H']),len(df['head H']))[(em_Z == i)]
    y = df['head H'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Left")
plt.subplot(2,2,2)
for i in states_em:
    x = np.linspace(1,len(df['head L']),len(df['head L']))[(em_Z == i)]
    y = df['head L'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Top")
plt.subplot(2,2,3)
for i in states_em:
    x = np.linspace(1,len(df['head T']),len(df['head T']))[(em_Z == i)]
    y = df['head T'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Width")
plt.subplot(2,2,4)
for i in states_em:
    x = np.linspace(1,len(df['head W']),len(df['head W']))[(em_Z == i)]
    y = df['head W'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Height")


plt.tight_layout()
# plt.legend(labels=state_names, loc="lower right", ncol=1)
plt.suptitle('Head Coordinates (Jenny), EM gaussian')

In [None]:
states_em = pd.unique(em_Z)
plt.subplot(2,2,1)
for i in states_em:
    x = np.linspace(1,len(df['snout H']),len(df['snout H']))[(em_Z == i)]
    y = df['snout H'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Left")
plt.subplot(2,2,2)
for i in states_em:
    x = np.linspace(1,len(df['snout L']),len(df['snout L']))[(em_Z == i)]
    y = df['snout L'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Top")
plt.subplot(2,2,3)
for i in states_em:
    x = np.linspace(1,len(df['snout R']),len(df['snout R']))[(em_Z == i)]
    y = df['snout R'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Width")
plt.subplot(2,2,4)
for i in states_em:
    x = np.linspace(1,len(df['snout W']),len(df['snout W']))[(em_Z == i)]
    y = df['snout W'][(em_Z == i)]
    plt.plot(x, y, '.')
    plt.title("Height")


plt.tight_layout()
# plt.legend(labels=state_names, loc="lower right", ncol=1)
plt.suptitle('Snout Coordinates (Jenny), EM gaussian')

In [None]:
remodel.monitor_
remodel.monitor_.converged
