In [13]:
import pandas as pd

In [14]:
data = pd.read_csv('student_data.csv')
print(data.head())

   StudentID  Day Mood ShirtColor
0          1    1    H          R
1          1    2    H          R
2          1    3    S          B
3          1    4    S          B
4          1    5    H          R


Iniitial probability distribution

In [15]:
initial_probability = []

count_H = 0
count_S = 0

for i in range(len(data)):
    if data.loc[i, "Day"] == 1 and data.loc[i, "Mood"] == "H":
        count_H += 1
    elif data.loc[i, "Day"] == 1 and data.loc[i, "Mood"] == "S":
        count_S += 1

initial_probability.append(count_H / len(set(data["StudentID"])))
initial_probability.append(count_S / len(set(data["StudentID"])))

print("Initial Probability: ")
print(initial_probability)

Initial Probability: 
[0.6, 0.4]


Transition Matrix


In [16]:
count_H_H = 0
count_H_S = 0
count_S_S = 0
count_S_H = 0

for i in range(len(data) - 1):
    if (data.loc[i, "StudentID"] == data.loc[i + 1, "StudentID"]) and \
       (data.loc[i + 1, "Day"] == data.loc[i, "Day"] + 1):

        if data.loc[i, "Mood"] == "H" and data.loc[i + 1, "Mood"] == "H":
            count_H_H += 1
        elif data.loc[i, "Mood"] == "H" and data.loc[i + 1, "Mood"] == "S":
            count_H_S += 1
        elif data.loc[i, "Mood"] == "S" and data.loc[i + 1, "Mood"] == "S":
            count_S_S += 1
        elif data.loc[i, "Mood"] == "S" and data.loc[i + 1, "Mood"] == "H":
            count_S_H += 1


total_H = count_H_H + count_H_S
total_S = count_S_S + count_S_H

transition_matrix = [
    [count_H_H/total_H, count_H_S/total_H],
    [count_S_H/total_S, count_S_S/total_S]
]

print("Transition matrix:")
print(transition_matrix)

Transition matrix:
[[0.6545454545454545, 0.34545454545454546], [0.45, 0.55]]


Emission Matrix

In [17]:
R_H, B_H, G_H, R_S, B_S, G_S = 0, 0, 0, 0, 0, 0

for i in range(len(data)):
    if data.loc[i, "Mood"] == "H" and data.loc[i, "ShirtColor"] == "R":
        R_H += 1
    elif data.loc[i, "Mood"] == "H" and data.loc[i, "ShirtColor"] == "B":
        B_H += 1
    elif data.loc[i, "Mood"] == "H" and data.loc[i, "ShirtColor"] == "G":
        G_H += 1
    elif data.loc[i, "Mood"] == "S" and data.loc[i, "ShirtColor"] == "R":
        R_S += 1
    elif data.loc[i, "Mood"] == "S" and data.loc[i, "ShirtColor"] == "B":
        B_S += 1
    elif data.loc[i, "Mood"] == "S" and data.loc[i, "ShirtColor"] == "G":
        G_S += 1

total_Happy = R_H + B_H + G_H
total_Sad = R_S + B_S + G_S
emission_matrix = [
    [R_H/total_Happy, B_H/total_Happy, G_H/total_Happy],
    [R_S/total_Sad, B_S/total_Sad, G_S/total_Sad]
]

print("Emission matrix:")
print(emission_matrix)

Emission matrix:
[[0.7192982456140351, 0.0, 0.2807017543859649], [0.0, 0.8604651162790697, 0.13953488372093023]]


In [18]:
all_possible_sequences = []
# 0 -> "H", 1 -> "S"
moods = [0, 1]

for mood1 in moods:
    for mood2 in moods:
        for mood3 in moods:
            all_possible_sequences.append([mood1, mood2, mood3])

print(all_possible_sequences)

[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]]


In [19]:
# In observation 0 -> "R", 1 -> "B", 2 -> "G"
def hidden_markov_model(initial_probability, transition_matrix, emission_matrix, all_possible_sequences, observations=[0, 1, 2]):
    probabilities = []

    for seq in all_possible_sequences:
        P = (
            initial_probability[seq[0]] *
            emission_matrix[seq[0]][observations[0]] *
            transition_matrix[seq[1]][seq[0]] *
            emission_matrix[seq[1]][observations[1]] *
            transition_matrix[seq[2]][seq[1]] *
            emission_matrix[seq[2]][observations[2]]
        )

        probabilities.append(P)

    return probabilities


result = hidden_markov_model(initial_probability, transition_matrix, emission_matrix, all_possible_sequences)
print(result)

[0.0, 0.0, 0.016204740180260375, 0.012824827075801997, 0.0, 0.0, 0.0, 0.0]


In [20]:
sequence = [tuple(["H" if i == 0 else "S" for i in sequence] + ["R", "B", "G"]) for sequence in all_possible_sequences]

In [21]:
dict_of_probabilities = dict(zip(sequence, result))
print("Hidden Markov Model Probabilities:")
print(dict_of_probabilities)

Hidden Markov Model Probabilities:
{('H', 'H', 'H', 'R', 'B', 'G'): 0.0, ('H', 'H', 'S', 'R', 'B', 'G'): 0.0, ('H', 'S', 'H', 'R', 'B', 'G'): 0.016204740180260375, ('H', 'S', 'S', 'R', 'B', 'G'): 0.012824827075801997, ('S', 'H', 'H', 'R', 'B', 'G'): 0.0, ('S', 'H', 'S', 'R', 'B', 'G'): 0.0, ('S', 'S', 'H', 'R', 'B', 'G'): 0.0, ('S', 'S', 'S', 'R', 'B', 'G'): 0.0}


In [22]:
maximum = max(dict_of_probabilities, key=dict_of_probabilities.get)
print(maximum, "->", dict_of_probabilities[maximum])

('H', 'S', 'H', 'R', 'B', 'G') -> 0.016204740180260375
