# **Hidden Markov Model for Student Mood Analysis**

**1. Loading and Inspecting the Dataset**

In [1]:
from itertools import product

import pandas as pd
from google.colab import files

# Prompt for a file upload in Colab; the uploaded CSV is then read by name from
# the working directory.
uploaded = files.upload()

data = pd.read_csv('student_data.csv')

print(data.head())
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() -- doing so appends a stray "None" line to the output.
data.info()

# Distinct values of each categorical column, as a quick sanity check.
print("Unique Student IDs: ", data['StudentID'].unique())
print("Unique Moods: ", data['Mood'].unique())
print("Unique Shirt Colors: ", data['ShirtColor'].unique())

Saving student_data.csv to student_data.csv
   StudentID  Day Mood ShirtColor
0          1    1    H          R
1          1    2    H          R
2          1    3    S          B
3          1    4    S          B
4          1    5    H          R
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   StudentID   100 non-null    int64 
 1   Day         100 non-null    int64 
 2   Mood        100 non-null    object
 3   ShirtColor  100 non-null    object
dtypes: int64(2), object(2)
memory usage: 3.3+ KB
None
Unique Student IDs:  [1 2 3 4 5]
Unique Moods:  ['H' 'S']
Unique Shirt Colors:  ['R' 'B' 'G']


**2. Parameter Learning**


*   **Initial Probability Distribution**



In [18]:
# The initial distribution is estimated from the rows recorded on Day 1.
first_day_data = data[data['Day'] == 1]

print("\n        --- First Day Data ---")
print(first_day_data)

# Count Day-1 moods, then reindex over every mood present in the dataset so a
# mood that never occurs on Day 1 gets an explicit count of 0. Without this the
# label would be missing and a later lookup by mood would raise KeyError.
all_moods = data['Mood'].unique()
initial_counts = (
    first_day_data['Mood']
    .value_counts()
    .reindex(all_moods, fill_value=0)
)

print("\n        --- Initial Counts ---")
print(initial_counts)

# Number of Day-1 rows; used as the denominator of the relative frequencies.
total_students = len(first_day_data)

print("\n        --- Total Students ---")
print(total_students)

# Relative frequency of each mood on Day 1.
initial_probabilities = initial_counts / total_students

print("\n        --- Initial Probabilities ---")
print(initial_probabilities)


        --- Fisrt Day Data ---
    StudentID  Day Mood ShirtColor
0           1    1    H          R
20          2    1    H          G
40          3    1    S          B
60          4    1    H          R
80          5    1    S          B

        --- Initial Counts ---
Mood
H    3
S    2
Name: count, dtype: int64

        --- Total Students ---
5

        --- Initial Probabilities ---
Mood
H    0.6
S    0.4
Name: count, dtype: float64



*   **Transition Matrix**


In [30]:
# Take the hidden states from the data instead of hard-coding 'H'/'S', so an
# additional mood label cannot raise KeyError while transitions are counted.
mood_states = list(data['Mood'].unique())
transition_counts = {
    from_state: {to_state: 0 for to_state in mood_states}
    for from_state in mood_states
}

for student in data['StudentID'].unique():
    # Order each student's rows by day so consecutive list entries are
    # consecutive observations for that student.
    student_data = data[data['StudentID'] == student].sort_values('Day')
    moods = list(student_data['Mood'])

    print(f"\n        --- Student {student} Data ---")
    print(student_data)
    print(f"\n        --- Student {student} Moods ---")
    print(moods)

    # Pair each mood with the next one; pairs are formed within a single
    # student's sequence only, never across two students.
    for from_state, to_state in zip(moods, moods[1:]):
        transition_counts[from_state][to_state] += 1

print("\n        --- Transition Counts ---")
print(transition_counts)

# Normalise each row of counts into probabilities.
transition_probabilities = {}

for from_state, row_counts in transition_counts.items():
    total = sum(row_counts.values())
    # A state with no observed outgoing transition (total == 0) would divide by
    # zero; its row is set to 0.0 instead of crashing the cell.
    transition_probabilities[from_state] = {
        to_state: (count / total if total else 0.0)
        for to_state, count in row_counts.items()
    }

print("\n        --- Transition Probabilities ---")
print(transition_probabilities)



        --- Student 1 Data ---
    StudentID  Day Mood ShirtColor
0           1    1    H          R
1           1    2    H          R
2           1    3    S          B
3           1    4    S          B
4           1    5    H          R
5           1    6    H          G
6           1    7    H          R
7           1    8    S          B
8           1    9    S          G
9           1   10    S          B
10          1   11    H          R
11          1   12    H          R
12          1   13    H          G
13          1   14    S          B
14          1   15    S          B
15          1   16    S          B
16          1   17    H          R
17          1   18    H          G
18          1   19    H          R
19          1   20    S          B

        --- Student 1 Moods ---
['H', 'H', 'S', 'S', 'H', 'H', 'H', 'S', 'S', 'S', 'H', 'H', 'H', 'S', 'S', 'S', 'H', 'H', 'H', 'S']

        --- Student 2 Data ---
    StudentID  Day Mood ShirtColor
20          2    1    H         


*   **Emission Matrix**

In [38]:
# Emission matrix: for every mood, the share of that mood's rows in which each
# shirt colour was observed. Colours never seen with a mood are simply absent
# from that mood's dictionary.
emission_probabilities = {}

for mood in data['Mood'].unique():
    rows_for_mood = data.loc[data['Mood'] == mood]
    n_rows = len(rows_for_mood)
    shirt_tally = rows_for_mood['ShirtColor'].value_counts()

    # Divide each colour count by the number of rows recorded for this mood.
    emission_probabilities[mood] = {
        shirt: shirt_tally[shirt] / n_rows for shirt in shirt_tally.index
    }

print("        --- Emission Probabilities ---")
for mood, shirt_probs in emission_probabilities.items():
    print(f"Mood {mood}:")
    for color, prob in shirt_probs.items():
        print(f"  {color}: {prob:.2f}")

        --- Emission Probabilities ---
Mood H:
  R: 0.72
  G: 0.28
Mood S:
  B: 0.86
  G: 0.14


**3. Decoding the Most Likely Mood Sequence**

In [42]:
observed_sequence = ['R', 'B', 'G']

if not observed_sequence:
    raise ValueError("observed_sequence must contain at least one observation")

# Enumerate every hidden-mood path with the same length as the observations.
# The paths are generated from the learned states rather than typed by hand, so
# changing observed_sequence (or its length) is enough to decode a new input.
# Brute-force enumeration grows exponentially with the sequence length, which
# is fine for a handful of observations.
hidden_states = list(transition_probabilities)
all_sequences = [
    list(path) for path in product(hidden_states, repeat=len(observed_sequence))
]


def sequence_probability(mood_sequence, observations):
    """Return the joint probability of a mood path and the observed colours.

    The value is the product of the initial probability of the first mood, the
    emission probability of each observation given its mood, and the transition
    probability between each pair of consecutive moods. A mood missing from the
    initial distribution, or a colour never observed with a mood, contributes 0.

    Args:
        mood_sequence: sequence of mood labels, one per observation.
        observations: sequence of shirt-colour labels of the same length.

    Returns:
        The joint probability as a number.
    """
    first_mood = mood_sequence[0]
    prob = initial_probabilities.get(first_mood, 0)
    prob *= emission_probabilities[first_mood].get(observations[0], 0)

    # Walk the path one step at a time: transition into the next mood, then
    # emit that step's observation.
    for prev_mood, next_mood, observation in zip(
        mood_sequence, mood_sequence[1:], observations[1:]
    ):
        prob *= transition_probabilities[prev_mood][next_mood]
        prob *= emission_probabilities[next_mood].get(observation, 0)

    return prob


sequence_probabilities = {
    tuple(seq): sequence_probability(seq, observed_sequence)
    for seq in all_sequences
}

print("\n        --- Sequence Probabilities ---")
for seq, prob in sequence_probabilities.items():
    print(f"{seq}: {prob:.4f}")

# max() returns the first path with the highest probability when there are ties.
most_likely_sequence = max(sequence_probabilities, key=sequence_probabilities.get)
print("\n        --- Most Likely Mood Sequence ---")
print(most_likely_sequence)


        --- Sequence Probabilities ---
('H', 'H', 'H'): 0.0000
('H', 'H', 'S'): 0.0000
('H', 'S', 'H'): 0.0162
('H', 'S', 'S'): 0.0098
('S', 'H', 'H'): 0.0000
('S', 'H', 'S'): 0.0000
('S', 'S', 'H'): 0.0000
('S', 'S', 'S'): 0.0000

        --- Most Likely Mood Sequence ---
('H', 'S', 'H')


**4. Probabilities and Most Likely Mood Sequence**

In [45]:
# Summary cell: re-prints the learned parameters and the decoding result that
# were computed in the earlier cells. Nothing is recomputed here.

# 1. Three Probability Matrices

print("        --- Initial Probabilities ---")
for mood, prob in initial_probabilities.items():
    print(f"{mood}: {prob:.2f}")

print("\n        --- Transition Probabilities ---")
for from_mood, to_probs in transition_probabilities.items():
    # One line per source mood: "H: H:0.65 S:0.35".
    print(f"{from_mood}: ", end="")
    for to_mood, prob in to_probs.items():
        print(f"{to_mood}:{prob:.2f} ", end="")
    print()

print("\n        --- Emission Probabilities ---")
for mood, color_probs in emission_probabilities.items():
    print(f"{mood}: ", end="")
    for color, prob in color_probs.items():
        print(f"{color}:{prob:.2f} ", end="")
    print()

# 2. Probability calculations for all possible mood sequences

print("\n        --- Sequence Probabilities ---")
for seq, prob in sequence_probabilities.items():
    # ".4f" (four decimals) matches the decoding cell; the previous "4f" was a
    # minimum field width of 4 with the default six decimals.
    print(f"{seq}: {prob:.4f}")

# 3. Identification of most likely sequence

print("\n        --- Most Likely Mood Sequence ---")
print(most_likely_sequence)

        --- Initial Probabilities ---
H: 0.60
S: 0.40

        --- Transition Probabilities ---
H: H:0.65 S:0.35 
S: H:0.45 S:0.55 

        --- Emission Probabilities ---
H: R:0.72 G:0.28 
S: B:0.86 G:0.14 

        --- Sequence Probabilities ---
('H', 'H', 'H'): 0.000000
('H', 'H', 'S'): 0.000000
('H', 'S', 'H'): 0.016205
('H', 'S', 'S'): 0.009845
('S', 'H', 'H'): 0.000000
('S', 'H', 'S'): 0.000000
('S', 'S', 'H'): 0.000000
('S', 'S', 'S'): 0.000000

        --- Most Likely Mood Sequence ---
('H', 'S', 'H')
