In [2]:
import pandas as pd

# Read the raw records and give the four columns canonical names
# (the names are assigned positionally, replacing whatever header was read).
df = pd.read_csv("student_data.csv")
df.columns = ["StudentID", "Day", "Mood", "ShirtColor"]

# Initial distribution: relative frequency of each mood among the Day-1 rows.
# A mood that never occurs on Day 1 gets probability 0.
first_day = df.loc[df["Day"] == 1]
day_one_shares = first_day["Mood"].value_counts(normalize=True)
initial_prob = day_one_shares.reindex(["H", "S"], fill_value=0)

# Transition matrix: P(next mood | current mood), estimated by counting pairs
# of consecutive rows within each student's records, ordered by Day.
transition_counts = {"H->H": 0, "H->S": 0, "S->H": 0, "S->S": 0}
for _, group in df.groupby("StudentID"):
    mood_sequence = group.sort_values("Day")["Mood"].tolist()
    for current_mood, next_mood in zip(mood_sequence, mood_sequence[1:]):
        pair = f"{current_mood}->{next_mood}"
        # Pairs involving a label other than H/S (e.g. a missing value) are
        # skipped instead of raising KeyError, matching how the other tables
        # in this script keep only the H and S rows via reindex.
        if pair in transition_counts:
            transition_counts[pair] += 1

# Row totals: number of observed transitions leaving each mood.
from_h_total = transition_counts["H->H"] + transition_counts["H->S"]
from_s_total = transition_counts["S->H"] + transition_counts["S->S"]

# A mood with no outgoing transitions gets an all-zero row rather than a
# ZeroDivisionError (same convention as the fill_value=0 used elsewhere).
p_hh = transition_counts["H->H"] / from_h_total if from_h_total else 0.0
p_hs = transition_counts["H->S"] / from_h_total if from_h_total else 0.0
p_sh = transition_counts["S->H"] / from_s_total if from_s_total else 0.0
p_ss = transition_counts["S->S"] / from_s_total if from_s_total else 0.0

# Rows are the current mood, columns are the next mood.
transition_matrix = pd.DataFrame(
    [[p_hh, p_hs],
     [p_sh, p_ss]],
    index=["H", "S"],
    columns=["H", "S"],
)


# Emission matrix: P(shirt colour | mood). Each row of the cross-tabulation is
# normalised to sum to 1, then the table is restricted to the H/S rows and
# R/G/B columns; any mood/colour combination never seen gets probability 0.
mood_by_shirt = pd.crosstab(
    index=df["Mood"],
    columns=df["ShirtColor"],
    normalize="index",
)
emission_matrix = mood_by_shirt.reindex(
    index=["H", "S"],
    columns=["R", "G", "B"],
    fill_value=0,
)

observation = ["R", "B", "G"]
moods = ["H", "S"]

# Joint probability of every possible mood path together with the observed
# shirt colours:
#   initial_prob[m1] * emission[m1, o1]
#       * transition[m1, m2] * emission[m2, o2] * ...
# Paths are extended one observation at a time, so this works for any
# non-empty `observation`, not only a three-day one. Unpacking an empty
# `observation` raises ValueError.
first_shirt, *later_shirts = observation
partial_paths = [
    ((mood,), initial_prob[mood] * emission_matrix.loc[mood, first_shirt])
    for mood in moods
]
for shirt in later_shirts:
    # Outer loop over existing paths, inner loop over the next mood: this
    # keeps the paths in the same order nested per-day loops would produce.
    partial_paths = [
        (
            path + (mood,),
            prob
            * transition_matrix.loc[path[-1], mood]
            * emission_matrix.loc[mood, shirt],
        )
        for path, prob in partial_paths
        for mood in moods
    ]

# List of (mood path tuple, joint probability) pairs.
results = partial_paths

# The column label is kept as-is because the reporting code looks it up by
# this exact name, even when the path length is not three.
results_df = pd.DataFrame(results, columns=["Sequence (M1,M2,M3)", "Probability"])
results_df = results_df.sort_values("Probability", ascending=False).reset_index(drop=True)

# --- Report the estimated model and the ranked mood sequences ---
print("\n Initial Probability Distribution ")
for mood in initial_prob.index:
    print(f"P({mood}) = {initial_prob[mood]:.4f}")

# Both matrices are shown rounded to four decimals.
print("\n Transition Matrix")
print(transition_matrix.round(4))

print("\n Emission Matrix ")
print(emission_matrix.round(4))

# One line per mood sequence, in the order results_df already has
# (sorted by descending probability).
print("\n Sequence Probabilities")
sequence_col = "Sequence (M1,M2,M3)"
for sequence, probability in zip(results_df[sequence_col], results_df["Probability"]):
    print(f"{sequence}: {probability:.8f}")

# The first row of the sorted table is the highest-probability sequence.
most_likely = results_df.iloc[0]
print("\nMost likely sequence:", most_likely[sequence_col])
print(f"Probability = {most_likely['Probability']:.8f}")



 Initial Probability Distribution 
P(H) = 0.6000
P(S) = 0.4000

 Transition Matrix
        H       S
H  0.6545  0.3455
S  0.4500  0.5500

 Emission Matrix 
ShirtColor       R       G       B
Mood                              
H           0.7193  0.2807  0.0000
S           0.0000  0.1395  0.8605

 Sequence Probabilities
('H', 'S', 'H'): 0.01620474
('H', 'S', 'S'): 0.00984532
('H', 'H', 'S'): 0.00000000
('H', 'H', 'H'): 0.00000000
('S', 'H', 'H'): 0.00000000
('S', 'H', 'S'): 0.00000000
('S', 'S', 'H'): 0.00000000
('S', 'S', 'S'): 0.00000000

Most likely sequence: ('H', 'S', 'H')
Probability = 0.01620474
