In [1]:
import numpy as np
import pandas as pd

# Load the dataset
df = pd.read_csv('C:/Users/ASUS/Downloads/solar_weather.csv')

# Check if the weather_type column exists
if 'weather_type' not in df.columns:
    raise ValueError("The dataset must contain a 'weather_type' column.")

# Extract the weather_type column as an observation sequence
weather_data = df['weather_type'].values

# Initialize parameters
n_states = 5  # Assuming hidden states are labeled from 0 to 4
n_observations = 5  # Assuming weather types are labeled from 1 to 5

# Function to compute initial probabilities
def compute_initial_probabilities(weather_data):
    initial_counts = np.zeros(n_states)
    # Count each state occurrence
    for state in weather_data:
        initial_counts[int(state) - 1] += 1
    # Normalize to get initial probabilities
    initial_probabilities = initial_counts / initial_counts.sum() if initial_counts.sum() > 0 else np.ones(n_states) / n_states
    return initial_probabilities

# Function to compute transition matrix
def compute_transition_matrix(weather_data):
    transition_matrix = np.zeros((n_states, n_states))
    for i in range(len(weather_data) - 1):
        current_state = int(weather_data[i]) - 1
        next_state = int(weather_data[i + 1]) - 1
        transition_matrix[current_state][next_state] += 1
    transition_matrix /= transition_matrix.sum(axis=1, keepdims=True)
    return transition_matrix

# Function to compute emission matrix with structured logic
def compute_emission_matrix(weather_data):
    emission_matrix = np.zeros((n_states, n_observations))
    
    # Map weather types to states using the given mapping
    for i in range(len(weather_data)):
        current_observation = int(weather_data[i]) - 1  # Convert to 0-indexed

        # Define logic to assign states based on weather types
        if current_observation == 0:  # 1 = Fog
            current_state = 0  # State 0 for fog
        elif current_observation == 1:  # 2 = Sunny
            current_state = 1  # State 1 for sunny
        elif current_observation == 2:  # 3 = Windy and Humidity
            current_state = 2  # State 2 for windy and humidity
        elif current_observation == 3:  # 4 = Cloudy
            current_state = 3  # State 3 for cloudy
        else:  # 5 = Rain
            current_state = 4  # State 4 for rain

        emission_matrix[current_state][current_observation] += 1

    # Apply Laplace smoothing
    alpha = 1  # Smoothing factor
    emission_matrix += alpha
    emission_matrix /= emission_matrix.sum(axis=1, keepdims=True)

    return emission_matrix

# Forward Algorithm
def forward_algorithm(obs_sequence, transition_matrix, emission_matrix, initial_state_distribution):
    n_obs = len(obs_sequence)
    alpha = np.zeros((n_obs, n_states))
    alpha[0, :] = initial_state_distribution * emission_matrix[:, obs_sequence[0]]
    
    for t in range(1, n_obs):
        for j in range(n_states):
            alpha[t, j] = np.sum(alpha[t - 1, :] * transition_matrix[:, j]) * emission_matrix[j, obs_sequence[t]]
    
    
    return alpha

# Viterbi Algorithm
def viterbi(obs_sequence, transition_matrix, emission_matrix, initial_state_distribution):
    n_obs = len(obs_sequence)
    viterbi = np.zeros((n_obs, n_states))
    backpointer = np.zeros((n_obs, n_states), dtype=int)
    viterbi[0, :] = initial_state_distribution * emission_matrix[:, obs_sequence[0]]
    
    for t in range(1, n_obs):
        for j in range(n_states):
            max_prob = np.max(viterbi[t - 1, :] * transition_matrix[:, j])
            backpointer[t, j] = np.argmax(viterbi[t - 1, :] * transition_matrix[:, j])
            viterbi[t, j] = max_prob * emission_matrix[j, obs_sequence[t]]
    
    best_path = np.zeros(n_obs, dtype=int)
    best_path[-1] = np.argmax(viterbi[-1, :])
    
    for t in range(n_obs - 2, -1, -1):
        best_path[t] = backpointer[t + 1, best_path[t + 1]]
    
    return best_path

# Main function to execute all steps
def main():
    # Compute matrices
    initial_probabilities = compute_initial_probabilities(weather_data)
    transition_matrix = compute_transition_matrix(weather_data)
    emission_matrix = compute_emission_matrix(weather_data)

    # Convert weather data to observation sequence (0-indexed)
    obs_sequence = [int(obs) - 1 for obs in weather_data]

    # Run the forward algorithm
    alpha = forward_algorithm(obs_sequence, transition_matrix, emission_matrix, initial_probabilities)
    print("Forward Algorithm Output (alpha):")
    print(alpha)
    

    # Run the Viterbi algorithm
    best_path = viterbi(obs_sequence, transition_matrix, emission_matrix, initial_probabilities)
    print("Most Probable Hidden State Sequence (0-indexed):")
    print(best_path)

    # Print transition, initial, and emission matrices
    print("Initial Probabilities:")
    print(initial_probabilities)
    print("Transition Matrix:")
    print(transition_matrix)
    print("Emission Matrix:")
    print(emission_matrix)

if __name__ == "__main__":
    main()


Forward Algorithm Output (alpha):
[[5.08101246e-06 5.08120344e-06 5.08111811e-06 3.70980202e-01
  5.08103561e-06]
 [3.44882604e-09 4.15916396e-08 2.91103329e-07 3.54247811e-01
  2.07059554e-07]
 [3.12174837e-09 3.95792500e-08 2.77837981e-07 3.38269614e-01
  1.97565734e-07]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]]
Most Probable Hidden State Sequence (0-indexed):
[3 3 3 ... 0 0 0]
Initial Probabilities:
[0.14217181 0.18004228 0.1608936  0.37100053 0.14589178]
Transition Matrix:
[[9.54568201e-01 4.04632542e-02 3.68172719e-03 8.22133257e-04
  4.64684015e-04]
 [3.21497121e-02 9.04087163e-01 4.84080388e-02 9.68160777e-03
  5.67347860e-03]
 [3.19024606e-03 5.24021605e-02 8.72863957e-01 5.97934237e-02
  1.17502132e-02]
 [2.46561832e-04 3.95868720e-03 2.48342557e-02 9.54947674e-01
  1.60128212