In [96]:
from py2neo import Graph
import numpy as np
import random

# Connect to Neo4j
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))


In [104]:
class HiddenMarkovModel:
    def __init__(self, graph, hidden_states, observable_states, transition_matrix, emission_matrix, initial_distribution):
        """
        Initializes a Hidden Markov Model (HMM) and stores it in a Neo4j database.
        """
        self.hidden_states = hidden_states
        self.observable_states = observable_states
        self.transition_matrix = np.array(transition_matrix)
        self.emission_matrix = np.array(emission_matrix)
        self.initial_distribution = np.array(initial_distribution)
        self.graph = graph
        self.create_neo4j_hmm(clear=True, model_name="TrueModel")
    def create_neo4j_hmm(self, clear=False, model_name="OriginalModel"):
        """Creates the HMM structure in Neo4j, supporting True, Viterbi, and Baum-Welch models."""
        print(f"Storing HMM in Neo4j as {model_name}...")
        if clear:
            self.graph.run("MATCH (n) DETACH DELETE n")

        node_type = "HiddenState" if model_name == "TrueModel" else f"{model_name}Node"
        edge_type = "TRANSITION" if model_name == "TrueModel" else f"{model_name}Connection"

        for state, prob in zip(self.hidden_states, self.initial_distribution):
            self.graph.run(f"""
                CREATE (:{node_type} {{name: $name, initial_prob: $prob, model: $model}})
            """, parameters={"name": state, "prob": float(prob), "model": model_name})

        for state in self.observable_states:
            self.graph.run(f"""
                CREATE (:ObservableState {{name: $name, model: $model}})
            """, parameters={"name": state, "model": model_name})

        for i, from_state in enumerate(self.hidden_states):
            for j, to_state in enumerate(self.hidden_states):
                prob = float(self.transition_matrix[i][j])
                if prob > 0:
                    self.graph.run(f"""
                        MATCH (a:{node_type} {{name: $from, model: $model}}), (b:{node_type} {{name: $to, model: $model}})
                        CREATE (a)-[:{edge_type} {{probability: $prob}}]->(b)
                    """, parameters={"from": from_state, "to": to_state, "prob": prob, "model": model_name})
        
        for i, from_state in enumerate(self.hidden_states):
            for j, obs in enumerate(self.observable_states):
                prob = float(self.emission_matrix[i][j])
                if prob > 0:
                    self.graph.run(f"""
                        MATCH (h:{node_type} {{name: $from, model: $model}}), (o:ObservableState {{name: $to, model: $model}})
                        CREATE (h)-[:EMITS {{probability: $prob}}]->(o)
                    """, parameters={"from": from_state, "to": obs, "prob": prob, "model": model_name})
    
    def generate_observations(self, n=10):
        """Generates a sequence of observable states based on the HMM."""   
        observations = []
        order_hidden_states=[]
        state = np.random.choice(self.hidden_states, p=self.initial_distribution)
        for _ in range(n):
            obs = np.random.choice(self.observable_states, p=self.emission_matrix[self.hidden_states.index(state)])
            observations.append(obs)
            order_hidden_states.append(state)
            state = np.random.choice(self.hidden_states, p=self.transition_matrix[self.hidden_states.index(state)])
        return observations, order_hidden_states


In [105]:


class Viterbi:
    def __init__(self, hmm):
        self.hmm = hmm
    
    def run(self, observations):
        """Viterbi algorithm for most likely hidden state sequence."""
        n_states = len(self.hmm.hidden_states)
        n_obs = len(observations)
        viterbi_table = np.zeros((n_states, n_obs))
        backpointer = np.zeros((n_states, n_obs), dtype=int)

        for s in range(n_states):
            viterbi_table[s, 0] = self.hmm.initial_distribution[s] * self.hmm.emission_matrix[s, self.hmm.observable_states.index(observations[0])]

        for t in range(1, n_obs):
            for s in range(n_states):
                max_prob, max_state = max(
                    (viterbi_table[s_prev, t - 1] * self.hmm.transition_matrix[s_prev, s] * 
                     self.hmm.emission_matrix[s, self.hmm.observable_states.index(observations[t])], s_prev)
                    for s_prev in range(n_states)
                
                )
                viterbi_table[s, t] = max_prob
                backpointer[s, t] = max_state

        best_last_state = np.argmax(viterbi_table[:, -1])
        best_path = [best_last_state]
        for t in range(n_obs - 1, 0, -1):
            best_last_state = backpointer[best_last_state, t]
            best_path.insert(0, best_last_state)
        
        most_likely_states = [self.hmm.hidden_states[i] for i in best_path]
        return most_likely_states


In [106]:

# Define Paper Bag HMM parameters
hidden_states_paperbag = ['A', 'B']  # The bags (Hidden states)
observable_states_paperbag = ['j', 'k']  # The chips (Observable states)

# Transition probabilities (Bag -> Bag)
transition_matrix_paperbag = [
    [0.40, 0.60],  # If in Bag A: 5% stay in A, 95% switch to B
    [0.80, 0.20]   # If in Bag B: 80% stay in B, 20% switch to A
]

# Emission probabilities (Bag -> Chip drawn)
emission_matrix_paperbag = [
    [2/5, 3/5],  # If in Bag A: 80% chance to draw 'j', 20% chance to draw 'k'
    [1/5, 4/5]   # If in Bag B: 20% chance to draw 'j', 80% chance to draw 'k'
]

# Initial probability distribution (starting in Bag A)
initial_distribution_paperbag = [1.0, 0.0]  # Always start in Bag A

# Connect to Neo4j database (Replace with your credentials)
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

# Initialize the Hidden Markov Model
hmm = HiddenMarkovModel(graph, hidden_states_paperbag, observable_states_paperbag, transition_matrix_paperbag, emission_matrix_paperbag, initial_distribution_paperbag)

# Generate Observations (Example predefined sequence)
observations,True_order = hmm.generate_observations(10)
print("\n=== Observations ===")
print(observations)


print("\n=== True Order ===")
print(True_order)

# Run Viterbi Algorithm
viterbi_model = Viterbi(hmm)
viterbi_path = viterbi_model.run(observations)
print("\n=== Viterbi Path ===")
print(viterbi_path)



Storing HMM in Neo4j as TrueModel...

=== Observations ===
['j', 'k', 'k', 'j', 'k', 'k', 'j', 'j', 'k', 'j']

=== True Order ===
['A', 'B', 'A', 'A', 'A', 'A', 'A', 'B', 'A', 'A']

=== Viterbi Path ===
['A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A']


In [117]:
class BaumWelch:
    def __init__(self, hmm):
        self.hmm = hmm
    
    def run(self, observations, n_iterations=100):
        """Baum-Welch algorithm for training HMM parameters and storing updates in Neo4j."""
        n_states = len(self.hmm.hidden_states)
        n_obs = len(observations)
        obs_indices = [self.hmm.observable_states.index(obs) for obs in observations]


        for _ in range(n_iterations):
            alpha = np.zeros((n_states, n_obs))
            beta = np.zeros((n_states, n_obs))
            gamma = np.zeros((n_states, n_obs))
            xi = np.zeros((n_states, n_states, n_obs - 1))
            
            alpha[:, 0] = self.hmm.initial_distribution * self.hmm.emission_matrix[:, obs_indices[0]]
            for t in range(1, n_obs):
                alpha[:, t] = (alpha[:, t - 1] @ self.hmm.transition_matrix) * self.hmm.emission_matrix[:, obs_indices[t]]
            
            beta[:, -1] = 1
            for t in range(n_obs - 2, -1, -1):
                beta[:, t] = self.hmm.transition_matrix @ (self.hmm.emission_matrix[:, obs_indices[t + 1]] * beta[:, t + 1])
            
            gamma = (alpha * beta) / np.sum(alpha * beta, axis=0)
            for t in range(n_obs - 1):
                xi[:, :, t] = (alpha[:, t] * self.hmm.transition_matrix * self.hmm.emission_matrix[:, obs_indices[t + 1]] * beta[:, t + 1])
                xi[:, :, t] /= np.sum(xi[:, :, t])
            
            self.hmm.initial_distribution = gamma[:, 0]
            self.hmm.transition_matrix = np.sum(xi, axis=2) / np.sum(gamma[:, :-1], axis=1, keepdims=True)
            for k in range(len(self.hmm.observable_states)):
                mask = np.array(obs_indices) == k
                self.hmm.emission_matrix[:, k] = np.sum(gamma[:, mask], axis=1) / np.sum(gamma, axis=1)
        
        self.hmm.create_neo4j_hmm(clear=False, model_name="BaumWelch")
        
        
        return self.hmm.transition_matrix, self.hmm.emission_matrix


hidden_states_paperbag = ['A', 'B']  # The bags (Hidden states)
observable_states_paperbag = ['j', 'k']  # The chips (Observable states)

transition_matrix_paperbag = [
    [0.40, 0.60],  # If in Bag A: 40% stay in A, 60% switch to B
    [0.80, 0.20]   # If in Bag B: 80% stay in B, 20% switch to A
]

emission_matrix_paperbag = [
    [2/5, 3/5],  # If in Bag A: 40% chance to draw 'j', 60% chance to draw 'k'
    [1/5, 4/5]   # If in Bag B: 20% chance to draw 'j', 80% chance to draw 'k'
]

initial_distribution_paperbag = [1.0, 0.0]  # Always start in Bag A

graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))
hmm = HiddenMarkovModel(graph, hidden_states_paperbag, observable_states_paperbag, transition_matrix_paperbag, emission_matrix_paperbag, initial_distribution_paperbag)

observations, _ = hmm.generate_observations(n=100)
print("Generated Observations:", observations)

baum_welch = BaumWelch(hmm)
updated_transition_matrix, updated_emission_matrix = baum_welch.run(observations)
print("Initial Transition Matrix:\n", transition_matrix_paperbag)
print("Final Updated Transition Matrix:\n", updated_transition_matrix)
print("Initial Emission Matrix:\n", emission_matrix_paperbag)
print("Final Updated Emission Matrix:\n", updated_emission_matrix)


Storing HMM in Neo4j as TrueModel...
Generated Observations: ['j', 'j', 'j', 'k', 'j', 'k', 'k', 'j', 'j', 'k', 'k', 'k', 'j', 'k', 'k', 'k', 'j', 'j', 'k', 'k', 'k', 'j', 'k', 'k', 'k', 'k', 'j', 'j', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'j', 'k', 'j', 'k', 'k', 'k', 'k', 'k', 'k', 'j', 'j', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'k', 'j', 'k', 'k', 'k', 'k', 'j', 'j', 'k', 'k', 'j', 'k', 'k', 'k', 'k', 'j', 'k', 'k', 'k', 'j', 'k', 'k', 'k', 'j', 'k', 'k', 'j', 'j', 'k', 'k', 'j', 'k', 'k', 'k', 'j', 'k', 'j']
Storing HMM in Neo4j as BaumWelch...
Initial Transition Matrix:
 [[0.4, 0.6], [0.8, 0.2]]
Final Updated Transition Matrix:
 [[0.2973807  0.82109601]
 [0.60067918 0.27642193]]
Initial Emission Matrix:
 [[0.4, 0.6], [0.2, 0.8]]
Final Updated Emission Matrix:
 [[0.34108717 0.65891283]
 [0.21638292 0.78361708]]
