In [1]:
import numpy as np
import matplotlib.pyplot as plt

def initialize_parameters(file_path):
    """
    Initialize the parameters for the EM algorithm.
    
    Args:
    file_path (str): The path to the CSV file containing the data.
    
    Returns:
    dict: A dictionary containing initialized parameters and data.
    """
    data = np.genfromtxt('simulated_data.csv', delimiter=',')
    y = data[1:, 1:2].T[0]  # Assuming y is the second column in the data
    T = len(y)
    M = 200  # Number of iterations

    # Initial parameter values
    sigmaH_sq = 2
    sigmaL_sq = 1
    p = 0.5

    # Initialize other parameters
    pStar = np.zeros(T)
    volStar = np.zeros(T)
    parVec = np.zeros([M, 3])
    likVec = np.zeros(M)

    return {
        'y': y, 'T': T, 'M': M, 'sigmaH_sq': sigmaH_sq, 'sigmaL_sq': sigmaL_sq, 'p': p,
        'pStar': pStar, 'volStar': volStar, 'parVec': parVec, 'likVec': likVec
    }

def e_step(params):
    """
    Perform the E-step of the EM algorithm.
    
    Args:
    params (dict): A dictionary containing the parameters and data.
    
    Returns:
    dict: Updated parameters after the E-step.
    """
    y, T, sigmaH_sq, sigmaL_sq, p = params['y'], params['T'], params['sigmaH_sq'], params['sigmaL_sq'], params['p']
    pStar = params['pStar']

    for t in range(T):
        f1 = np.exp(-y[t]**2/(2*sigmaH_sq))/np.sqrt(2*np.pi*sigmaH_sq)
        f2 = np.exp(-y[t]**2/(2*sigmaL_sq))/np.sqrt(2*np.pi*sigmaL_sq)
        pStar[t] = (f1 * p) / (f1 * p + f2 * (1 - p))

    params['pStar'] = pStar
    return params

def m_step(params):
    """
    Perform the M-step of the EM algorithm.
    
    Args:
    params (dict): A dictionary containing the parameters and data.
    
    Returns:
    dict: Updated parameters after the M-step.
    """
    y, T, pStar = params['y'], params['T'], params['pStar']
    
    sigmaH_sq = np.sum(pStar * y**2) / np.sum(pStar)
    sigmaL_sq = np.sum((1 - pStar) * y**2) / np.sum(1 - pStar)
    p = np.sum(pStar) / T

    params.update({'sigmaH_sq': sigmaH_sq, 'sigmaL_sq': sigmaL_sq, 'p': p})

    # Compute maximized EM log-likelihood value
    logLik = 0
    for t in range(T):
        f1 = np.exp(-y[t]**2/(2*sigmaH_sq))/np.sqrt(2*np.pi*sigmaH_sq)
        f2 = np.exp(-y[t]**2/(2*sigmaL_sq))/np.sqrt(2*np.pi*sigmaL_sq)
        logLik += pStar[t]*(np.log(f1)+np.log(p)) + (1-pStar[t])*(np.log(f2)+np.log(1-p))
        params['volStar'][t] = np.sqrt(sigmaH_sq)*pStar[t] + np.sqrt(sigmaL_sq)*(1-pStar[t])

    params['likVec'][params['current_iteration']] = logLik
    params['parVec'][params['current_iteration']] = [sigmaH_sq, sigmaL_sq, p]

    return params

def run_em_algorithm(file_path):
    """
    Run the EM algorithm for the Markov Switching model.
    
    Args:
    file_path (str): The path to the CSV file containing the data.
    """
    params = initialize_parameters(file_path)

    for iteration in range(params['M']):
        params['current_iteration'] = iteration
        params = e_step(params)
        params = m_step(params)

        # Optionally: Print/log the progress and intermediate results
        print(f"Iteration {iteration+1}: sigmaH_sq = {params['sigmaH_sq']}, sigmaL_sq = {params['sigmaL_sq']}, p = {params['p']}")

    # Final parameter values
    print('Final Estimates:')
    print('sigma_H =', params['parVec'][-1][0])
    print('sigma_L =', params['parVec'][-1][1])
    print('p =', params['parVec'][-1][2])
    print('Log-likelihood =', params['likVec'][-1])

    # Additional analysis and plotting can be added here
    # ...

# Example usage
file_path = 'simulated_data.csv'  # Replace with your actual file path
run_em_algorithm(file_path)


Iteration 1: sigmaH_sq = 15.661122709102688, sigmaL_sq = 0.8061154271365603, p = 0.6117409242766711
Iteration 2: sigmaH_sq = 17.62267583086914, sigmaL_sq = 0.3776226072031781, p = 0.5518051223919115
Iteration 3: sigmaH_sq = 18.89215525928465, sigmaL_sq = 0.1924459106434875, p = 0.5187826837535662
Iteration 4: sigmaH_sq = 19.059486317708423, sigmaL_sq = 0.10993760864840954, p = 0.5162969236632604
Iteration 5: sigmaH_sq = 18.692579521061127, sigmaL_sq = 0.0709535736166654, p = 0.527482281410262
Iteration 6: sigmaH_sq = 18.279728202180966, sigmaL_sq = 0.053397695542168666, p = 0.5398856128948633
Iteration 7: sigmaH_sq = 17.95778529700094, sigmaL_sq = 0.045545334556090784, p = 0.549790869150625
Iteration 8: sigmaH_sq = 17.746799096015195, sigmaL_sq = 0.041819466089665525, p = 0.5564373442826137
Iteration 9: sigmaH_sq = 17.62083213088417, sigmaL_sq = 0.039963905499670334, p = 0.5604710347704132
Iteration 10: sigmaH_sq = 17.549229450292923, sigmaL_sq = 0.03900947621356936, p = 0.562786866773

  logLik += pStar[t]*(np.log(f1)+np.log(p)) + (1-pStar[t])*(np.log(f2)+np.log(1-p))
  logLik += pStar[t]*(np.log(f1)+np.log(p)) + (1-pStar[t])*(np.log(f2)+np.log(1-p))


Iteration 20: sigmaH_sq = 17.462567266413355, sigmaL_sq = 0.037939166201211134, p = 0.5656127688384923
Iteration 21: sigmaH_sq = 17.462484027856494, sigmaL_sq = 0.03793817939959131, p = 0.5656154954200088
Iteration 22: sigmaH_sq = 17.462439244905205, sigmaL_sq = 0.03793764852422629, p = 0.5656169623506451
Iteration 23: sigmaH_sq = 17.462415151683494, sigmaL_sq = 0.03793736292235569, p = 0.5656177515619861
Iteration 24: sigmaH_sq = 17.462402189629866, sigmaL_sq = 0.037937209272336334, p = 0.5656181761552508
Iteration 25: sigmaH_sq = 17.462395216126517, sigmaL_sq = 0.03793712661034619, p = 0.5656184045839968
Iteration 26: sigmaH_sq = 17.46239146443363, sigmaL_sq = 0.037937082139029216, p = 0.565618527477031
Iteration 27: sigmaH_sq = 17.462389446053127, sigmaL_sq = 0.037937058213882636, p = 0.5656185935925119
Iteration 28: sigmaH_sq = 17.46238836018137, sigmaL_sq = 0.037937045342373105, p = 0.5656186291620908
Iteration 29: sigmaH_sq = 17.462387775991694, sigmaL_sq = 0.037937038417616746, 