In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Data Generation
# Synthetic 1-D sample drawn from two Gaussians; the seed is fixed so every
# run produces the same points.
np.random.seed(0)
n_samples = 300
true_mean1 = 0
true_mean2 = 5
true_std1 = 1
true_std2 = 1.5

# 40% of the points come from the first Gaussian and 60% from the second.
# The draws are made in this order and concatenated in the same order, so the
# first block of `data` belongs to component 1 and the rest to component 2.
component1_draws = np.random.normal(loc=true_mean1, scale=true_std1,
                                    size=int(0.4 * n_samples))
component2_draws = np.random.normal(loc=true_mean2, scale=true_std2,
                                    size=int(0.6 * n_samples))
data = np.concatenate([component1_draws, component2_draws])

# EM Algorithm Implementation
# Starting guesses for the parameters of the two fitted Gaussians.
mean1 = -20
mean2 = 10
std1 = 20
std2 = 10

# Per-iteration history used by the diagnostic plots. The mean traces start
# with the initial guesses so the first plotted point is the starting value.
log_likelihood = []
means1, means2 = [mean1], [mean2]

# Plot configuration
# True: the third panel shows the per-iteration change of each mean;
# False: it shows the means themselves.
plot_change_of_mean = False
# True: gray histogram plus data points colored by their responsibility;
# False: plain histogram only.
color_bars_by_gaussian = True
# True: pin the y-range of the density panel so frames are comparable.
fix_y_axis = True

# Number of EM iterations to run (the loop has no early-stopping criterion).
max_iter = 50
# Plot inputs that do not change between iterations are computed once here
# instead of on every pass through the loop.
y_offset = 0.005  # A small positive offset from the x-axis for the data-point markers.
x_values = np.linspace(data.min(), data.max(), 1000)

for iteration in range(max_iter):
    # E-step: gamma[i] is the responsibility of Gaussian 1 for data[i].
    # The densities are compared without mixing weights, so the model being
    # fitted is an equal-weight (0.5 / 0.5) mixture; mixing proportions are
    # not estimated.
    resp1 = norm.pdf(data, mean1, std1)
    resp2 = norm.pdf(data, mean2, std2)
    gamma = resp1 / (resp1 + resp2)

    # M-step: responsibility-weighted mean and standard deviation of each
    # component. Each std uses the mean that was just updated.
    total_resp1 = np.sum(gamma)
    total_resp2 = np.sum(1 - gamma)
    mean1 = np.sum(gamma * data) / total_resp1
    std1 = np.sqrt(np.sum(gamma * (data - mean1) ** 2) / total_resp1)
    mean2 = np.sum((1 - gamma) * data) / total_resp2
    std2 = np.sqrt(np.sum((1 - gamma) * (data - mean2) ** 2) / total_resp2)

    # Update means history
    means1.append(mean1)
    means2.append(mean2)

    # Log-likelihood of the data under the equal-weight mixture, evaluated at
    # the parameters just updated (the same ones drawn and titled below):
    #     sum_i log(0.5 * N(x_i | mean1, std1) + 0.5 * N(x_i | mean2, std2))
    # Computed in log space (logpdf + logaddexp) so an underflowing density
    # cannot produce log(0).
    ll_current = np.sum(
        np.logaddexp(norm.logpdf(data, mean1, std1),
                     norm.logpdf(data, mean2, std2))
        + np.log(0.5)
    )
    log_likelihood.append(ll_current)

    # Three stacked panels: fitted densities, log-likelihood trace, mean trace.
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 6), gridspec_kw={'height_ratios': [2, 1, 1]}, dpi=200)

    # Histogram and data points
    if color_bars_by_gaussian:
        ax1.hist(data, bins=30, density=True, alpha=0.5, color='gray', label='Data Histogram', edgecolor='black')
        # Position data points just above the x-axis, colored by the E-step
        # responsibility of Gaussian 1 through the 'coolwarm' colormap.
        ax1.scatter(data, np.full_like(data, y_offset), c=gamma, cmap='coolwarm', s=20, alpha=0.9, label='Data Points')
    else:
        ax1.hist(data, bins=30, density=True, alpha=0.5, label='Data')

    # Plotting Gaussian distributions and setting titles/legends
    ax1.plot(x_values, norm.pdf(x_values, mean1, std1), 'r-', label='Gaussian 1')
    ax1.plot(x_values, norm.pdf(x_values, mean2, std2), 'b-', label='Gaussian 2')
    ax1.set_title(f'Iteration {iteration + 1}, Log-Likelihood: {ll_current:.3f}')
    ax1.legend()

    if fix_y_axis:
        ax1.set_ylim([0, 0.35])  # Adjust if needed based on your data's range

    # Plot log-likelihood
    ax2.plot(log_likelihood, '-o', label='Log-Likelihood', markersize=5)
    ax2.legend()

    # Choose to plot mean or change of mean. On the first iteration the means
    # themselves are always shown, regardless of the flag.
    if plot_change_of_mean and iteration > 0:
        changes_mean1 = np.diff(means1)
        changes_mean2 = np.diff(means2)
        ax3.plot(changes_mean1, '-r', label='Change in Mean 1', markersize=5)
        ax3.plot(changes_mean2, '-b', label='Change in Mean 2', markersize=5)
    else:
        ax3.plot(means1, '-or', label='Mean 1', markersize=5)
        ax3.plot(means2, '-ob', label='Mean 2', markersize=5)
    ax3.legend()

    plt.show()
    # Release the figure so one high-dpi figure per iteration does not
    # accumulate in memory; a no-op if the backend already closed it.
    plt.close(fig)
