In [7]:
import math
import random

import numpy as np
import pandas as pd

In [8]:
# picks coin A or coin B with equal probability, then tosses the chosen coin num_rolls times
def roll(tA, tB, num_rolls):
    """Simulate one trial: choose a coin at random and toss it repeatedly.

    A single fair Bernoulli draw selects the coin (0 selects A, otherwise B);
    the selected coin is then used for every toss of the trial.

    Args:
        tA: probability that coin A produces a 1.
        tB: probability that coin B produces a 1.
        num_rolls: number of tosses to draw from the selected coin.

    Returns:
        A Python list of ``num_rolls`` outcomes, each 0 or 1.
    """
    coin_is_A = np.random.binomial(1, 0.5) == 0
    theta = tA if coin_is_A else tB
    outcomes = np.random.binomial(1, theta, num_rolls)
    return outcomes.tolist()

# number of simulated trials; the same value is later passed to EM as its iteration count
iterations = 20
# every trial is 10 tosses of a coin whose probability of a 1 is 0.8 (coin A) or 0.1 (coin B)
tosses = [roll(tA=0.8, tB=0.1, num_rolls=10) for _ in range(iterations)]

In [11]:
class EM:
    """Expectation-Maximization for a mixture of two biased coins.

    ``tosses`` is a collection of trials; each trial is a list of outcomes in
    which the value 1 is counted as heads and everything else as tails. Every
    trial is treated as coming from a single coin (A or B) whose identity is
    not observed. ``run`` alternates ``e_step`` and ``m_step`` to estimate the
    heads probability of each coin.
    """

    def __init__(self, iterations, tosses):
        """Store the number of EM iterations to perform and the observed trials."""
        self.iterations = iterations
        self.tosses = tosses

    @staticmethod
    def _log_likelihood(theta, num_heads, num_tails):
        """Return log(theta**num_heads * (1 - theta)**num_tails).

        A factor whose exponent is 0 contributes nothing (matching
        ``pow(0, 0) == 1``); an impossible outcome yields ``-inf`` instead of
        raising on ``log(0)``.
        """
        total = 0.0
        if num_heads:
            if theta <= 0:
                return -math.inf
            total += num_heads * math.log(theta)
        if num_tails:
            if theta >= 1:
                return -math.inf
            total += num_tails * math.log(1 - theta)
        return total

    @staticmethod
    def _posterior_A(num_heads, num_tails, gA, gB):
        """Probability that a trial came from coin A, with equal priors on A and B."""
        likelihood_A = pow(gA, num_heads) * pow(1 - gA, num_tails)
        likelihood_B = pow(gB, num_heads) * pow(1 - gB, num_tails)
        total = likelihood_A + likelihood_B
        if total != 0:
            return likelihood_A / total

        # Both likelihoods are 0: either they underflowed (long trials) or the
        # trial is impossible under both coins. Redo the comparison in log space.
        log_A = EM._log_likelihood(gA, num_heads, num_tails)
        log_B = EM._log_likelihood(gB, num_heads, num_tails)
        if log_A == -math.inf and log_B == -math.inf:
            # nothing distinguishes the coins: split the trial evenly
            return 0.5
        diff = log_B - log_A
        # logistic(-diff), arranged so exp() only ever receives a non-positive argument
        if diff > 0:
            ratio = math.exp(-diff)
            return ratio / (1 + ratio)
        return 1 / (1 + math.exp(diff))

    def e_step(self, tosses, gA, gB):
        """Attribute the heads and tails of every trial to coins A and B.

        Args:
            tosses: iterable of trials; each trial must support ``count`` and
                ``len`` (e.g. a list).
            gA: current estimate of coin A's heads probability.
            gB: current estimate of coin B's heads probability.

        Returns:
            ``(A_heads, B_heads, A_tails, B_tails)``: expected counts, each
            trial's heads/tails weighted by the probability it came from that
            coin. Trials for which both likelihoods evaluate to 0 no longer
            raise ``ZeroDivisionError``; see ``_posterior_A``.
        """
        A_heads, B_heads, A_tails, B_tails = 0, 0, 0, 0
        for toss in tosses:
            num_heads = toss.count(1)
            num_tails = len(toss) - num_heads

            p_A = self._posterior_A(num_heads, num_tails, gA, gB)
            p_B = 1 - p_A

            A_heads += p_A * num_heads
            B_heads += p_B * num_heads
            A_tails += p_A * num_tails
            B_tails += p_B * num_tails

        return A_heads, B_heads, A_tails, B_tails

    def m_step(self, A_heads, B_heads, A_tails, B_tails, prev_gA=0.5, prev_gB=0.5):
        """Re-estimate both heads probabilities from the expected counts.

        Args:
            A_heads, B_heads, A_tails, B_tails: expected counts from ``e_step``.
            prev_gA, prev_gB: value returned for a coin whose heads + tails
                total is 0 (no data attributed to it), instead of dividing by zero.

        Returns:
            ``(gA, gB)``: heads / (heads + tails) for each coin.
        """
        A_total = A_heads + A_tails
        B_total = B_heads + B_tails
        gA = A_heads / A_total if A_total else prev_gA
        gB = B_heads / B_total if B_total else prev_gB
        return gA, gB

    def run(self):
        """Run EM from a random start and return the history of estimates.

        Returns:
            A DataFrame with columns ``'A'`` and ``'B'`` and
            ``self.iterations + 1`` rows: the initial guess followed by the
            estimate after each iteration, rounded to 2 decimals. Rounding
            affects only the recorded history; iterations use full precision.
        """
        # independent random starting points; nothing forces them to differ
        gA = random.uniform(0, 1)
        gB = random.uniform(0, 1)
        history = [[round(gA, 2), round(gB, 2)]]

        for _ in range(self.iterations):
            counts = self.e_step(self.tosses, gA, gB)
            # a coin with no attributed data keeps its current estimate
            gA, gB = self.m_step(*counts, prev_gA=gA, prev_gB=gB)
            history.append([round(gA, 2), round(gB, 2)])

        # build the frame once rather than appending a row per iteration
        return pd.DataFrame(history, columns=['A', 'B'])

# NOTE(review): this rebinds the name EM from the class to the instance, so the
# class can no longer be reached by name after this line. The binding is kept
# as-is in case later cells rely on it.
EM = EM(iterations=iterations, tosses=tosses)  # 20 iterations, data set of tosses
EM.run()

Unnamed: 0,A,B
0,0.96,1.0
1,0.41,0.89
2,0.19,0.82
3,0.11,0.79
4,0.11,0.79
5,0.11,0.79
6,0.11,0.79
7,0.11,0.79
8,0.11,0.79
9,0.11,0.79
