In [1]:
import numpy as np
from scipy.special import binom

In [2]:
CS = 2
CG = 3
PCSTS = 0.5
PCGTS = 0.3
PCGTG = 0.8
PCSTG = 0.15

In [3]:
expTS = (1 / (PCSTS*PCGTG - PCSTG*PCGTS)) * (PCGTG*CS - PCSTG*CG)
expTG = (1 / (PCSTS*PCGTG - PCSTG*PCGTS)) * (PCSTS*CG - PCGTS*CS)
print(expTS)
print(expTG)

3.23943661971831
2.5352112676056335


In [4]:
realizations = 1000000
starCount = (int(expTS / 5) + 1) * 5
galaCount = (int(expTG / 5) + 1) * 5

likelihood = np.zeros([starCount, galaCount])
for TS in np.arange(starCount):
    for TG in np.arange(galaCount):
        totalCorr = 0
        for _ in np.arange(realizations):
            stars = np.random.rand(TS)
            galaxies = np.random.rand(TG)
            
            classifiedStars = len(np.where(stars < PCSTS)[0]) + len(np.where((galaxies > PCGTG) & (galaxies < PCGTG + PCSTG))[0])
            classifiedGalaxies = len(np.where(galaxies < PCGTG)[0]) + len(np.where((stars > PCSTS) & (stars < PCGTS + PCSTS))[0])
            
            if ((classifiedStars == CS) & (classifiedGalaxies == CG)):
                totalCorr += 1
        likelihood[TS][TG] += totalCorr / realizations

In [5]:
np.where(likelihood == np.max(likelihood))

(array([2]), array([3]))

In [6]:
calcLikelihood = np.zeros([starCount, galaCount])
for TS in np.arange(starCount):
    for TG in np.arange(galaCount):
        sumProb = 0
        lowerLim = np.max([0, CS - TG])
        upperLim = np.min([CS, TS])
        if lowerLim > upperLim:
            continue
        for CsfTs in np.arange(lowerLim, upperLim + 1):
            PCsfTsTS = binom(TS, CsfTs) * (PCSTS**CsfTs) * ((1 - PCSTS)**(TS-CsfTs))
            PCsfTgTG = binom(TG, CS-CsfTs) * (PCSTG**(CS-CsfTs)) * ((1 - PCSTG)**(TG - CS + CsfTs))
            
            As = TS - CsfTs
            Ag = TG - (CS - CsfTs)
            Ng = As + Ag
            if Ng < CG:
                continue
            Pg = (As/Ng) * (PCGTS / (1 - PCSTS)) + (Ag/Ng) * (PCGTG / (1 - PCSTG))
            
            PCg = binom(Ng, CG) * (Pg**CG) * ((1-Pg)**(Ng-CG))
            
            sumProb += PCsfTsTS * PCsfTgTG * PCg
            
        calcLikelihood[TS][TG] = sumProb

In [7]:
likelihood

array([[0.      , 0.      , 0.      , 0.      , 0.      ],
       [0.      , 0.      , 0.      , 0.      , 0.178987],
       [0.      , 0.      , 0.      , 0.219155, 0.11522 ],
       [0.      , 0.      , 0.177237, 0.157662, 0.050616],
       [0.      , 0.115934, 0.15295 , 0.074654, 0.018849]])

In [8]:
calcLikelihood

array([[0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.18122921],
       [0.        , 0.        , 0.        , 0.22531239, 0.10242715],
       [0.        , 0.        , 0.18886993, 0.13733546, 0.05186594],
       [0.        , 0.12398943, 0.14007898, 0.06949578, 0.02261333]])

In [9]:
np.where(calcLikelihood == np.max(calcLikelihood))

(array([2]), array([3]))

## Interpretation

This shows a clear and obvious failure. The formula derived from expectation values does not hold up in the maximum likelihood test. Bummer.