In [0]:
import scipy.stats as stats
import math


def get_weighted_data(data, total):
  """Return a new dict with every value of ``data`` divided by ``total``.

  Args:
    data: dict whose values are numbers (the keys are kept as they are).
    total: divisor applied to every value.

  Returns:
    A dict with the same keys, in the same order, mapping to ``value / total``.
  """
  return {key: count / total for key, count in data.items()}


def get_mean(data, total):
  """Return the weighted mean of the keys of ``data``.

  Each key is multiplied by its value, the products are added up in
  iteration order, and the result is divided by ``total``.

  Args:
    data: dict mapping a numeric key to its weight.
    total: divisor for the weighted sum.
  """
  weighted_total = 0
  for value, weight in data.items():
    weighted_total += value * weight
  return weighted_total / total


def poisson_distr(mean, k):
  """Evaluate the Poisson probability mass ``exp(-mean) * mean**k / k!``.

  Args:
    mean: rate parameter of the distribution.
    k: event count; passed to ``math.factorial``.
  """
  scaled_power = math.exp(-mean) * (mean ** k)
  return scaled_power / math.factorial(k)


def do_all_for_me(data, total):
  """Print an observed-vs-Poisson comparison and return the Poisson probabilities.

  Prints the mean returned by ``get_mean(data, total)``, then one row per key
  of ``data``: the key, its value divided by ``total``, and the Poisson
  probability of that key at the printed mean.

  Args:
    data: dict forwarded to ``get_mean`` and ``get_weighted_data``.
    total: divisor forwarded to the same two helpers.

  Returns:
    dict mapping every key of ``data`` to ``poisson_distr(mean, key)``.
  """
  mean = get_mean(data, total)
  print('Mean:', mean)

  observed = get_weighted_data(data, total)
  predicted_data = {}
  for X_val in observed:
    expected = poisson_distr(mean, X_val)
    print('{}    {:<.5f}    {:<.8f}'.format(X_val, observed[X_val], expected))
    predicted_data[X_val] = expected
  return predicted_data


def process_data(data, store_index):
  """Pool every bucket above ``store_index`` into the ``store_index`` bucket.

  Args:
    data: dict mapping a key to its count. It is not modified.
    store_index: largest key to keep; counts of larger keys are added to it.

  Returns:
    A new dict holding the keys ``<= store_index`` in their original order.
    When larger keys exist, their counts are added to the ``store_index``
    entry, which is created if ``data`` had no such key.
  """
  pooled = {k: val for k, val in data.items() if k <= store_index}
  overflow = [val for k, val in data.items() if k > store_index]
  if overflow:
    # .get() covers a store_index that has no bucket of its own yet.
    tail = pooled.get(store_index, 0)
    for val in overflow:
      tail += val
    pooled[store_index] = tail
  return pooled


def print_predictions(data, predicted_data, total):
  """Print the observed entries followed by the scaled predictions.

  Args:
    data: dict printed as ``key:value`` lines under the DATA banner.
    predicted_data: dict of key -> probability; each probability is
      multiplied by ``total`` before being printed under the PREDICTIONS banner.
    total: scale factor applied to the probabilities.
  """
  print('================DATA===============')
  # Plain loops rather than comprehensions: only the print side effect is wanted.
  for k, val in data.items():
    print('%s:%s'%(k, val))
  print('============PREDICTIONS=============')
  for k, prob in predicted_data.items():
    print('%s:%s'%(k, prob*total))

In [4]:
# Set up a Binomial(n, p) model and a Poisson model with the matching rate n*p.
p = 1/1000000
n = 9500
# Poisson rate equal to the binomial mean n*p.
lam = n*p
# Inclusive range [start, end] over which a later cell sums the PMFs.
start = 0
end = 1
print(lam)

X = stats.binom(n=n, p = p)
Y = stats.poisson(lam)

0.0095


In [0]:
# PMF of both models evaluated at k = 200..399.
# NOTE(review): Xs and Ys are not read by any other cell visible here.
Xs = [X.pmf(x) for x in range(200,400)]
Ys = [Y.pmf(x) for x in range(200,400)]

In [0]:
# Accumulate P(start <= K <= end) under each model by adding the PMF term by term.
total_bin_prob = 0
total_poisson_prob = 0
for k in range(start , end+1):
  total_bin_prob += X.pmf(k)
  total_poisson_prob += Y.pmf(k)


In [7]:
# Complement of P(K = 0), i.e. P(K >= 1), under the binomial and the Poisson model.
1 - X.pmf(0), 1- Y.pmf(0)

(0.009455022262191304, 0.009455017557099543)

In [8]:
# Display the accumulated probabilities as (binomial, Poisson).
total_bin_prob, total_poisson_prob

(0.99995516443674, 0.999955159776108)

In [9]:
# Probability mass outside [start, end] under each model.
1 - total_bin_prob,   1 - total_poisson_prob


(4.483556325995952e-05, 4.484022389195097e-05)

In [10]:
# Poisson probability of exactly one event.
Y.pmf(1)

0.009410177333207554

# Poisson Modelling

In [11]:
# Table of value -> count; get_mean weights each key by its value.
data = {
    0: 109,
    1: 65,
    2: 22,
    3: 3,
    4: 1
}
# Hard-coded divisor; equals the sum of the counts above (109+65+22+3+1).
total = 200


mean = get_mean(data, total)

# Columns: value, observed relative frequency, Poisson probability at the fitted mean.
for X_val, prob in get_weighted_data(data, total).items():
  print('{}    {:<.5f}    {:<.8f}'.format(X_val, prob, poisson_distr(mean, X_val)))




0    0.54500    0.54335087
1    0.32500    0.33144403
2    0.11000    0.10109043
3    0.01500    0.02055505
4    0.00500    0.00313465


In [0]:
# The literal interleaves the runs 1..13, 14..26 and 27..40 with small values;
# presumably those runs are index labels pasted together with the observations — TODO confirm.
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 0, 0, 3, 4, 1, 0, 2, 0, 2, 3, 1, 2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2, 1, 3, 0, 2, 5, 2, 1, 1, 1, 2, 1, 3, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 1, 2, 0, 0, 1, 3, 1, 2, 0, 1, 4, 2, 1, 0]
# Drop the leading run 1..13.
arr = arr[13:]
# Keep only values <= 5. This removes 14..40, and would equally drop any
# genuine observation above 5.
arr = [x for x in arr if x <=5]

In [13]:
# Tally how often each value occurs in arr (keys appear in first-seen order).
data = {}
for val in arr:
  data[val] = data.get(val, 0) + 1

# Hard-coded divisor; matches the 40 values left in arr after the filtering cell.
total = 40

mean = get_mean(data, total)

# Columns: value, observed relative frequency, Poisson probability at the fitted mean.
for X_val, prob in get_weighted_data(data, total).items():
  print('{}    {:<.5f}    {:<.8f}'.format(X_val, prob, poisson_distr(mean, X_val)))



1    0.32500    0.33187211
0    0.22500    0.21762106
3    0.12500    0.12863501
4    0.05000    0.04904210
2    0.25000    0.25305249
5    0.02500    0.01495784


In [14]:
# Table of value -> count; total is hard-coded and equals the sum of the counts.
data = {0: 82, 1: 25, 2: 4, 3: 0, 4: 2}
total = 113

# Prints the comparison table; the returned dict of Poisson probabilities is the cell output.
do_all_for_me(data, total)

Mean: 0.36283185840707965
0    0.72566    0.69570340
1    0.22124    0.25242336
2    0.03540    0.04579362
3    0.00000    0.00553846
4    0.01770    0.00050238


{0: 0.6957034003432575,
 1: 0.2524233576466687,
 2: 0.04579361798014786,
 3: 0.0055384611716403015,
 4: 0.0005023825399054256}

In [15]:
# Table of value -> count; total is hard-coded and equals the sum of the counts.
data = {0: 162, 1: 267, 2: 271, 3: 185, 4: 111, 5: 61, 6: 27, 7: 8, 8: 3, 9: 1}
total = 1096

predicted_data = do_all_for_me(data, total)
# Scale each predicted probability by total to get an expected count per value.
{k:prob*total for k, prob in predicted_data.items()}

Mean: 2.156934306569343
0    0.14781    0.11567921
1    0.24361    0.24951247
2    0.24726    0.26909100
3    0.16880    0.19347054
4    0.10128    0.10432581
5    0.05566    0.04500478
6    0.02464    0.01617873
7    0.00730    0.00498521
8    0.00274    0.00134410
9    0.00091    0.00032213


{0: 126.78441963830575,
 1: 273.4656642563456,
 2: 294.9237364516428,
 3: 212.04370832472128,
 4: 114.34108724444364,
 5: 49.325242745595766,
 6: 17.731884709639356,
 7: 5.463787207193357,
 8: 1.4731287588737563,
 9: 0.3530491064454135}

In [16]:
data = {0: 55, 1: 20, 2: 21, 3: 1, 4: 1, 5: 1, 6: 0, 7: 1}
# Fold the counts of every key above 3 into the entry for 3.
data = process_data(data, 3)
# Hard-coded divisor; equals the sum of the counts (pooling keeps that sum unchanged).
total = 100

predicted_data = do_all_for_me(data, total)

# Print observed counts next to the predicted probabilities scaled by total.
print_predictions(data, predicted_data, total)


Mean: 0.74
0    0.55000    0.47711392
1    0.20000    0.35306430
2    0.21000    0.13063379
3    0.04000    0.03222300
0:55
1:20
2:21
3:4
0:47.711391552103436
1:35.306429748556546
2:13.063379006965919
3:3.222300155051594


In [17]:
mean = 4.5
X = stats.poisson(mean)
P = X.pmf
# P(X >= 3) for X ~ Poisson(4.5), as the complement of P(X <= 2).
1 - (P(0) + P(1) + P(2))

0.8264219290899639

In [18]:
# mean = ln(3), the rate at which P(X = 0) = exp(-mean) = 1/3.
mean = - math.log(1/3)
X = stats.poisson(mean)
P = X.pmf
# P(X >= 2), as the complement of P(X <= 1).
1 - (P(0) + P(1))

0.3004625704439634

In [19]:
mean = 4

X = stats.poisson(mean)
P = X.pmf
# P(X >= 3) for X ~ Poisson(4), as the complement of P(X <= 2).
1 - (P(0) + P(1) + P(2))

0.7618966944464557

In [20]:
minutes = 120
particles = 482
# Twice the per-minute rate particles/minutes; presumably the expected count
# in a two-minute window — TODO confirm against the exercise statement.
mean = particles/minutes*2

X = stats.poisson(mean)
P = X.pmf
# Probability of exactly 3 events at this rate.
P(3)

0.028035207525106787

In [21]:
# Rate for a single interval (half of the rate used in the previous cell).
mean = particles / minutes
X = stats.poisson(mean)
P = X.pmf
# Probability that two independent counts at this rate add up to 3:
# the ordered pairs (0,3), (3,0), (1,2), (2,1).
2*P(0)*P(3) + 2*P(1)*P(2)

0.02803520752510679

In [22]:
mean = 0.5
X = stats.poisson(mean)
P = X.pmf
# Probability that two independent Poisson(0.5) counts are both exactly 2.
P(2)**2

0.005748116268303788

In [23]:
mean = 1
X = stats.poisson(mean)
P = X.pmf
# Probability of exactly 4 events for X ~ Poisson(1).
P(4)

0.015328310048810101

In [24]:
mean = 2.5
X = stats.poisson(mean)
P = X.pmf
# P(X >= 4) for X ~ Poisson(2.5). Bound to a name because a notebook cell only
# displays its last expression; a bare statement here would be discarded.
prob_at_least_4 = 1 - (sum(P(x) for x in range(0,4)))

# scipy's expon is parameterised by (loc, scale): a positional 1.5 is a shift
# of the support, while a rate of 1.5 is expressed as scale = 1/1.5.
# NOTE(review): assumes 1.5 was meant as the rate — confirm against the exercise.
I = stats.expon(scale=1/1.5)
# Value of 1 - exp(-0.7); this is the expression the cell displays.
1 - math.exp(-0.7)

0.5034146962085905

In [26]:
n = 10
mean = 3*n
# NOTE(review): the distribution below is built with the literal 3, not with
# `mean` (= 3*n = 30) computed above; n and mean are otherwise unused in this
# cell — confirm which rate is intended.
X = stats.poisson(3)
P = X.pmf
# P(X > 7), i.e. P(X >= 8), as the complement of the CDF at 7.
1 - X.cdf(7)

0.01190450385635744