In [None]:
#Conditional Probability

In [1]:
from numpy import random
random.seed(0)

totals = {20:0, 30:0, 40:0, 50:0, 60:0, 70:0}
purchases = {20:0, 30:0, 40:0, 50:0, 60:0, 70:0}
totalPurchases = 0
for _ in range(100000):
    ageDecade = random.choice([20, 30, 40, 50, 60, 70])
    purchaseProbability = float(ageDecade) / 100.0
    totals[ageDecade] += 1
    if (random.random() < purchaseProbability):
        totalPurchases += 1
        purchases[ageDecade] += 1

In [2]:
totals

{20: 16576, 30: 16619, 40: 16632, 50: 16805, 60: 16664, 70: 16704}

In [3]:
purchases

{20: 3392, 30: 4974, 40: 6670, 50: 8319, 60: 9944, 70: 11713}

In [4]:
totalPurchases

45012

In [5]:
purchaseProbability

0.2

In [6]:
# The probability of someone in their 30's buying something is just the percentage of how many 30-year-olds bought something:
# E --> Purchase
# F --> Event of being in an a Age Group (30s) in this example

PEF = float(purchases[30]) / float(totals[30])
print('P(purchase | 30s): ' + str(PEF))

P(purchase | 30s): 0.29929598652145134


In [7]:
purchases[30]

4974

In [8]:
totals[30]

16619

In [9]:
# P(F) is just the probability of being 30 in this data set:

PF = float(totals[30]) / 100000.0
print("P(30's): " +  str(PF))

P(30's): 0.16619


In [10]:
# And P(E) is the overall probability of buying something, regardless of your age:

PE = float(totalPurchases) / 100000.0
print("P(Purchase):" + str(PE))

P(Purchase):0.45012


In [11]:
# What is P(E)P(F)?
print("P(30's)P(Purchase)" + str(PE * PF))

P(30's)P(Purchase)0.07480544280000001


In [12]:
# P(E,F) is different from P(E|F). P(E,F) would be the probability of both being in your 30's 
# and buying something, out of the total population - not just the 
# population of people in their 30's:

print("P(30's, Purchase)" + str(float(purchases[30]) / 100000.0))


P(30's, Purchase)0.04974


In [13]:
# P(E,F) = P(E)P(F), and they are pretty close in this example. 
# But because E and F are actually dependent on each other, and the randomness of the data we're working with, 
# it's not quite the same.

# We can also check that P(E|F) = P(E,F)/P(F) and sure enough, it is:
print((purchases[30] / 100000.0) / PF)

0.29929598652145134


In [20]:
# totals independent from purchases example
from numpy import random
random.seed(0)

totals = {20:0, 30:0, 40:0, 50:0, 60:0, 70:0}
purchases = {20:0, 30:0, 40:0, 50:0, 60:0, 70:0}

totalPurchases = 0
for _ in range(100000):
    ageDecade = random.choice([20, 30, 40, 50, 60, 70])
    
    # purchaseProbability = float(ageDecade) / 100.0
    purchaseProbability = .40
    
    totals[ageDecade] += 1
    if (random.random() < purchaseProbability):
        totalPurchases += 1
        purchases[ageDecade] += 1

In [15]:
totals

{20: 16576, 30: 16619, 40: 16632, 50: 16805, 60: 16664, 70: 16704}

In [16]:
purchases

{20: 6710, 30: 6627, 40: 6670, 50: 6665, 60: 6638, 70: 6720}

In [17]:
totalPurchases

40030

In [18]:
for x, y in totals.items():
  print(x, y)

20 16576
30 16619
40 16632
50 16805
60 16664
70 16704


In [19]:
for x, y in purchases.items():
  print(x, y)

20 6710
30 6627
40 6670
50 6665
60 6638
70 6720


In [21]:
for x, y in purchases.items():
  print(x, y)

20 6710
30 6627
40 6670
50 6665
60 6638
70 6720


In [22]:
for x, y in totals.items():
  print(x, y)

20 16576
30 16619
40 16632
50 16805
60 16664
70 16704


In [23]:
PEF = float(purchases[30]) / float(totals[30])
print ("P(purchase | 30s): ", PEF)

P(purchase | 30s):  0.3987604549010169


In [26]:
PE = float(totalPurchases) / 100000.0
print ("P(purchase): ", PEF)

P(purchase):  0.3987604549010169
