# Bite Size Bayes

This notebook presents example code and exercise solutions for Think Bayes.

Copyright 2020 Allen B. Downey

MIT License: https://opensource.org/licenses/MIT

In [64]:
import pandas as pd
import numpy as np

In [65]:
from utils import read_gss

# Load the survey data with the project-local helper (its parsing is not
# visible from this notebook), then preview the first five rows.
gss = read_gss('data/gss_bayes')
gss.head()

Unnamed: 0,year,relig,srcbelt,region,adults,wtssall,ballot,cohort,feminist,polviews,partyid,race,sex,educ,age,indus10,occ10,id_,realinc
0,1972,3,3,3,1,0.4446,0,1949,0,0,2,1,2,16,23,5170,520,1,18951.0
1,1972,2,3,3,2,0.8893,0,1902,0,0,1,1,1,10,70,6470,7700,2,24366.0
2,1972,1,3,3,2,0.8893,0,1924,0,0,3,1,2,12,48,7070,4920,3,24366.0
3,1972,5,3,3,2,0.8893,0,1945,0,0,1,1,2,17,27,5170,800,4,30458.0
4,1972,1,3,3,2,0.8893,0,1911,0,0,0,1,2,12,61,6680,5020,5,50763.0


In [66]:
def replace_invalid(series, bad_vals, replacement=np.nan):
    """Overwrite unwanted codes in a Series, modifying it in place.

    series: pandas Series to clean; it is mutated and nothing is returned
    bad_vals: value or list of values to treat as invalid
    replacement: what each invalid value becomes (NaN by default)

    NOTE(review): callers pass a DataFrame column and rely on the change
    being visible in the DataFrame; confirm this still holds if pandas
    Copy-on-Write is enabled.
    """
    series.replace(to_replace=bad_vals, value=replacement, inplace=True)
    
# Blank out the codes listed for each column (they become NaN, the default
# replacement). Presumably these are the not-applicable / don't-know /
# no-answer codes; confirm against the GSS codebook.
replace_invalid(gss['feminist'], [0, 8, 9])
replace_invalid(gss['polviews'], [0, 8, 9])
# For partyid, 0 is left in place; only 8 and 9 are removed.
replace_invalid(gss['partyid'], [8, 9])
replace_invalid(gss['indus10'], [0])
replace_invalid(gss['occ10'], [0])

In [67]:
def values(series):
    """Tabulate how often each distinct value occurs, ordered by value.

    series: pandas Series
    returns: Series of counts indexed by the distinct values in ascending
             order; NaN entries are left out of the tally
    """
    counts = series.value_counts()
    return counts.sort_index()

https://gssdataexplorer.norc.org/projects/52787/variables/1698/vshow

In [68]:
# Counts of each remaining `feminist` code; 0, 8 and 9 were replaced with
# NaN earlier, and NaN is not tallied.
values(gss.feminist)

1.0     298
2.0    1083
Name: feminist, dtype: int64

https://gssdataexplorer.norc.org/projects/52787/variables/178/vshow

In [69]:
# Counts of each remaining `polviews` code; 0, 8 and 9 were replaced with
# NaN earlier, and NaN is not tallied.
values(gss.polviews)

1.0     1560
2.0     6236
3.0     6754
4.0    20515
5.0     8407
6.0     7876
7.0     1733
Name: polviews, dtype: int64

https://gssdataexplorer.norc.org/projects/52787/variables/141/vshow

In [70]:
# Counts of each remaining `partyid` code; only 8 and 9 were replaced with
# NaN earlier, so code 0 is still tallied.
values(gss.partyid)

0.0     9999
1.0    12942
2.0     7485
3.0     9474
4.0     5462
5.0     9661
6.0     6063
7.0      995
Name: partyid, dtype: int64

https://gssdataexplorer.norc.org/projects/52787/variables/82/vshow

In [71]:
# Counts of each `race` code as loaded (no codes were invalidated for it).
values(gss.race)

1    50340
2     8802
3     3324
Name: race, dtype: int64

https://gssdataexplorer.norc.org/projects/52787/variables/81/vshow

In [72]:
# Counts of each `sex` code as loaded (no codes were invalidated for it).
values(gss.sex)

1    27562
2    34904
Name: sex, dtype: int64

https://gssdataexplorer.norc.org/projects/52787/variables/17/vshow

Industry code 6870 is "Banking and related activities".

In [73]:
# Counts for the five smallest `indus10` codes; code 0 was replaced with
# NaN earlier and is therefore absent.
values(gss.indus10).head()

170.0    458
180.0    444
190.0     37
270.0     69
280.0     36
Name: indus10, dtype: int64

In [74]:
# Fraction of ALL rows in gss whose industry code is 6870. Rows where
# indus10 is NaN compare as False, so they remain in the denominator.
np.mean(gss.indus10 == 6870)

0.013767489514295777

In [75]:
# Same fraction via the Series method; again computed over every row of
# gss, including those where indus10 is NaN.
(gss.indus10 == 6870).mean()

0.013767489514295777

In [76]:
# Keep only the rows where all four variables used below are present; the
# other columns are carried along unchanged. `shape` is (rows, columns).
subset = gss.dropna(subset=['sex', 'polviews', 'partyid', 'indus10'])
subset.shape

(50287, 19)

In [77]:
# Bind just the four columns the analysis below uses as notebook-level
# names. Dumping the whole DataFrame into globals() would create one global
# per column (including 'Unnamed: 0', which is not a valid identifier) and
# could silently overwrite any existing variable that shares a column name.
sex = subset['sex']
polviews = subset['polviews']
partyid = subset['partyid']
indus10 = subset['indus10']

In [78]:
# Boolean Series over the rows of `subset`: True where the `sex` code is 2,
# which is the code this notebook treats as female.
female = sex == 2
values(female)

False    23205
True     27082
Name: sex, dtype: int64

In [79]:
# Boolean Series: True where the `polviews` code is 1 or 2 (0 and missing
# values were removed earlier). Presumably the liberal end of the scale;
# confirm against the codebook.
liberal = polviews <= 2
values(liberal)

False    42874
True      7413
Name: polviews, dtype: int64

In [80]:
# Boolean Series: True where the `partyid` code is 0 or 1. Presumably the
# Democratic end of the scale; confirm against the codebook.
democrat = partyid <= 1
values(democrat)

False    31862
True     18425
Name: partyid, dtype: int64

In [81]:
# Boolean Series: True where the industry code is 6870, "Banking and
# related activities".
banker = indus10 == 6870
values(banker)

False    49556
True       731
Name: indus10, dtype: int64

In [82]:
# Count the True entries one at a time, as a check on the vectorised tally
# shown for `banker`.
total = 0
for x in banker:
    # Test truthiness rather than identity: `x is True` matches only the
    # Python `True` singleton and would silently skip NumPy booleans.
    if x:
        total += 1

total

731

In [83]:
# Number of True entries divided by the number of rows: the fraction of
# rows in `subset` that are bankers.
total / len(banker)

0.014536560144769025

In [84]:
def prob(A):
    """Fraction of entries in A that are True.

    A: boolean Series
    returns: the mean of A, which for booleans is the proportion of True
    """
    fraction = A.mean()
    return fraction

In [85]:
def count(A):
    """How many entries of A are True.

    A: boolean Series
    returns: the sum of A, which for booleans is the number of True entries
    """
    n_true = A.sum()
    return n_true

In [86]:
# Fraction of rows in `subset` that are female.
prob(female)

0.5385487302881461

In [87]:
# Fraction of rows in `subset` that are liberal.
prob(liberal)

0.14741384453238413

In [88]:
# Fraction of rows in `subset` that are democrat.
prob(democrat)

0.36639688189790603

In [89]:
# Fraction of rows in `subset` that are bankers; matches total / len(banker).
prob(banker)

0.014536560144769025

In [90]:
# `&` is the element-wise AND of the two boolean Series, so this is the
# fraction of rows that are both democrat and liberal.
prob(democrat & liberal)

0.08300355956807923

In [91]:
# Boolean indexing keeps only the rows where `female` is True; count the
# bankers among them.
count(banker[female])

563

In [92]:
# Fraction of bankers among the female rows, i.e. P(banker | female).
prob(banker[female])

0.020788715752160108

In [93]:
# Fraction of ALL rows that are both female and banker, i.e.
# P(female and banker); the denominator is every row, not just female rows.
prob(female & banker)

0.011195736472647006

In [94]:
# P(banker | female) computed as P(banker and female) / P(female); agrees
# with prob(banker[female]) up to floating-point rounding.
prob(banker & female) / prob(female)

0.020788715752160104

In [95]:
def conditional(A, B):
    """P(A | B): fraction of True entries of A among the rows where B holds.

    A, B: boolean Series over the same rows
    returns: prob() of A restricted, by boolean indexing, to rows where B
             is True
    """
    a_given_b = A[B]
    return prob(a_given_b)

In [96]:
# P(banker | female).
conditional(banker, female)

0.020788715752160108

In [97]:
# P(liberal | democrat).
conditional(liberal, democrat)

0.22654002713704205

In [98]:
# P(democrat | liberal): the arguments are swapped relative to
# conditional(liberal, democrat), and the result is different.
conditional(democrat, liberal)

0.5630648860110616

In [99]:
# P(democrat | female).
conditional(democrat, female)

0.4006720330847057

In [100]:
def conjunction(A, B):
    """P(A and B), computed as P(A) * P(B | A).

    A, B: boolean Series over the same rows
    """
    p_a = prob(A)
    p_b_given_a = conditional(B, A)
    return p_a * p_b_given_a

In [101]:
# P(liberal and democrat) computed directly from the element-wise AND.
prob(liberal & democrat)

0.08300355956807923

In [102]:
# The same probability as P(liberal) * P(democrat | liberal).
conjunction(liberal, democrat)

0.08300355956807923

In [103]:
# Product of the two marginals. This equals P(liberal and democrat) only
# when the two are independent.
prob(liberal) * prob(democrat)

0.05401197298524823

In [104]:
# Arguments swapped: P(democrat) * P(liberal | democrat).
conjunction(democrat, liberal)

0.08300355956807923

In [110]:
# Bayes's theorem written out by hand:
# P(banker | female) = P(banker) * P(female | banker) / P(female).
prob(banker) * conditional(female, banker) / prob(female)

0.020788715752160104

In [34]:
def bayes_theorem(A, B):
    """P(A | B) obtained from Bayes's theorem: P(A) * P(B | A) / P(B).

    A, B: boolean Series over the same rows
    """
    prior = prob(A)
    likelihood = conditional(B, A)
    evidence = prob(B)
    return prior * likelihood / evidence

In [35]:
# P(democrat | liberal) via Bayes's theorem; compare with
# conditional(democrat, liberal).
bayes_theorem(democrat, liberal)

0.5630648860110616

In [36]:
# P(banker | female), repeated as a baseline for the narrower conditions
# that follow.
conditional(banker, female)

0.020788715752160104

In [37]:
# Condition on two things at once: P(banker | female and liberal).
conditional(banker, female & liberal)

0.015170355632927133

In [38]:
# Both sides can be conjunctions: P(banker and democrat | female and liberal).
conditional(banker & democrat, female & liberal)

0.007709524993782641