# Probability

In [1]:
include("utils.jl")
using .Utils
checkpkgs("CSV", "DataFrames", "Statistics")

## Linda the Banker

## Probability

In [2]:
getfile("https://github.com/AllenDowney/BiteSizeBayes/raw/master/gss_bayes.csv")

In [3]:
import CSV
using DataFrames

gss = CSV.read("gss_bayes.csv", DataFrame; header=1)
first(gss, 7)

Unnamed: 0_level_0,caseid,year,age,sex,polviews,partyid,indus10
Unnamed: 0_level_1,Int64,Int64,Float64,Int64,Float64,Float64,Float64
1,1,1974,21.0,1,4.0,2.0,4970.0
2,2,1974,41.0,1,5.0,0.0,9160.0
3,5,1974,58.0,2,6.0,1.0,2670.0
4,6,1974,30.0,1,5.0,4.0,6870.0
5,7,1974,48.0,1,5.0,4.0,7860.0
6,8,1974,67.0,1,5.0,0.0,2870.0
7,10,1974,54.0,2,6.0,0.0,9470.0


In [4]:
banker = gss.indus10 .== 6870
first(banker, 10)

10-element BitVector:
 0
 0
 0
 1
 0
 0
 0
 0
 0
 0

In [5]:
sum(banker)

728

In [6]:
using Statistics # for `mean()`
mean(banker)

0.014769730168391155

## The Probability Function

In [7]:
# Probability of a proposition, computed as the mean of `A`.
# For a Boolean vector this is the fraction of entries that are `true`.
function prob(A)
    return mean(A)
end

prob (generic function with 1 method)

In [8]:
female = gss.sex .== 2;

In [9]:
prob(female)

0.5378575776019476

## Political Views and Parties

In [10]:
liberal = gss.polviews .<= 3;

In [11]:
prob(liberal)

0.27374721038750255

In [12]:
democrat = gss.partyid .<= 1;

In [13]:
prob(democrat)

0.3662609048488537

## Conjunction

In [14]:
prob(banker)

0.014769730168391155

In [15]:
prob(democrat)

0.3662609048488537

In [16]:
prob(banker .& democrat)

0.004686548995739501

In [17]:
prob(democrat .& banker)

0.004686548995739501

## Conditional Probability

In [18]:
selected = democrat[liberal];

In [19]:
prob(selected)

0.5206403320240125

In [20]:
selected = female[banker]
prob(selected)

0.7706043956043956

In [21]:
"""
    conditional(proposition, given)
    conditional(proposition; given)

Return `prob` of the entries of `proposition` selected by indexing with `given`,
i.e. `prob(proposition[given])`.

`given` may be supplied either positionally or as a keyword argument.
"""
function conditional(proposition, given)
    selected = proposition[given]
    return prob(selected)
end

# Keyword form, so that calls can read `conditional(a, given=b)`.
function conditional(proposition; given)
    return conditional(proposition, given)
end

conditional (generic function with 2 methods)

In [22]:
conditional(liberal, given=female)

0.27581004111500884

## Conditional Probability Is Not Commutative

In [23]:
conditional(female, given=banker)

0.7706043956043956

In [24]:
conditional(banker, given=female)

0.02116102749801969

## Condition and Conjunction

In [25]:
conditional(female, given=liberal .& democrat)

0.576085409252669

In [26]:
conditional(liberal .& female, given=banker)

0.17307692307692307

## Laws of Probability

### Theorem 1

In [27]:
mean(female[banker])

0.7706043956043956

In [28]:
conditional(female, given=banker)

0.7706043956043956

In [29]:
# Theorem 1: P(A | B) = P(A and B) / P(B), with A = female and B = banker.
prob(female .& banker) / prob(banker)

0.7706043956043956

### Theorem 2

In [30]:
prob(liberal .& democrat)

0.1425238385067965

In [31]:
# Theorem 2: P(A and B) = P(B) * P(A | B), with A = liberal and B = democrat.
prob(democrat) * conditional(liberal, given=democrat)

0.1425238385067965

### Theorem 3

In [32]:
conditional(liberal, given=banker)

0.2239010989010989

In [33]:
# Theorem 3: P(A | B) = P(A) * P(B | A) / P(B), with A = liberal and B = banker.
prob(liberal) * conditional(banker, given=liberal) / prob(banker)

0.2239010989010989

### The Law of Total Probability

In [34]:
prob(banker)

0.014769730168391155

In [35]:
male = gss.sex .== 1;

In [36]:
prob(male .& banker) + prob(female .& banker)

0.014769730168391155

In [37]:
(prob(male) * conditional(banker, given=male) +
prob(female) * conditional(banker, given=female))

0.014769730168391153

In [38]:
B = gss.polviews
sort(combine(groupby(gss, :polviews), nrow))

Unnamed: 0_level_0,polviews,nrow
Unnamed: 0_level_1,Float64,Int64
1,1.0,1442
2,2.0,5808
3,3.0,6243
4,4.0,18943
5,5.0,7940
6,6.0,7319
7,7.0,1595


In [39]:
i = 4
# One term of the total-probability sum: P(B == i) * P(banker | B == i).
prob(B .== i) * conditional(banker, given=(B .== i))

0.005822682085615744

In [40]:
# Law of total probability: add P(B == i) * P(banker | B == i) over the values 1 to 7.
sum(prob(B .== i) * conditional(banker, given=(B .== i)) for i in 1:7)

0.014769730168391157

## Exercises

In [41]:
prob(female .& banker)

0.011381618989653074

In [42]:
prob(female .& banker .& liberal)

0.002556299452221546

In [43]:
prob(female .& banker .& liberal .& democrat)

0.0012375735443294787

In [44]:
conditional(liberal, given=democrat)

0.3891320002215698

In [45]:
# Mask of rows whose `age` is below 30.
young = gss.age .< 30
prob(young)

0.19435991073240008

In [46]:
# Mask of rows whose `age` is 65 or above.
old = gss.age .>= 65
prob(old)

0.17328058429701765

In [47]:
# Mask of rows whose `polviews` is 5 or above.
conservative = gss.polviews .>= 5
prob(conservative)

0.3419354838709677

In [48]:
# Solution

prob(young .& liberal)

0.06579427875836884

In [49]:
# Solution

conditional(liberal, given=young)

0.338517745302714

In [50]:
# Solution

prob(old .& conservative)

0.06701156421180766

In [51]:
# Solution

conditional(old, given=conservative)

0.19597721609113564