In [None]:
from datascience import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Updating Probabilities

Recall: For an event $E$ (when all outcomes are equally likely)<br>
$P(E) = \dfrac{\text{number of outcomes in which E happens}}{\text{number of all possible outcomes}}$

## Conditional Probabilities

Instead of looking at all possible outcomes, we want to focus on a subset of those outcomes.

$P(\text{A given B has happened}) = P(A|B) = \dfrac{P(A \text{ and } B)}{P(B)} = \dfrac{\text{Chance of both A and B happening}}{\text{Chance of B happening}}$


### Scenario 1

* Class consists of second years (60%) and third years (40%)
* 50% of the second years have declared their major
* 80% of the third years have declared their major


In [None]:
# Build a 100-student class that matches the Scenario 1 percentages exactly.
# np.repeat(labels, counts) repeats each label the given number of times,
# producing one array entry per student.

n = 100
second = round(n * 0.6)   # 60% of the class are second years
third = round(n * 0.4)    # 40% of the class are third years

year = np.repeat(['Second', 'Third'], [second, third])

# Rows are ordered to line up with `year`: all second years first
# (50% declared, 50% undeclared), then third years (80% declared, 20% undeclared).
major_labels = ['Declared', 'Undeclared', 'Declared', 'Undeclared']
major_counts = [
    round(second * 0.5),
    round(second * 0.5),
    round(third * 0.8),
    round(third * 0.2),
]
major = np.repeat(major_labels, major_counts)

students = Table().with_columns('Year', year, 'Major', major)
students

### Scenario 2

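Testing for a disease: k% of the population studied has the disease.
* False positive rate is 5% (5% of people without the disease test positive)
* False negative rate is 0% (everyone with the disease tests positive)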

In [None]:
def create_population(prior_disease_prob, n, false_positive_rate=0.05, false_negative_rate=0.0):
    """Build a synthetic population and cross-tabulate disease status by test result.

    The population is deterministic (no random sampling): the requested
    proportions are converted to whole-person counts.

    Parameters
    ----------
    prior_disease_prob : float
        Proportion of the population that has the disease, between 0 and 1.
    n : int
        Total number of people in the population.
    false_positive_rate : float, optional
        Proportion of people WITHOUT the disease who test positive (default 0.05).
    false_negative_rate : float, optional
        Proportion of people WITH the disease who test negative (default 0.0).

    Returns
    -------
    Table
        The result of ``pivot('Test Result', 'Status')``: counts of people with
        one row per 'Status' value and one column per 'Test Result' value.

    Raises
    ------
    ValueError
        If any of the three proportions lies outside [0, 1] or n is negative.
    """
    rates = (
        ('prior_disease_prob', prior_disease_prob),
        ('false_positive_rate', false_positive_rate),
        ('false_negative_rate', false_negative_rate),
    )
    for label, value in rates:
        if not 0 <= value <= 1:
            raise ValueError(label + ' must be between 0 and 1')

    total = round(n)
    if total < 0:
        raise ValueError('n must be non-negative')

    # Round only ONE side of each split and take the complement for the other.
    # Rounding both sides independently can drop or add a person
    # (e.g. round(2.5) + round(2.5) == 4, not 5), which would make the
    # population size differ from n or the two columns differ in length.
    disease = round(total * prior_disease_prob)
    no_disease = total - disease

    false_neg = round(disease * false_negative_rate)
    true_pos = disease - false_neg
    false_pos = round(no_disease * false_positive_rate)
    true_neg = no_disease - false_pos

    # Both arrays list the 'Disease' group first, then the 'No disease' group,
    # so that row i of `status` and row i of `result` describe the same person.
    status = np.array(['Disease'] * disease + ['No disease'] * no_disease)
    result = np.array(['Test +'] * true_pos + ['Test -'] * false_neg +
                      ['Test +'] * false_pos + ['Test -'] * true_neg)

    t = Table().with_columns(
        'Status', status,
        'Test Result', result
    )
    return t.pivot('Test Result', 'Status')


## More Likely Than Not ##

Regular probability:</br>
If we pick one student at random, which is more likely: Second year or Third year?

SOLUTION: Second year because there are more of them. 

What if we ask the question differently?

***If we pick one student at random who has declared their major,***</br>
***is a second or third year more likely?***

In [None]:
# Peek at the first 3 rows of the students table built for Scenario 1.
students.show(3)

In [None]:
# Count the students in every Year / Major combination:
# one row per 'Year' value, one column per 'Major' value.
students.pivot('Major', 'Year')

In [None]:
# Verify: 60% of students are Second years, 40% are Third years



In [None]:
# Verify: 50% of Second years have Declared



In [None]:
# Verify: 80% of Third years have Declared



In [None]:
# Conditional Probability
# Chance of second year, given that they have declared
# P(second year | declared)



In [None]:
# Conditional Probability
# P(third year | declared)



Which was more likely? Why do you think that is?

## Tree Diagram Calculation

![image-3.png](attachment:image-3.png)


In [None]:
# Conditional Probability
# P(second year | declared), from tree diagram



## Decisions ##

Use the defined function above to create a population that has</br>
* a 1/1000 chance of getting a disease
* with an $n$ of 10000

In [None]:
# 10,000 people with a prior disease probability of 1/1000;
# the output is the Status-by-Test-Result pivot table of counts.
create_population(1/1000, 10000)

In [None]:
# Find the probability that someone who tests positive has the disease.
# P(Has Disease|Test Positive)



### Bayes' Rule for conditional probability

Purpose of Bayes' Rule <br>
Update your prediction based on new information. <br>

In a multi-stage experiment, find the chance of an event at </br>
an earlier stage, given the result of a later stage.

$P(A|B) = \dfrac{P(B|A)\cdot P(A)}{P(B)}$


In [None]:
# Bayes' Rule, with A = "has the disease" and B = "tested +":
#   P(disease | tested +) = P(tested + | disease) * P(disease) / P(tested +)
# Counts come from a population of 10000 with a 1/1000 prior chance of disease:
# 10 people have the disease and 510 people in total test positive.

p_ba = 10 / 10        # P(tested + | disease): all 10 people with the disease test positive
p_a = 10 / 10000      # P(disease): 10 of the 10000 people have the disease
p_b = 510 / 10000     # P(tested +): 510 of the 10000 people test positive

p_ba * p_a / p_b

In [None]:
#  P(disease | tested +)
# = P(disease & tested +) / P(tested +)
# if prior probability of disease is 1/1000



In [None]:
#  P(disease | tested +)
# = P(disease & tested +) / P(tested +)
# if prior probability of disease is 1/10



Use the defined function above to create a population that has</br>
* a 1/10 chance of getting a disease
* with an $n$ of 10000

In [None]:
# 10,000 people with a prior disease probability of 1/10;
# the output is the Status-by-Test-Result pivot table of counts.
create_population(1/10, 10000)

In [None]:
# P(Has Disease|Test Positive)


In [None]:
# P(disease | tested +)
# if prior probability of disease is 0.5



Use the defined function above to create a population that has</br>
* a 1/2 chance of getting a disease
* with an $n$ of 10000

In [None]:
# 10,000 people with a prior disease probability of 1/2;
# the output is the Status-by-Test-Result pivot table of counts.
create_population(0.5, 10000)

In [None]:
# P(Has Disease|Test Positive)



![image.png](attachment:image.png)