In [1]:
import numpy as np
import pandas as pd
import random

In [6]:
# How likely is it that you roll doubles when rolling two dice
rolls = np.random.choice([1,2,3,4,5,6], size=(2, 100_000))
rolls

array([[6, 2, 2, ..., 6, 5, 2],
       [4, 6, 4, ..., 6, 2, 5]])

In [11]:
die1 = rolls[0]
die2 = rolls[1]
(die1 == die2).mean()

0.16669

In [7]:
df = pd.DataFrame(rolls)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99990,99991,99992,99993,99994,99995,99996,99997,99998,99999
0,6,2,2,4,1,2,5,6,1,5,...,6,6,2,4,5,4,2,6,5,2
1,4,6,4,2,6,6,5,6,6,6,...,5,6,2,6,3,2,2,6,2,5


In [8]:
df["match"] = (df[0] == df[1])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99991,99992,99993,99994,99995,99996,99997,99998,99999,match
0,6,2,2,4,1,2,5,6,1,5,...,6,2,4,5,4,2,6,5,2,False
1,4,6,4,2,6,6,5,6,6,6,...,6,2,6,3,2,2,6,2,5,False


In [5]:
df['match'].mean()

0.16896

If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

# What is the probability that we will roll a 1 on a 6 sided dice?
- Because we're answering questions experimentally, we'll roll 100,000 times
- Because we're only rolling one dice at a time, our trial is choosing between `[1, 2, 3, 4, 5, 6]`

In [7]:
# write the numpy code to choose 1-6, 100,000 times
rolls = np.random.choice([1,2,3,4,5,6], size = 100_000)
rolls

array([6, 6, 2, ..., 6, 5, 2])

In [8]:
# if the job is to calculate probability, we need to count up the number of 1s that we rolled
# the count of the occurences out of 100,000 is the probability
rolls == 2 # the comparison operator == is working on the entire array for us, so we don't need a loop at all

array([False, False,  True, ..., False, False,  True])

In [10]:
(rolls == 2).sum() # tells us the sum total of all the Trues from the result of comparing each and every roll to 2 with == 

16515

In [11]:
(rolls == 2).sum() / rolls.size

0.16515

In [12]:
# Experimental probabilty based on our 100,000 simulated dice rolls
(rolls == 2).mean()

0.16515

In [13]:
# Theoretical probability
1/6

0.16666666666666666

In [17]:
# produce the experimental results of the chance of rolling a 1 on a 6 sided die
rolls
(rolls == 3).sum()
(rolls == 3).mean()

0.16786

In [18]:
# What is the probability of rolling a 1 or a 2 on a six sided dice?
# experimentally, let's roll a huge number of dice
# sum the count of ones plus the count of twos divided by the .size to get the average 
rolls = np.random.choice([1,2,3,4,5,6], size = 1_000_000)
rolls

array([4, 6, 4, ..., 3, 2, 2])

In [22]:
sum_of_rolls = ((rolls == 1) + (rolls == 2)).sum()
sum_of_rolls / rolls.size

0.333363

In [23]:
2/6

0.3333333333333333

In [24]:
(rolls == 1).mean() + (rolls == 2).mean() # adding the means because each event is exclusive of there (we can only have a 1 if we roll a 1, it can't be 2 also)

0.33336299999999996

In [30]:
((rolls == 1) | (rolls == 2)).mean() # we're using the | "pipe" operator to mean "or", so we're saying if the roll is 1 or 2

0.333363

In [35]:
# let's roll 2 dice at a time, what are the odds that they are both 5?
# chancce that each die in that pair is 5

# we could make one numpy array with 1,000,000 trials
# Each trial is an array of numbers (1 through 6)

rolls = np.random.choice([1,2,3,4,5,6], size = (1_000_000, 2))
df = pd.DataFrame(rolls)
df

Unnamed: 0,0,1
0,6,4
1,6,4
2,5,3
3,4,3
4,5,1
...,...,...
999995,6,2
999996,1,3
999997,1,6
999998,1,2


In [37]:
df[0] == 5

0         False
1         False
2          True
3         False
4          True
          ...  
999995    False
999996    False
999997    False
999998    False
999999    False
Name: 0, Length: 1000000, dtype: bool

In [38]:
# == gives you back a true or a false
df["first_is_five"] = df[0] == 5

In [40]:
df["second_is_five"] = df[1] == 5
df

Unnamed: 0,0,1,first_is_five,second_is_five
0,6,4,False,False
1,6,4,False,False
2,5,3,True,False
3,4,3,False,False
4,5,1,True,False
...,...,...,...,...
999995,6,2,False,False
999996,1,3,False,False
999997,1,6,False,False
999998,1,2,False,False


In [42]:
# & on a series or numpy array means "AND"
(df[0] == 5) & (df[1] == 5)

0         False
1         False
2         False
3         False
4         False
          ...  
999995    False
999996    False
999997    False
999998    False
999999    False
Length: 1000000, dtype: bool

In [47]:
# Create a new column called both_are_five and assign the value
# Then, get the average of that column (because we can average a column a booleans)
df["both_are_five"] = (df[0] == 5) & (df[1] == 5)
df["both_are_five"].mean()

0.027817

In [48]:
# df["new_column"] = df.oldColumn + 2 creates a new column
# df["new_column"].mean() gets the mean of that column
# df.existing_column.mean()    accesses the .existing_column and gets the .mean()

In [49]:
# Theoretical probability of rolling a 5 on two dice is the p(rolling 5) * p(rolling a 5)
1/6 * 1/6

0.027777777777777776

In [None]:
# probabilty of roling doubles on two dice is 1/ 6 because there are 6 ways to roll doubles out of a set of 
# 36 possibilities
# So to get doubles when rolling two dice, probability = 1/6 × 1/6 = 1/36 = 1 ÷ 36 = 0.0278, or 2.78 percent.
# how do I write this with Python???

![](http://www.stayorswitch.com/blog/wp-content/uploads/2014/06/Screen-Shot-2016-10-27-at-11.39.17-PM-300x134.png)