In [1]:
import pandas as pd
import numpy as np

# Example Challenge
- Let's roll 3 dice at a time.
- And setup some questions

In [17]:
n_trials = 100_000
n_dice = 3

rolls = np.random.choice([1, 2, 3, 4, 5, 6], size=(n_trials, n_dice))
rolls = pd.DataFrame(rolls)
rolls.columns = ["die1", "die2", "die3"]
rolls.head()

Unnamed: 0,die1,die2,die3
0,5,3,2
1,2,2,5
2,6,4,4
3,1,4,2
4,2,4,3


In [18]:
# Chances of rolling all 3 of the same number
(1/6)**3

0.0046296296296296285

In [26]:
# chances of rolling 1, 1, 1 plus chances of rolling 2, 2, 2 ...
((1/6) ** 3) * 6

0.02777777777777777

In [21]:
# what are the chances we roll triplets?
# if the values in column1 match column2 match column3, then True for that row
# example: [1, 1, 1] should give us a triple, [2, 2, 2] gives us a triple

die1_2_match = (rolls.die1 == rolls.die2)
die2_3_match = (rolls.die2 == rolls.die3)

rolls["all_match"] = (die1_2_match & die2_3_match)
rolls.all_match.mean()

0.02778

In [25]:
# What are the chances of rolling all 3s?
((rolls.die1 == 3) & (rolls.die2 == 3) & (rolls.die3 == 3)).mean()

0.00479

In [27]:
rolls.head()

Unnamed: 0,die1,die2,die3,all_match
0,5,3,2,False
1,2,2,5,False
2,6,4,4,False
3,1,4,2,False
4,2,4,3,False


In [31]:
# What are our chances of rolling a 15 or above on 3 dice
((rolls.die1 + rolls.die2 + rolls.die3) > 15).mean()

0.04552

In [None]:
rolls = rolls.drop(columns="all_match")

In [33]:
rolls.head()

Unnamed: 0,die1,die2,die3
0,5,3,2
1,2,2,5
2,6,4,4
3,1,4,2
4,2,4,3


In [34]:
# Each row is a trial, we're running 100k trials
# To sum a row, we use .sum() and set axis=1
# axis=0 is the default and means column-wise
rolls.sum(axis=1)

0        10
1         9
2        14
3         7
4         9
         ..
99995     7
99996     6
99997    12
99998    10
99999    11
Length: 100000, dtype: int64

In [35]:
# What's the average pip per 3 dice
rolls.mean(axis=1)

0        3.333333
1        3.000000
2        4.666667
3        2.333333
4        3.000000
           ...   
99995    2.333333
99996    2.000000
99997    4.000000
99998    3.333333
99999    3.666667
Length: 100000, dtype: float64

In [None]:
# Let's get into using .apply
# What are the chances of rolling a sum of 15 or more on 3 dice?

In [None]:
# blow off the idea of iterating or looping
# Let's make a single function  built to run on a single row (and this could work w/ columns too)
# but in this case, we're operating on a row

# step 0: blow off iterating
# step 1: write a function that runs on a single row

In [36]:
rolls.head(1)

Unnamed: 0,die1,die2,die3
0,5,3,2


In [37]:
rolls.iloc[0:1, ]

Unnamed: 0,die1,die2,die3
0,5,3,2


In [38]:
def sum_row(row):
    return row.die1 + row.die2 + row.die3

In [39]:
sum_row(rolls.iloc[0:1], )

0    10
dtype: int64

In [41]:
# Step 2, use .apply
# no axis argument defaults to axis 0 which means .apply the function to the column
# axis=1 applies the function to each row
rolls.apply(sum_row, axis=1)

0        10
1         9
2        14
3         7
4         9
         ..
99995     7
99996     6
99997    12
99998    10
99999    11
Length: 100000, dtype: int64

In [42]:
# What are the chances of rolling a 1 then a 2 then a 3, in order?
# non-apply version
((rolls.die1 == 1) & (rolls.die2 == 2) & (rolls.die3 == 3)).mean()

0.00419

In [43]:
# But what if the problem is more row-wise?
# If we're going to use .apply, let's remember our steps for setup
# Step 0: blow off any idea of looping or iterating
# Step 1: Write a single function that works on a single value
# This value could be a single piece of data if we apply to the column
# OR this value could be a single row of the df if we .apply to the row
# Step 2: use .apply(function_name, axis=1)`

In [44]:
# checks if a row's die1 is 1, that row's die2 is 2, that row's die3 is 3
def example1(row):
    return (row.die1 == 1) and (row.die2 == 2) and (row.die3 == 3)

In [45]:
rolls.apply(example1, axis=1)

0        False
1        False
2        False
3        False
4        False
         ...  
99995    False
99996    False
99997    False
99998    False
99999    False
Length: 100000, dtype: bool

In [47]:
# What are the chances that each successive die rolls higher than the last?
((rolls.die1 < rolls.die2) & (rolls.die2 < rolls.die3)).mean()

0.09404