# Random Numbers and Probability

In [1]:
import yfinance as yf
from datetime import datetime

In [2]:
# Create a yfinance Ticker handle for the symbol 'ASML'.
# Despite the plural name, `tickers` refers to this single ticker.
tickers = yf.Ticker('ASML')

In [3]:
# Fetch the price history for the ticker using period="1mo".
# The result is used below as a pandas DataFrame indexed by date.
# NOTE(review): presumably "1mo" is relative to the day the cell is run, so
# re-running returns a different window than the captured outputs — confirm.
history_price = tickers.history(period="1mo")

In [4]:
# Summarise the frame: index range, column names, non-null counts and dtypes.
history_price.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 20 entries, 2023-12-04 00:00:00-05:00 to 2024-01-02 00:00:00-05:00
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          20 non-null     float64
 1   High          20 non-null     float64
 2   Low           20 non-null     float64
 3   Close         20 non-null     float64
 4   Volume        20 non-null     int64  
 5   Dividends     20 non-null     float64
 6   Stock Splits  20 non-null     float64
dtypes: float64(6), int64(1)
memory usage: 1.2 KB


## Sampling from dataframe

In [5]:
import numpy as np

### Sampling one 

In [6]:
# Draw a single closing price at random. No seed is set here, so the
# selected row changes from run to run.
history_price['Close'].sample(n=1)

Date
2023-12-26 00:00:00-05:00    762.679993
Name: Close, dtype: float64

In [7]:
# Make the draw repeatable: seed NumPy's global random state first.
# sample() is called without random_state, so it relies on that global
# state and therefore returns the same row on every run of this cell.
np.random.seed(42)
history_price['Close'].sample(n=1)

Date
2023-12-04 00:00:00-05:00    690.320007
Name: Close, dtype: float64

### Sampling two 

#### Sampling without replacement

In [8]:
# Draw two distinct closing prices (sampling without replacement is the default).
history_price['Close'].sample(n=2)

Date
2024-01-02 00:00:00-05:00    716.919983
2023-12-27 00:00:00-05:00    764.030029
Name: Close, dtype: float64

#### Sampling with replacement

In [14]:
# Draw five closing prices with replacement, so the same date may appear
# more than once, then order the draws by price to make repeats easy to spot.
(
    history_price['Close']
    .sample(n=5, replace=True)
    .sort_values()
)

Date
2023-12-05 00:00:00-05:00    694.530029
2023-12-07 00:00:00-05:00    699.650024
2023-12-07 00:00:00-05:00    699.650024
2023-12-11 00:00:00-05:00    710.239990
2023-12-15 00:00:00-05:00    752.960022
Name: Close, dtype: float64

## Discrete distributions

- Expected Value: mean of a probability distribution
- Uniform Discrete Distributions: all outcomes have the same probability
- Law of large numbers: As the size of the sample increases, the sample mean will approach the expected value

In [12]:
# Nine evenly spaced values from 1 to 5 inclusive (step 0.5).
np.linspace(start=1, stop=5, num=9)

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

## Continuous distributions

- Continuous uniform distribution: all continuous outcomes have the same probability

In [98]:
from scipy.stats import uniform

In [100]:
# P(X <= 7) for a continuous uniform distribution that starts at 0 and ends at 12.
# scipy parameterises it as [loc, loc + scale], so `scale` is the width of the
# interval, not its right endpoint (the two coincide here because loc is 0).
uniform.cdf(7, loc=0, scale=12)

0.5833333333333334

- Generating random numbers according to a uniform distribution

In [101]:
# Ten random draws from the continuous uniform distribution on [0, 5]
# (loc is the lower bound, scale is the width). No seed is set for this call.
uniform.rvs(loc=0, scale=5, size=10)

array([1.35771458, 4.82625915, 2.28632581, 4.21011538, 0.97190017,
       2.05676953, 3.49756105, 0.69176546, 0.66372711, 4.84768434])

## Binomial distribution

- Expected value: $n \times p$

In [103]:
from scipy.stats import binom

In [104]:
# binom.rvs(n, p, size):
#   n    - number of coin flips in each trial
#   p    - probability of success (heads) on a single flip
#   size - how many trials to simulate
# Here: one trial of a single fair-coin flip, giving 0 (tails) or 1 (heads).
binom.rvs(n=1, p=0.5, size=1)

array([1])

In [105]:
# P(heads = 7): probability of exactly 7 heads in 10 fair-coin flips.
# pmf is the probability mass function.
# binom.pmf(number of heads, number of flips, probability of heads)
binom.pmf(7, n=10, p=0.5)

0.11718749999999999

In [106]:
# P(heads <= 7): probability of at most 7 heads in 10 fair-coin flips.
# cdf is the cumulative distribution function.
binom.cdf(7, n=10, p=0.5)

0.9453125

In [117]:
# Simulate 52 weeks of sales: each week has 3 deals and each deal is won
# with probability 0.3. Each entry is the number of deals won that week (0-3).
deals = binom.rvs(n=3, p=0.3, size=52)
deals

array([2, 1, 0, 2, 3, 3, 0, 1, 1, 1, 2, 1, 2, 2, 2, 1, 0, 1, 2, 0, 1, 0,
       1, 2, 2, 0, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 2, 1, 1,
       0, 1, 1, 0, 1, 1, 2, 3])

In [120]:
# Show the mean number of deals won per week across the simulated weeks.
# With n = 3 deals and p = 0.3, the expected value n * p is 0.9, so the
# sample mean should be in that neighbourhood.
deals.mean()

1.1346153846153846