In [2]:
from scipy.stats import expon
from sympy import symbols
from sympy import integrate
import numpy as np

# Chapter 2 Random Variables

## 2.0 Random Variables

**Random variable** $X: S\to R$, from sample spcae to real line (as the domian of probability function).

**Probability functions** $P(x): R\to [0, 1]$, from real line to the value of probabiliy. 

## 2.1 Discrete Variables

Discrete random variables
- Probability Mass Function (**p.m.f.**)
- Cumulative Distribution Function (**c.d.f.**)
- Expectation $E(X) = \sum_i p_ix_i$

In [7]:
# Calculate the expectation

# Input
x = [1, 2, 3, 4, 5]
p = [0.2, 0.2, 0.2, 0.2, 0.2]

# Cal
exp = 0
for i, j in zip(x, p):
    exp += i * j

# Output
print('Expectation: {}'.format(exp))

Expectation: 3.0


## 2.2 Continuous Variables

Continous random variables
- Probability Density Function (p.d.f.)
- Cumulative Distribution Function (c.d.f.)
- Expectation $E(X) = \int_{-\infty}^{\infty}xf(x)dx$

In [12]:
# Calculate the c.d.f
x = symbols('x')

# Input
func = 1.5 - 6 * (x - 50) ** 2
domain = (49.5, 50.5)

# Cal
cumulation = integrate(func, (x, domain[0], domain[1])) # N.B. You can use 'oo' to denote \infty

# Output
print('Cumulation from {} to {}: {:.6f}'.format(domain[0], domain[1], cumulation))

Cumulation from 49.5 to 50.5: 1.000000


In [14]:
# Calculate the expectation
x = symbols('x')

# Input
func = 1.5 - 6 * (x - 50) ** 2
domain = (49.5, 50.5)

# Cal
exp = integrate(x * func, (x, domain[0], domain[1]))

# Output
print('Expectation: {:.6f}'.format(exp))

Expectation: 50.000000


## 2.3 Expectation and Variance

Variance of random variables

$$Var(X) = E(X-E(X))^2 = E(X^2) - E(X)^2$$

In [17]:
# Calculate the variance for discrete variables

# Input
x = [1, 2, 3, 4, 5]
p = [0.2, 0.2, 0.2, 0.2, 0.2]

# Cal
exp = 0
exp2 = 0
for i, j in zip(x, p):
    exp += i * j
    exp2 += i * i * j
var = exp2 - exp ** 2

# Output
print('Variance: {:.4f}'.format(var))

Variance: 2.0


In [4]:
# Calculate the variance for continuous variance
x = symbols('x')

# Input
func = 1.5 - 6 * (x - 50) ** 2
domain = (49.5, 50.5)

# Cal
exp = integrate(x * func, (x, domain[0], domain[1]))
exp2 = integrate(x * x * func, (x, domain[0], domain[1]))
var = exp2 - exp

# Output
print('Variance: {:.4f}'.format(var))

Variance: 2450.0500


Quantiles of Random Variables
- Upper quartile $Q_3$: c.d.f. = 0.75
- Lower quartile $Q_1$: c.d.f. = 0.25
- Interquartile range $IQR$: $Q_3 - Q_1$

In [None]:
# Calculate the Q1
# TODO
x = symbols('x')
y = symbols('y')

# Input
func = 1.5 - 6 * (x - 50) ** 2
domain = (49.5, 50.5)

# Cal
cumulation = integrate(func, (x, domain[0], domain[1])) # N.B. You can use 'oo' to denote \infty
func = integrate(func, (x, domain[0], y))

# Output
print('Cumulation from {} to {}: {:.6f}'.format(domain[0], domain[1], cumulation))

Chebyshev's Inequality

$$P(\mu -c \sigma \leq X \leq \mu + c \sigma) \geq 1-\frac{1}{c^2}, c\geq 1$$

## 2.4 Jointly Distributed Random Variables

Discrete jointly distribution random variables
\begin{equation}   
    P(X = x_i, Y = y_j) = p_{ij} \geq 0 \text{ satisfying } \sum_i \sum_j p_{ij} = 1
\end{equation}

Continuous jointly distribution random variables
\begin{equation}
    f(x,y) \geq 0 \text{ satisfying } \int \int f(x,y) dxdy= 1
\end{equation}

Cumulative Distribution Function: 
\begin{equation}   
    F(x,y) = P(X \leq x_j, Y \leq y_j)
\end{equation}
    Discrete CDF
    \begin{equation}   
        F(x,y) = \sum_{i:x_i \leq x} \sum_{j:y_j \leq y} p_{ij}
    \end{equation}
    Continuous CDF
    \begin{equation}   
        F(x,y) = \int_{- \infty}^{ y} \int_{- \infty}^{ x} f(w, z) dwdz
    \end{equation}

## 2.5 Covarance and Covv

$Cov(X, Y) = E(XY) - E(X)E(Y)$

$Corr(X, Y) = \frac{Cov(X, Y)}{\sqrt{Var(X)Var(Y)}}$

In [3]:
# Calculate the Covarance for discrete

# Input [X * Y]
value_x = np.array([1, 2, 3])
value_y = np.array([1, 2, 3, 4])
prob_matrix = np.array([[0.12, 0.08, 0.07, 0.05], [0.08, 0.15, 0.21, 0.13], [0.01, 0.01, 0.02, 0.07]])

# Cal expectation of x
exp_x =  0
for i in range(len(value_x)):
    exp_x += value_x[i] * np.sum(prob_matrix, axis=1)[i]

# Cal expectation of y
exp_y = 0
for i in range(len(value_y)):
    exp_y += value_y[i] * np.sum(prob_matrix, axis=0)[i]
    
# Cal variance of x
exp_x2 = 0
for i in range(len(value_x)):
    exp_x2 += (value_x[i] ** 2) * np.sum(prob_matrix, axis=1)[i]
var_x = exp_x2 - (exp_x ** 2)

# Cal variance of y
exp_y2 = 0
for i in range(len(value_y)):
    exp_y2 += (value_y[i] ** 2) * np.sum(prob_matrix, axis=0)[i]
var_y = exp_y2 - (exp_y ** 2)

# Cal covarance
exp_xy = 0
for i in range(len(value_x)):
    for j in range(len(value_y)):
        exp_xy += value_x[i] * value_y[j] * prob_matrix[i, j]
cov = exp_xy - exp_x * exp_y

## Cal correlation
corr = cov / ((var_x * var_y) ** (1/2))
        
# Output
print('EXP X\t {:.4f}\nEXP Y\t {:.4f}\nVAR X\t {:.4f}\nVAR Y\t \
{:.4f}\nCOV\t {:.4f}\nCORR\t {:.4f}'.format(exp_x, exp_y, var_x, var_y, cov, corr))

Expectation of x:  1.79
Expectation of y:  2.59
Variance of x:  0.3858999999999999
Variance of y:  1.161900000000001
Covariance:  0.22389999999999954
Correlation:   0.33437386749732556


In [33]:
# Calculate the Covarance for continous
x = symbols('x')
y = symbols('y')

# Input
func = 8 * x * y - 2 * x * (y ** 2)
domain_x = (0, 1)
domain_y = (1, 2)

# Cal expectation of x
exp_x = float(integrate(x * integrate(func, (y, domain_y[0], domain_y[1])), (x, domain_x[0], domain_x[1])))

# Cal expectation of y
exp_y = float(integrate(y * integrate(func, (x, domain_x[0], domain_x[1])), (y, domain_y[0], domain_y[1]), (y, domain_y[0], domain_y[1])))

# Cal variance of x
exp_x2 = float(integrate(x * x * integrate(func, (y, domain_y[0], domain_y[1])), (x, domain_x[0], domain_x[1])))
var_x = exp_x2 - exp_x ** 2

# Cal variance of y
exp_y2 = float(integrate(y * y * integrate(func, (x, domain_x[0], domain_x[1])), (y, domain_y[0], domain_y[1])))
var_y = exp_y2 - exp_y ** 2

# Cal covarance
exp_xy = float(integrate(x * y * func, (x, domain_x[0], domain_x[1]), (y, domain_y[0], domain_y[1])))
cov = exp_xy - exp_x * exp_y

# Cal correlation
corr = cov/(var_x * var_y) ** (1/2)

# Output
print('EXP X\t {:.4f}\nEXP Y\t {:.4f}\nVAR X\t {:.4f}\nVAR Y\t \
{:.4f}\nCOV\t {:.4f}\nCORR\t {:.4f}'.format(exp_x, exp_y, var_x, var_y, cov, corr))

EXP X	 2.4444
EXP Y	 5.5833
VAR X	 -4.1420
VAR Y	 -22.3736
COV	 -9.9259
CORR	 -1.0311


## 2.6 Combinations and Functions of Random Variables

$E(aX+b) = aE(X) + b$

$E(X_1+X_2) = E(X_1) + E(X_2)$

$E(\bar{X}) = E(X)$

$Var(aX+b) = a^2Var(X)$

$Var(X_1+X_2) = Var(X_1) + Var(X_2) + 2Cov(X_1, X_2)$

$Var(\bar{X}) = \frac{\sigma^2}{n}$