#Question
# 1) Take any suitable pmf and find Expected value and variance
# 2) Take any suitable joint pmf and find Expected value and variance

In [1]:
'''
Import here useful libraries
Run this cell first for convenience
'''
# Import here useful libraries
import numpy as np
from scipy import stats
import scipy
import warnings
from sympy import symbols
from sympy import integrate
warnings.simplefilter('ignore', DeprecationWarning)

# Random Variables

## Discrete Random Variable

### Definition of a Random Variable

- Random variable $X$: a mapping from the sample space $S$ to the real line $\mathbb{R}$
- A numerical value $X(w)$ is assigned to each outcome $w$ of a particular experiment

### Probability Mass Function

- Probability Mass Function (p.m.f.): set of probability values $p_i$ assigned to each value $x_i$ taken by the discrete random variable $X$
- $ 0 \leq p_i \leq 1 \text{ and } \sum_i p_i = 1$
- Probability: $P(X = x_i) = p_i$

### Cumulative Distribution Function

- Cumulative Distribution Function (CDF): $F(x) = P(X \leq x)$

In [2]:
from scipy.integrate import quad

# Normalisation check for a candidate probability density function:
# its integral over the support [x_i, x_f] should come out as 1.

# Extra arguments forwarded to the integrand through quad's `args`
a = 2
b = 1

# Integration limits
x_i = 49.5
x_f = 50.5


def integrand(x, a, b):
    """Evaluate 1.5 - 6*(x - 50)**2 at x.

    `a` and `b` are accepted because quad forwards them, but they are not
    used in the formula.
    """
    offset = x - 50
    return 1.5 - 6 * offset ** 2


# quad returns a tuple: (integral estimate, absolute error estimate)
I = quad(integrand, x_i, x_f, args=(a, b))
print("Result and error: ", I)

Result and error:  (0.9999999999999989, 1.1102230246251553e-14)


### Cumulative Distribution Function


- Cumulative Distribution Function for continuous Random Variables:
    - $F(x) = \int_{-\infty}^x f(y) dy$
    - $f(x) = \frac{dF(x)}{dx}$
    - $P(a < X < b) = F(b) - F(a)$
    - $P(a < X < b) = P(a \leq X \leq b) = P(a \leq X < b) = P(a < X \leq b)$

## Expectation of a Random Variable

### Expectations of Discrete Random Variables

- Expectation of a discrete random variable $X$ with p.m.f. $p$:
\begin{equation}
    E(X) = \sum_i p_i x_i
\end{equation}

In [None]:
from scipy.stats import rv_discrete

# Values taken by X and the probability attached to each value
x, p = (
    [10, 20, 30],
    [0.2, 0.3, 0.5],
)

# Discrete distribution defined directly by its (value, probability) table
distribution = rv_discrete(values=(x, p))

# E(X) = sum_i p_i * x_i
print("Expected value: ", distribution.expect())

Expected value:  23.0


## Variance of a Random Variable

### Definition and Interpretation of Variance

- Variance ($\sigma^2$): $Var(X) = E\left[(X - E(X))^2\right] = E(X^2) - \mu^2$, where $\mu = E(X)$
- Non-negative quantity measuring the spread of the distribution about its mean value
- Standard Deviation ($\sigma$): $\sqrt{Var(X)}$

In [None]:
from scipy.stats import rv_discrete

# Values taken by X and the probability attached to each value
x, p = (
    [10, 20, 30],
    [0.2, 0.3, 0.5],
)

# Discrete distribution defined directly by its (value, probability) table
distribution = rv_discrete(values=(x, p))

# Var(X) = E(X^2) - E(X)^2, and the standard deviation is its square root
print("Variance: ", distribution.var())
print("Standard Deviation: ", distribution.std())

Variance:  61.0
Standard Deviation:  7.810249675906654


## Jointly Distributed Random Variables

### Joint Probability Distributions

- Discrete:
\begin{equation}   
    P(X = x_i, Y = y_j) = p_{ij} \geq 0 \text{ satisfying } \sum_i \sum_j p_{ij} = 1
\end{equation}



- Joint Cumulative Distribution Function:
\begin{equation}   
    F(x,y) = P(X \leq x, Y \leq y)
\end{equation}
    - Discrete:
    \begin{equation}   
        F(x,y) = \sum_{i:x_i \leq x} \sum_{j:y_j \leq y} p_{ij}
    \end{equation}


### Marginal Probability Distributions

- Marginal probability distribution: obtained by summing or integrating the joint probability distribution over the values of the other random variable
    - Discrete:
    \begin{equation}   
        P(X = x_i) = p_{i+} = \sum_j p_{ij}
    \end{equation}


In [3]:
# Calculate the Expectation and variance for discrete random variables

# Input X and Y are in a table with corresponding probabilities
value_x = np.array([1, 2, 3])
value_y = np.array([1, 2, 3, 4])
prob_matrix = np.array([[0.12, 0.08, 0.07, 0.05], [0.08, 0.15, 0.21, 0.13], [0.01, 0.01, 0.02, 0.07]]) # Covariance matrix

# Expectation of x
exp_x =  0
for i in range(len(value_x)):
    exp_x += value_x[i] * np.sum(prob_matrix, axis=1)[i]
print("Expectation of x: ", exp_x)

# Expectation of y
exp_y = 0
for i in range(len(value_y)):
    exp_y += value_y[i] * np.sum(prob_matrix, axis=0)[i]
print("Expectation of y: ", exp_y)

# Variance of x
exp_x2 = 0
for i in range(len(value_x)):
    exp_x2 += (value_x[i] ** 2) * np.sum(prob_matrix, axis=1)[i]
var_x = exp_x2 - (exp_x ** 2)
print("Variance of x: ", var_x)

# Variance of y
exp_y2 = 0
for i in range(len(value_y)):
    exp_y2 += (value_y[i] ** 2) * np.sum(prob_matrix, axis=0)[i]
var_y = exp_y2 - (exp_y ** 2)
print("Variance of y: ", var_y)

Expectation of x:  1.79
Expectation of y:  2.59
Variance of x:  0.3858999999999999
Variance of y:  1.161900000000001
