# Week 8: Random Variables & Expected Value

**Course**: BSMA1002 - Statistics for Data Science I  
**Topic**: Random Variables, PMF, Expected Value

## Learning Objectives

- Understand discrete and continuous random variables
- Calculate expected value and variance
- Work with probability mass functions (PMF)
- Apply expected value to decision making

In [None]:
# Notebook setup: imports, warning suppression, RNG seed, and plot style.
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# Hide library warnings so cell output stays readable.
warnings.filterwarnings('ignore')

# Seed NumPy's global RNG so random draws are reproducible across runs.
np.random.seed(42)

plt.style.use('seaborn-v0_8-darkgrid')

print("✓ Libraries loaded")

## 1. Random Variables

**Random Variable**: A numerical outcome of a random process

### Types

- **Discrete**: Countable set of values (dice, coin flips)
- **Continuous**: Any value within an interval (height, time)

**Example**: X = outcome of rolling a fair die  
Values: {1, 2, 3, 4, 5, 6}, each with P(X=x) = 1/6

In [None]:
# Random variable example: tabulate and plot the PMF of one fair die.
print("Random Variable: Die Roll")
print("=" * 70)

# Support of X (faces 1..6) and the probability of each face.
x = np.arange(1, 7)
p = np.full(6, 1/6)

# Two-column text table: one row per outcome.
print("Outcome  | Probability")
print("-" * 25)
print("\n".join(f"{face:^8} | {prob:.4f}" for face, prob in zip(x, p)))

# Visualize PMF
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x, p, width=0.6, edgecolor='black', linewidth=2, alpha=0.7)
ax.set_title('Probability Mass Function: Fair Die', fontsize=14, fontweight='bold')
ax.set_xlabel('Outcome (X)', fontsize=12)
ax.set_ylabel('P(X)', fontsize=12)
ax.set_xticks(x)
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

# A valid PMF must sum to 1.
pmf_total = p.sum()
print(f"\n✓ PMF properties: Σ P(X=x) = {pmf_total:.1f}")

## 2. Expected Value (Mean)

**Expected Value**: Long-run average value

$$E[X] = \mu = \sum_{i} x_i \cdot P(X = x_i)$$

**Variance**:

$$Var(X) = E[(X - \mu)^2] = \sum_{i} (x_i - \mu)^2 \cdot P(X = x_i)$$

In [None]:
# Expected value and variance of a fair die, checked against a simulation.
print("Expected Value Calculation")
print("=" * 70)

# Outcomes and their probabilities.
x = np.arange(1, 7)
p = np.full(6, 1/6)

# Expected value: probability-weighted sum of the outcomes.
expected = (x * p).sum()
print("E[X] = Σ x·P(X=x)")
print("     = (1)(1/6) + (2)(1/6) + ... + (6)(1/6)")
print(f"     = {expected:.4f}")

# Variance: probability-weighted squared deviation from the mean.
squared_dev = (x - expected) ** 2
variance = (squared_dev * p).sum()
std_dev = np.sqrt(variance)
print(f"\nVar(X) = Σ (x-μ)²·P(X=x) = {variance:.4f}")
print(f"SD(X) = √Var(X) = {std_dev:.4f}")

# Simulation: draw rolls from the PMF and compare the sample mean to E[X].
n_rolls = 10000
simulated = np.random.choice(x, size=n_rolls, p=p)
sim_mean = simulated.mean()
print(f"\nSimulation ({n_rolls:,} rolls):")
print(f"  Sample mean: {sim_mean:.4f}")
print(f"  Theoretical: {expected:.4f}")

# Visualize
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: PMF with the expected value marked.
ax1.bar(x, p, width=0.6, edgecolor='black', linewidth=2, alpha=0.7)
ax1.axvline(
    expected,
    color='red',
    linestyle='--',
    linewidth=3,
    label=f'E[X] = {expected:.2f}',
)
ax1.set_xlabel('X', fontsize=12)
ax1.set_ylabel('P(X)', fontsize=12)
ax1.set_title('PMF with Expected Value', fontsize=13, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3, axis='y')

# Right panel: simulated frequencies over the theoretical PMF.
# Bin edges sit at half-integers so each face gets its own unit-width bin.
bin_edges = np.arange(0.5, 7.5, 1)
ax2.hist(
    simulated,
    bins=bin_edges,
    density=True,
    alpha=0.7,
    edgecolor='black',
    linewidth=1.5,
    label='Simulated',
)
ax2.bar(x, p, width=0.6, alpha=0.5, color='red', label='Theoretical')
ax2.axvline(
    sim_mean,
    color='blue',
    linestyle='--',
    linewidth=2,
    label=f'Mean = {sim_mean:.2f}',
)
ax2.set_xlabel('X', fontsize=12)
ax2.set_ylabel('Probability', fontsize=12)
ax2.set_title(f'Simulation: {n_rolls:,} Rolls', fontsize=13, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## 3. Real Application: Casino Game Analysis

Use expected value for decision making.

In [None]:
# Casino game: exact expected-value analysis, checked by simulation.
#
# Rules: pay $5 to roll two dice.
#   * Win $20 if the sum is 7 or 11.
#   * Win $10 if the roll is doubles.
#   * Otherwise the $5 stake is lost.
from collections import Counter
from itertools import product

print("Casino Game: Expected Value Analysis")
print("=" * 70)

cost = 5


def net_payoff(die1, die2):
    """Return the player's net result (prize minus stake) for one roll.

    This is the single definition of the game's rules; both the exact
    enumeration and the simulation below call it, so they cannot disagree.
    The sum rule is checked before the doubles rule.

    Args:
        die1: Face shown by the first die.
        die2: Face shown by the second die.

    Returns:
        15 for a sum of 7 or 11, 5 for doubles, otherwise -5
        (with the module-level ``cost`` of 5).
    """
    if die1 + die2 in (7, 11):
        return 20 - cost
    if die1 == die2:
        return 10 - cost
    return -cost


# Calculate probabilities by classifying each of the 36 ordered rolls,
# rather than hand-counting the favourable ones.
all_rolls = list(product(range(1, 7), repeat=2))
total_outcomes = len(all_rolls)
payoff_counts = Counter(net_payoff(first, second) for first, second in all_rolls)

# 8 rolls: (1,6), (2,5), (3,4), (4,3), (5,2), (6,1), (5,6), (6,5)
outcomes_7_11 = payoff_counts[20 - cost]
# 6 rolls: (1,1), (2,2), (3,3), (4,4), (5,5), (6,6)
outcomes_doubles = payoff_counts[10 - cost]

p_win_20 = outcomes_7_11 / total_outcomes
p_win_10 = outcomes_doubles / total_outcomes
p_lose = 1 - p_win_20 - p_win_10

print(f"Cost to play: ${cost}")
print(f"\nOutcomes:")
print(f"  Win $20 (sum 7 or 11): P = {p_win_20:.4f}")
print(f"  Win $10 (doubles): P = {p_win_10:.4f}")
print(f"  Lose $5 (other): P = {p_lose:.4f}")

# Expected value of the net payoff (prize minus the stake already paid).
payoffs = np.array([20 - cost, 10 - cost, -cost])  # net payoffs: 15, 5, -5
probs = np.array([p_win_20, p_win_10, p_lose])
ev = np.sum(payoffs * probs)

print(f"\nExpected Value:")
print(f"  E[Net] = (15)({p_win_20:.4f}) + (5)({p_win_10:.4f}) + (-5)({p_lose:.4f})")
print(f"         = ${ev:.2f}")

if ev > 0:
    print(f"\n✅ Favorable to player")
elif ev < 0:
    print(f"\n❌ Favorable to house (expected loss: ${-ev:.2f} per game)")
else:
    print(f"\n⚖️ Fair game")

# Simulation: one randint call per game, two dice per call, so the seeded
# draw sequence is the same as drawing the dice game by game.
n_games = 10000
np.random.seed(42)
results = [net_payoff(*np.random.randint(1, 7, 2)) for _ in range(n_games)]

sim_ev = np.mean(results)
cumulative = np.cumsum(results)

print(f"\nSimulation ({n_games:,} games):")
print(f"  Simulated EV: ${sim_ev:.2f}")
print(f"  Final result: ${cumulative[-1]:.2f}")

# Visualize
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Outcome probabilities
labels = ['Win $20', 'Win $10', 'Lose $5']
colors = ['green', 'lightgreen', 'red']
bars = ax1.bar(labels, probs, color=colors, edgecolor='black', linewidth=1.5)
ax1.set_ylabel('Probability', fontsize=12)
ax1.set_title('Game Outcome Probabilities', fontsize=13, fontweight='bold')
ax1.grid(True, alpha=0.3, axis='y')
# Write each probability just above its bar.
for bar, prob in zip(bars, probs):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
             f'{prob:.3f}', ha='center', fontsize=10, fontweight='bold')

# Cumulative results, with the theoretical total (EV x games) for reference.
ax2.plot(cumulative, linewidth=1, alpha=0.7)
ax2.axhline(0, color='black', linestyle='-', linewidth=1)
ax2.axhline(ev * n_games, color='red', linestyle='--', linewidth=2,
            label=f'Expected: ${ev * n_games:.0f}')
ax2.set_xlabel('Number of Games', fontsize=12)
ax2.set_ylabel('Cumulative Payoff ($)', fontsize=12)
ax2.set_title(f'Cumulative Results: {n_games:,} Games', fontsize=13, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Summary

### Key Concepts

| Concept | Formula | Description |
|---------|---------|-------------|
| **Random Variable** | X | Numerical outcome |
| **PMF** | P(X = x) | Discrete probabilities |
| **Expected Value** | E[X] = Σ x·P(x) | Long-run average |
| **Variance** | Var(X) = E[(X-μ)²] | Spread measure |

### Properties of Expected Value and Variance

- E[aX + b] = aE[X] + b
- Var(aX + b) = a²Var(X)
- E[X+Y] = E[X] + E[Y] (holds for any X, Y, including independent ones; independence is not required)

### Applications

- Casino games and gambling
- Insurance pricing
- Investment decisions
- Risk assessment

---

**Next Week**: Discrete Distributions (Binomial, Poisson)