# Week 12: Course Review & Applications

**Course**: BSMA1002 - Statistics for Data Science I  
**Topic**: Integration and Real-World Applications

## Course Summary

Comprehensive review of all statistical concepts covered in the course.

In [None]:
# Core scientific stack used throughout the review notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# Fix the global NumPy seed so every simulated sample below is reproducible.
np.random.seed(42)

# The seaborn-derived style sheets were renamed with a "seaborn-v0_8-" prefix
# in Matplotlib 3.6; older releases only know the unprefixed name.  Apply the
# first name this installation provides instead of failing with OSError, and
# keep Matplotlib's default style if neither is available.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

print("✓ All tools loaded for comprehensive review")

## 1. Descriptive Statistics Review

### Central Tendency
- **Mean**: Average value
- **Median**: Middle value (robust to outliers)
- **Mode**: Most frequent value

### Dispersion
- **Range**: Max - Min
- **Variance**: Average squared deviation from the mean
- **Standard Deviation**: √Variance
- **IQR**: Q3 - Q1

In [None]:
# Comprehensive example
# Draw 1,000 values from a Normal(100, 15) population and summarise them.
data = np.random.normal(100, 15, 1000)

# Both quartiles come from a single percentile call and are reused for the IQR.
q1, q3 = np.percentile(data, [25, 75])

stats_summary = {
    'Mean': np.mean(data),
    'Median': np.median(data),
    'Std Dev': np.std(data),  # population form (ddof=0), same as data.std()
    'Q1': q1,
    'Q3': q3,
    'IQR': q3 - q1,
}

print("Descriptive Statistics Summary")
print("=" * 50)
for stat in stats_summary:
    print(f"{stat:12s}: {stats_summary[stat]:.2f}")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: histogram with the mean and median marked as dashed lines.
ax1.hist(data, bins=30, edgecolor='black', alpha=0.7)
for measure, line_color in (('Mean', 'red'), ('Median', 'green')):
    ax1.axvline(stats_summary[measure], color=line_color, linestyle='--',
                linewidth=2, label=measure)
ax1.set_title('Distribution with Central Tendency', fontweight='bold')
ax1.legend()

# Right panel: box plot of the same sample.
ax2.boxplot(data)
ax2.set_title('Box Plot Showing IQR', fontweight='bold')
ax2.set_ylabel('Value')

plt.tight_layout()
plt.show()

## 2. Probability Distributions Summary

| Type | Distribution | Parameters | Use Case |
|------|--------------|------------|----------|
| **Discrete** | Binomial | n, p | Number of successes in a fixed number of trials |
| **Discrete** | Poisson | λ | Count of events in a fixed interval |
| **Continuous** | Uniform | a, b | Equal probability over an interval |
| **Continuous** | Exponential | λ | Time between events |
| **Continuous** | Normal | μ, σ | Most common; arises from the Central Limit Theorem (CLT) |

In [None]:
# Visual comparison of distributions
# A 2x3 grid: two discrete PMFs and three continuous PDFs; the sixth cell is
# left blank.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Binomial
x = range(21)  # support k = 0..20, shared by both discrete panels
ax = axes[0, 0]
probs = stats.binom.pmf(np.asarray(x), 20, 0.5)
ax.bar(x, probs, edgecolor='black')
ax.set_title('Binomial(n=20, p=0.5)', fontweight='bold')

# Poisson
ax = axes[0, 1]
probs = stats.poisson.pmf(np.asarray(x), 5)
ax.bar(x, probs, edgecolor='black', color='orange')
ax.set_title('Poisson(λ=5)', fontweight='bold')

# Normal
ax = axes[0, 2]
x_norm = np.linspace(-4, 4, 1000)
norm_density = stats.norm.pdf(x_norm)
ax.plot(x_norm, norm_density, linewidth=2)
ax.fill_between(x_norm, norm_density, alpha=0.3)
ax.set_title('Normal(μ=0, σ=1)', fontweight='bold')

# Uniform
ax = axes[1, 0]
x_unif = np.linspace(-1, 11, 1000)
# scipy parameterises the uniform as (loc, scale): support is [0, 0 + 10].
unif_density = stats.uniform.pdf(x_unif, 0, 10)
ax.plot(x_unif, unif_density, linewidth=2, color='purple')
ax.fill_between(x_unif, unif_density, alpha=0.3, color='purple')
ax.set_title('Uniform(0, 10)', fontweight='bold')

# Exponential
ax = axes[1, 1]
x_exp = np.linspace(0, 10, 1000)
# scipy uses scale = 1/λ, so scale=2 corresponds to the rate λ=0.5 in the title.
expon_density = stats.expon.pdf(x_exp, scale=2)
ax.plot(x_exp, expon_density, linewidth=2, color='green')
ax.fill_between(x_exp, expon_density, alpha=0.3, color='green')
ax.set_title('Exponential(λ=0.5)', fontweight='bold')

# Hide last subplot
axes[1, 2].axis('off')

plt.tight_layout()
plt.show()
print("✓ All major distributions visualized")

## 3. Real-World Integrated Application

**Business Scenario**: E-commerce conversion optimization

Combining multiple statistical concepts:
1. Descriptive statistics for data exploration
2. Probability for A/B testing
3. Normal distribution for significance testing (two-proportion z-test)

In [None]:
# Integrated business application
# Simulate an A/B test, run a two-proportion z-test on the conversion rates,
# and translate a winning result into a projected annual revenue figure.

# Simulate A/B test data: one Bernoulli draw (1 = converted) per visitor.
n_visitors_a = 1000
n_visitors_b = 1000
conversion_rate_a = 0.10
conversion_rate_b = 0.12

conversions_a = np.random.binomial(1, conversion_rate_a, n_visitors_a)
conversions_b = np.random.binomial(1, conversion_rate_b, n_visitors_b)

obs_rate_a = conversions_a.mean()
obs_rate_b = conversions_b.mean()
lift = (obs_rate_b - obs_rate_a) / obs_rate_a  # relative change vs. control

print("E-Commerce A/B Test Analysis")
print("=" * 70)
print("\nVariant A (Control):")
print(f"  Visitors: {n_visitors_a:,}")
print(f"  Conversions: {conversions_a.sum()}")
print(f"  Rate: {obs_rate_a:.2%}")
print("\nVariant B (Treatment):")
print(f"  Visitors: {n_visitors_b:,}")
print(f"  Conversions: {conversions_b.sum()}")
print(f"  Rate: {obs_rate_b:.2%}")
print(f"\nLift: {lift:+.1%}")

# Statistical test: two-proportion z-test with a pooled standard error.
pooled = (conversions_a.sum() + conversions_b.sum()) / (n_visitors_a + n_visitors_b)
se = np.sqrt(pooled * (1 - pooled) * (1 / n_visitors_a + 1 / n_visitors_b))
z_score = (obs_rate_b - obs_rate_a) / se
# Two-sided p-value.  The survival function sf(z) = 1 - cdf(z) is evaluated
# directly, which avoids the cancellation error of `1 - cdf` for large |z|.
p_value = 2 * stats.norm.sf(abs(z_score))
significant = p_value < 0.05
# The test is two-sided, so "significant" alone does not say which variant
# won.  Only recommend B when it is significantly *better* than the control.
b_is_better = significant and obs_rate_b > obs_rate_a

print("\nStatistical Significance:")
print(f"  Z-score: {z_score:.2f}")
print(f"  P-value: {p_value:.4f}")
print(f"  Significant (α=0.05): {'YES ✅' if significant else 'NO ❌'}")

# Visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Conversion rates
variants = ['Variant A\n(Control)', 'Variant B\n(Treatment)']
rates = [obs_rate_a, obs_rate_b]
colors = ['lightblue', 'lightgreen']
bars = ax1.bar(variants, rates, color=colors, edgecolor='black', linewidth=2)
ax1.set_ylabel('Conversion Rate', fontsize=12)
ax1.set_title('A/B Test Results', fontsize=14, fontweight='bold')
ax1.grid(True, alpha=0.3, axis='y')
for bar, rate in zip(bars, rates):
    # Label each bar with its rate, placed slightly above the bar top.
    ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.002,
             f'{rate:.2%}', ha='center', fontsize=12, fontweight='bold')

# Business impact
if b_is_better:
    # NOTE: despite its name, this amount is multiplied by the number of
    # *conversions* below, i.e. it acts as revenue per converting visitor.
    revenue_per_visitor = 50  # $
    additional_conversions = (obs_rate_b - obs_rate_a) * n_visitors_b
    # The x365 projection treats the simulated visitor count as one day of
    # traffic.
    revenue_impact = additional_conversions * revenue_per_visitor * 365  # annual

    ax2.bar(['Current\nRevenue', 'With\nVariant B'],
            [n_visitors_a * obs_rate_a * revenue_per_visitor * 365,
             n_visitors_b * obs_rate_b * revenue_per_visitor * 365],
            color=['gray', 'green'], edgecolor='black', linewidth=2)
    ax2.set_ylabel('Annual Revenue ($)', fontsize=12)
    ax2.set_title(f'Business Impact: +${revenue_impact:,.0f}/year',
                  fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3, axis='y')
elif significant:
    # Significant difference in the wrong direction: B converts worse than A.
    ax2.text(0.5, 0.5, 'Variant B Performs Worse\nKeep Variant A',
             ha='center', va='center', fontsize=16, transform=ax2.transAxes)
    ax2.axis('off')
else:
    ax2.text(0.5, 0.5, 'Not Significant\nNeed More Data',
             ha='center', va='center', fontsize=16, transform=ax2.transAxes)
    ax2.axis('off')

plt.tight_layout()
plt.show()

if b_is_better:
    print("\n💰 Business Impact: Deploying Variant B could generate")
    print(f"    +${revenue_impact:,.0f} additional annual revenue!")
elif significant:
    print("\n⚠️ Variant B converts significantly worse than the control; keep Variant A.")

## Course Summary

### What We Learned

**Weeks 1-3**: Data collection and summarization  
**Week 4**: Central tendency (mean, median, mode)  
**Week 5**: Dispersion (variance, SD, IQR)  
**Week 6**: Correlation and relationships  
**Week 7**: Probability fundamentals  
**Week 8**: Random variables and expected value  
**Week 9**: Discrete distributions (Binomial, Poisson)  
**Week 10**: Continuous distributions (Uniform, Exponential)  
**Week 11**: Normal distribution and Z-scores  
**Week 12**: Integration and applications  

### Key Skills Acquired

✅ Descriptive statistics analysis  
✅ Probability calculations  
✅ Distribution identification and application  
✅ Statistical inference basics  
✅ Real-world problem solving  

### Tools Mastered
- NumPy for numerical computing
- Pandas for data manipulation
- Matplotlib/Seaborn for visualization
- SciPy for statistical functions

---

## Next Steps
- **Statistics II**: Hypothesis testing, regression, ANOVA
- **Machine Learning**: Apply statistical foundations
- **Data Science Projects**: Real-world applications

**Congratulations on completing Statistics for Data Science I! 🎉**