In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import math

# Central Limit Theorem (CLT)

The objective of this section is to provide you with a hands-on opportunity to observe and understand the Central Limit Theorem in action. The CLT is a fundamental result that supports many statistical techniques and methods. It provides a theoretical basis for making inferences about population parameters based on sample statistics.<br>
First, select three different probability distributions. These will serve as the
population distributions from which you will draw samples.<br>
Now for each of the distributions, perform the following steps:
1. Generate a large number of random samples with a specific sample size from the chosen distribution.
2. Calculate the mean of each sample.
3. Plot the histogram of the sample means and overlay it with the expected normal distribution based on the Central Limit Theorem.
4. Repeat steps 1 to 3 for increasing sample sizes and observe how the distribution of sample means changes as the sample size increases.
<br>

Document your observations and insights from each experiment. Compare the distribution of sample means for each sample size and discuss how they align with the principles of the Central Limit Theorem.

In [None]:
# Histogram palette, running from greens through blues to purples.
# The plotting code picks entries by position, so the order is significant.
colors = [
    # greens
    "darkgreen",
    "green",
    "forestgreen",
    "mediumseagreen",
    # blue-greens
    "teal",
    "darkcyan",
    # blues
    "deepskyblue",
    "dodgerblue",
    "blue",
    "navy",
    # purples
    "indigo",
    "darkslateblue",
    "darkorchid",
]

In [None]:
# Seed NumPy's global generator so that "Restart Kernel -> Run All" reproduces
# the same figures; the samplers used in this notebook draw from the legacy
# np.random global state.
SEED = 42
np.random.seed(SEED)

# Number of observations per sample to compare, from a single draw up to 1000.
sample_sizes = [1, 5, 10, 30, 50, 100, 500, 1000]


In [None]:
def demonstrate_clt(population_distribution, dist_name, sample_sizes,
                    num_samples=1000, population_mean=None, population_std=None):
    """Plot the distribution of sample means for several sample sizes.

    For every entry of ``sample_sizes`` the function draws ``num_samples``
    independent samples from ``population_distribution``, computes the mean of
    each one, and shows a density histogram of those means with a normal curve
    drawn on top. All panels are placed in one figure, two per row.

    Parameters
    ----------
    population_distribution : callable
        Called as ``population_distribution(size=n)``; must return ``n``
        draws from the population.
    dist_name : str
        Name of the population distribution, used in each panel title.
    sample_sizes : iterable of int
        Number of observations per sample, one panel per entry.
    num_samples : int, optional
        How many samples (and therefore sample means) to generate per panel.
        Defaults to 1000.
    population_mean : float, optional
        True mean of the population. When given, the overlaid curve is centred
        on it; otherwise the mean of the simulated sample means is used.
    population_std : float, optional
        True standard deviation of the population. When given, the overlaid
        curve uses the CLT prediction ``population_std / sqrt(n)``; otherwise
        the standard deviation of the simulated sample means is used.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. Nothing is plotted when
        ``sample_sizes`` is empty.

    Raises
    ------
    ValueError
        If ``num_samples`` is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # Materialise once so generators and other one-shot iterables work too.
    sample_sizes = list(sample_sizes)
    n = len(sample_sizes)
    if n == 0:
        # Nothing to show; avoids creating a zero-height figure.
        return

    # Two panels per row (a single column when there is only one panel).
    rows = math.ceil(n / 2)
    cols = 2 if n > 1 else 1

    fig = plt.figure(figsize=(18, 6 * rows))
    for i, sample_size in enumerate(sample_sizes, 1):
        # 1. Generate a large number of random samples.
        # 2. Calculate the mean of each sample.
        sample_means = np.array([
            np.mean(population_distribution(size=sample_size))
            for _ in range(num_samples)
        ])

        # Empirical mean and variance of the sample means (shown in the title).
        mean_of_sample_means = np.mean(sample_means)
        variance_of_sample_means = np.var(sample_means)

        # 3.1 Plot the histogram of the sample means.
        ax = fig.add_subplot(rows, cols, i)
        # Walk the palette from its end and wrap around, so having more panels
        # than palette entries does not raise IndexError.
        color = colors[-1 - (i - 1) % len(colors)]
        ax.hist(sample_means, bins=30, density=True, alpha=0.6, color=color,
                label=f'Sample Means (n={sample_size})')

        # 3.2 Overlay the normal curve. Use the CLT prediction where the true
        # population parameters are known, the empirical moments otherwise.
        uses_theory = population_mean is not None or population_std is not None
        mu = mean_of_sample_means if population_mean is None else population_mean
        if population_std is None:
            sigma = np.sqrt(variance_of_sample_means)
        else:
            sigma = population_std / np.sqrt(sample_size)

        # A zero spread (all sample means identical) has no normal density.
        if sigma > 0:
            xmin, xmax = ax.get_xlim()
            x = np.linspace(xmin, xmax, 100)
            curve_label = 'CLT Normal Distribution' if uses_theory else 'Normal Distribution'
            ax.plot(x, norm.pdf(x, mu, sigma), 'k', linewidth=2, label=curve_label)

        # Report the empirical mean and variance in the panel title.
        ax.set_title(f'{dist_name} (n={sample_size})\nMean: {mean_of_sample_means:.5f}, Variance: {variance_of_sample_means:.5f}')
        ax.legend()

    fig.tight_layout()
    plt.show()

## 1. Uniform Distribution

In [None]:
# Population: continuous uniform distribution with low=0 and high=1.
demonstrate_clt(
    population_distribution=lambda size: np.random.uniform(low=0, high=1, size=size),
    dist_name='Uniform Distribution',
    sample_sizes=sample_sizes,
)


## 2. Exponential Distribution


In [None]:
# Population: exponential distribution with scale parameter 1.
demonstrate_clt(
    population_distribution=lambda size: np.random.exponential(scale=1, size=size),
    dist_name='Exponential Distribution',
    sample_sizes=sample_sizes,
)


## 3. Binomial Distribution

In [None]:
# Population: binomial distribution with 10 trials and success probability 0.5.
demonstrate_clt(
    population_distribution=lambda size: np.random.binomial(n=10, p=0.5, size=size),
    dist_name='Binomial Distribution',
    sample_sizes=sample_sizes,
)
