<a href="https://colab.research.google.com/github/TaliNatan/DSPS_TNatan/blob/main/HW2_TNatan/CentralLimitTheorem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
# Seed NumPy's global random generator once, up front, so that running the
# notebook from start to finish reproduces the same samples and plots every time.
np.random.seed(456)

In [None]:
# Sample sizes to test: 100 values spaced logarithmically between 10**1 and
# 10**4.3, truncated to integers (recipe from Dr. Bianco).
mysize = np.logspace(start=1, stop=4.3, num=100, base=10).astype(int)

# Chi-Squared Distribution

In [None]:
# For every sample size in `mysize`, draw one chi-squared sample
# (100 degrees of freedom) and record its mean and standard deviation.
n_sizes = len(mysize)
all_mean_chi_dist = np.empty(n_sizes)  # every slot is overwritten in the loop below
all_STD_chi_dist = np.empty(n_sizes)

for idx, n_draws in enumerate(mysize):
    chi_dist = np.random.chisquare(100, size=n_draws)
    all_mean_chi_dist[idx] = chi_dist.mean()
    all_STD_chi_dist[idx] = chi_dist.std()

In [None]:
# Sample mean versus sample size for the chi-squared draws, with the
# expected mean (100) marked as a dashed red reference line.
fig, ax = plt.subplots()
ax.scatter(mysize, all_mean_chi_dist, c='k', s=7, zorder=10)
ax.axhline(y=100, color='r', linestyle='--', label=r'$\mu_{exp}$')
ax.set(
    xlabel='Sample Size',
    ylabel='Sample Mean',
    title='Chi-Squared Distribution',
    xlim=(-500, 20000),
)
ax.legend(loc='upper right')
plt.show()

## Figure 1.
The above plot shows the sample mean as a function of sample size for a chi-squared distribution with 100 degrees of freedom. As the sample size increases, the sample mean converges to the expected mean (100). This is consistent with the law of large numbers and the Central Limit Theorem: as the sample size $N$ grows, the mean of the sample converges to the 'true' mean of the population, and its scatter about that mean shrinks as $\sigma/\sqrt{N}$.

In [None]:
# Grid on which the fitted curve is evaluated for overplotting on the histogram.
x_chi = np.linspace(90, 110, num=1000)

# Fit a Gaussian to the chi-squared sample means (norm.fit returns the
# location and scale), then evaluate the fitted PDF on the grid.
mu_chi, std_chi = norm.fit(all_mean_chi_dist)
p_chi = norm.pdf(x_chi, loc=mu_chi, scale=std_chi)

In [None]:
# Histogram of the chi-squared sample means with the fitted Gaussian overplotted.
# density=True rescales the bars to unit area so they share a y-scale with the PDF.
plt.hist(all_mean_chi_dist, bins = 30, color = 'orange', density=True)
plt.plot(x_chi, p_chi, 'b', linewidth=2, linestyle = ':', zorder = 10, label = 'Gaussian Fit')
plt.xlabel('Sample Mean')
# The bars are normalized (density=True), so the y-axis is a probability
# density rather than raw counts.
plt.ylabel('Probability Density')
plt.title('Chi-Squared Distribution')
plt.legend(loc = 'upper right')
plt.xlim(92,108)
plt.show()

## Figure 2.
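The above plot shows a histogram (30 bins, normalized to unit area) of the sample means for the chi-squared distribution. A Gaussian fit to the sample means is shown as a blue dotted line. The Gaussian is a poor fit to the data: it does not have enough strength at the mean, i.e. the fitted curve underestimates the peak of the histogram. This is plausible because the sample means come from samples of very different sizes, so each has a different spread ($\sigma/\sqrt{N}$) and together they do not follow a single Gaussian.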

# Normal Distribution

In [None]:
# For every sample size in `mysize`, draw one normal sample (mean 100,
# standard deviation 1) and record its mean and standard deviation.
n_sizes = len(mysize)
all_mean_normal_dist = np.empty(n_sizes)  # every slot is overwritten in the loop below
all_STD_normal_dist = np.empty(n_sizes)

for idx, n_draws in enumerate(mysize):
    normal_dist = np.random.normal(100.00, 1.0, size=n_draws)
    all_mean_normal_dist[idx] = normal_dist.mean()
    all_STD_normal_dist[idx] = normal_dist.std()

In [None]:
# Sample mean versus sample size for the normal draws, with the
# expected mean (100) marked as a dashed red reference line.
fig, ax = plt.subplots()
ax.scatter(mysize, all_mean_normal_dist, c='k', s=7, zorder=10)
ax.axhline(y=100, color='r', linestyle='--', label=r'$\mu_{exp}$')
ax.set(
    xlabel='Sample Size',
    ylabel='Sample Mean',
    title='Normal Distribution',
    xlim=(-500, 20000),
)
ax.legend(loc='upper right')
plt.show()

## Figure 3.
The above plot shows the sample mean as a function of sample size for a normal distribution with mean 100 and standard deviation 1. As the sample size increases, the sample mean converges to the expected mean (100). This is consistent with the law of large numbers and the Central Limit Theorem: as the sample size $N$ grows, the mean of the sample converges to the 'true' mean of the population, and its scatter about that mean shrinks as $\sigma/\sqrt{N}$.

In [None]:
# Grid on which the fitted curve is evaluated for overplotting on the histogram.
x_normal = np.linspace(90, 110, num=1000)

# Fit a Gaussian to the normal-distribution sample means (norm.fit returns
# the location and scale), then evaluate the fitted PDF on the grid.
mu_normal, std_normal = norm.fit(all_mean_normal_dist)
p_normal = norm.pdf(x_normal, loc=mu_normal, scale=std_normal)

In [None]:
# Histogram of the normal-distribution sample means with the fitted Gaussian overplotted.
# density=True rescales the bars to unit area so they share a y-scale with the PDF.
plt.hist(all_mean_normal_dist, bins = 30, color = 'orange', density=True)
plt.plot(x_normal, p_normal, 'b', linewidth=2, linestyle = ':', zorder = 10, label = 'Gaussian Fit')
plt.xlabel('Sample Mean')
# The bars are normalized (density=True), so the y-axis is a probability
# density rather than raw counts.
plt.ylabel('Probability Density')
plt.title('Normal Distribution')
plt.legend(loc = 'upper right')
plt.xlim(99.5,100.5)
plt.show()

## Figure 4.
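The above plot shows a histogram (30 bins, normalized to unit area) of the sample means for the normal distribution. A Gaussian fit to the sample means is shown as a blue dotted line. The Gaussian is a poor fit to the data: it does not have enough strength at the mean, i.e. the fitted curve underestimates the peak of the histogram. This is plausible because the sample means come from samples of very different sizes, so each has a different spread ($\sigma/\sqrt{N}$) and together they do not follow a single Gaussian.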

# Poisson Distribution

In [None]:
# For every sample size in `mysize`, draw one Poisson sample (rate 100)
# and record its mean and standard deviation.
n_sizes = len(mysize)
all_mean_poisson_dist = np.empty(n_sizes)  # every slot is overwritten in the loop below
all_STD_poisson_dist = np.empty(n_sizes)

for idx, n_draws in enumerate(mysize):
    poisson_dist = np.random.poisson(100, size=n_draws)
    all_mean_poisson_dist[idx] = poisson_dist.mean()
    all_STD_poisson_dist[idx] = poisson_dist.std()

In [None]:
# Sample mean versus sample size for the Poisson draws, with the
# expected mean (100) marked as a dashed red reference line.
fig, ax = plt.subplots()
ax.scatter(mysize, all_mean_poisson_dist, c='k', s=7, zorder=10)
ax.axhline(y=100, color='r', linestyle='--', label=r'$\mu_{exp}$')
ax.set(
    xlabel='Sample Size',
    ylabel='Sample Mean',
    title='Poisson Distribution',
    xlim=(-500, 20000),
)
ax.legend(loc='upper right')
plt.show()

## Figure 5.
The above plot shows the sample mean as a function of sample size for a Poisson distribution with rate $\lambda = 100$. As the sample size increases, the sample mean converges to the expected mean (100). This is consistent with the law of large numbers and the Central Limit Theorem: as the sample size $N$ grows, the mean of the sample converges to the 'true' mean of the population, and its scatter about that mean shrinks as $\sigma/\sqrt{N}$.

In [None]:
# Grid on which the fitted curve is evaluated for overplotting on the histogram.
x_poisson = np.linspace(90, 110, num=1000)

# Fit a Gaussian to the Poisson sample means (norm.fit returns the
# location and scale), then evaluate the fitted PDF on the grid.
mu_poisson, std_poisson = norm.fit(all_mean_poisson_dist)
p_poisson = norm.pdf(x_poisson, loc=mu_poisson, scale=std_poisson)

In [None]:
# Histogram of the Poisson sample means with the fitted Gaussian overplotted.
# density=True rescales the bars to unit area so they share a y-scale with the PDF.
plt.hist(all_mean_poisson_dist, bins = 30, color = 'orange', density=True)
plt.plot(x_poisson, p_poisson, 'b', linewidth=2, linestyle = ':', zorder = 10, label = 'Gaussian Fit')
plt.xlabel('Sample Mean')
# The bars are normalized (density=True), so the y-axis is a probability
# density rather than raw counts.
plt.ylabel('Probability Density')
plt.title('Poisson Distribution')
plt.legend(loc = 'upper right')
plt.xlim(95,105)
plt.show()

## Figure 6.
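The above plot shows a histogram (30 bins, normalized to unit area) of the sample means for the Poisson distribution. A Gaussian fit to the sample means is shown as a blue dotted line. The Gaussian is a poor fit to the data: it does not have enough strength at the mean, i.e. the fitted curve underestimates the peak of the histogram. This is plausible because the sample means come from samples of very different sizes, so each has a different spread ($\sigma/\sqrt{N}$) and together they do not follow a single Gaussian.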

# Binomial Distribution

In [None]:
# For every sample size in `mysize`, draw one binomial sample
# (200 trials, success probability 0.5) and record its mean and
# standard deviation.
n_sizes = len(mysize)
all_mean_binomial_dist = np.empty(n_sizes)  # every slot is overwritten in the loop below
all_STD_binomial_dist = np.empty(n_sizes)

for idx, n_draws in enumerate(mysize):
    binomial_dist = np.random.binomial(200, 0.5, size=n_draws)
    all_mean_binomial_dist[idx] = binomial_dist.mean()
    all_STD_binomial_dist[idx] = binomial_dist.std()

In [None]:
# Sample mean versus sample size for the binomial draws, with the
# expected mean (100) marked as a dashed red reference line.
fig, ax = plt.subplots()
ax.scatter(mysize, all_mean_binomial_dist, c='k', s=7, zorder=10)
ax.axhline(y=100, color='r', linestyle='--', label=r'$\mu_{exp}$')
ax.set(
    xlabel='Sample Size',
    ylabel='Sample Mean',
    title='Binomial Distribution',
    xlim=(-500, 20000),
)
ax.legend(loc='upper right')
plt.show()

## Figure 7.
The above plot shows the sample mean as a function of sample size for a binomial distribution with 200 trials and success probability 0.5. As the sample size increases, the sample mean converges to the expected mean (100). This is consistent with the law of large numbers and the Central Limit Theorem: as the sample size $N$ grows, the mean of the sample converges to the 'true' mean of the population, and its scatter about that mean shrinks as $\sigma/\sqrt{N}$.

In [None]:
# Grid on which the fitted curve is evaluated for overplotting on the histogram.
x_binomial = np.linspace(90, 110, num=1000)

# Fit a Gaussian to the binomial sample means (norm.fit returns the
# location and scale), then evaluate the fitted PDF on the grid.
mu_binomial, std_binomial = norm.fit(all_mean_binomial_dist)
p_binomial = norm.pdf(x_binomial, loc=mu_binomial, scale=std_binomial)

In [None]:
# Histogram of the binomial sample means with the fitted Gaussian overplotted.
# density=True rescales the bars to unit area so they share a y-scale with the PDF.
plt.hist(all_mean_binomial_dist, bins = 30, color = 'orange', density=True)
plt.plot(x_binomial, p_binomial, 'b', linewidth=2, linestyle = ':', zorder = 10, label = 'Gaussian Fit')
plt.xlabel('Sample Mean')
# The bars are normalized (density=True), so the y-axis is a probability
# density rather than raw counts.
plt.ylabel('Probability Density')
plt.title('Binomial Distribution')
plt.legend(loc = 'upper right')
plt.xlim(95,105)
plt.show()

## Figure 8.
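The above plot shows a histogram (30 bins, normalized to unit area) of the sample means for the binomial distribution. A Gaussian fit to the sample means is shown as a blue dotted line. The Gaussian is a poor fit to the data: it does not have enough strength at the mean, i.e. the fitted curve underestimates the peak of the histogram. This is plausible because the sample means come from samples of very different sizes, so each has a different spread ($\sigma/\sqrt{N}$) and together they do not follow a single Gaussian.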

# Gumbel Distribution

In [None]:
# For every sample size in `mysize`, draw one Gumbel sample (unit scale)
# and record its mean and standard deviation. The location is shifted down
# by the Euler-Mascheroni constant (~0.5772) so that the distribution's
# mean, loc + gamma * scale, comes out at 100.
n_sizes = len(mysize)
all_mean_gumbel_dist = np.empty(n_sizes)  # every slot is overwritten in the loop below
all_STD_gumbel_dist = np.empty(n_sizes)

for idx, n_draws in enumerate(mysize):
    gumbel_dist = np.random.gumbel(100 - 0.5772156649, 1, size=n_draws)
    all_mean_gumbel_dist[idx] = gumbel_dist.mean()
    all_STD_gumbel_dist[idx] = gumbel_dist.std()

In [None]:
# Sample mean versus sample size for the Gumbel draws, with the
# expected mean (100) marked as a dashed red reference line.
fig, ax = plt.subplots()
ax.scatter(mysize, all_mean_gumbel_dist, c='k', s=7, zorder=10)
ax.axhline(y=100, color='r', linestyle='--', label=r'$\mu_{exp}$')
ax.set(
    xlabel='Sample Size',
    ylabel='Sample Mean',
    title='Gumbel Distribution',
    xlim=(-500, 20000),
)
ax.legend(loc='upper right')
plt.show()

## Figure 9.
The above plot shows the sample mean as a function of sample size for a Gumbel distribution with unit scale, whose location is chosen so that the mean is 100. As the sample size increases, the sample mean converges to the expected mean (100). This is consistent with the law of large numbers and the Central Limit Theorem: as the sample size $N$ grows, the mean of the sample converges to the 'true' mean of the population, and its scatter about that mean shrinks as $\sigma/\sqrt{N}$.

In [None]:
# Grid on which the fitted curve is evaluated for overplotting on the histogram.
x_gumbel = np.linspace(90, 110, num=1000)

# Fit a Gaussian to the Gumbel sample means (norm.fit returns the
# location and scale), then evaluate the fitted PDF on the grid.
mu_gumbel, std_gumbel = norm.fit(all_mean_gumbel_dist)
p_gumbel = norm.pdf(x_gumbel, loc=mu_gumbel, scale=std_gumbel)

In [None]:
# Histogram of the Gumbel sample means with the fitted Gaussian overplotted.
# density=True rescales the bars to unit area so they share a y-scale with the PDF.
plt.hist(all_mean_gumbel_dist, bins = 30, color = 'orange', density=True)
plt.plot(x_gumbel, p_gumbel, 'b', linewidth=2, linestyle = ':', zorder = 10, label = 'Gaussian Fit')
plt.xlabel('Sample Mean')
# The bars are normalized (density=True), so the y-axis is a probability
# density rather than raw counts.
plt.ylabel('Probability Density')
plt.title('Gumbel Distribution')
plt.legend(loc = 'upper right')
plt.xlim(99,101)
plt.show()

## Figure 10.
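The above plot shows a histogram (30 bins, normalized to unit area) of the sample means for the Gumbel distribution. A Gaussian fit to the sample means is shown as a blue dotted line. The Gaussian is a poor fit to the data: it does not have enough strength at the mean, i.e. the fitted curve underestimates the peak of the histogram. This is plausible because the sample means come from samples of very different sizes, so each has a different spread ($\sigma/\sqrt{N}$) and together they do not follow a single Gaussian.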