# Campionamento



# Media campionaria
La media campionaria di $N$ campioni indipendenti identicametne distribuiti,

$$\overline{X} = \frac{1}{N} \sum_{n=1}^{N} X_n \ ,$$

è una variabile casuale **todo**

# Teorema del limite centrale
La media di $N$ variabili iid con media $\mu$ e varianza $\sigma^2$ è una variabile casuale, la cui distribuzione di probabilità tende alla distribuzione normale $\mathscr{N}\left(\mu, \frac{\sigma^2}{N} \right)$ per $N \rightarrow \infty$.

In [1]:
#

import numpy as np


def sample_to_pdf(x, bins=100, density=True):
  """ evaluate pdf, from histogram, and normalizing with uniform integration rule """
  hist, bin_edges = np.histogram(x, bins=bins, density=True)   # get data from histogram
  bin_cen = 0.5 * ( bin_edges[:-1] + bin_edges[1:] )           # bin center
  d_bin = bin_edges[1:] - bin_edges[:-1]                       # bin width
  pdf = hist / np.sum(hist * d_bin)                           # normalization to get int pdf = 1
  return pdf, bin_cen


In [2]:
#> List of random number generators for the population
# numpy random number generators with parameters
rgen_list = [
    { 'name': 'Normal',
      'generator': np.random.default_rng().normal, 'params': {},   # normal distribution
      'population_params': {'avg': 0., 'sigma': 1.}, \
      'write_params': lambda mu, sigma, n: {'loc': mu, 'scale': sigma, 'size': n},
      'sample_avg': [],
      'sample_avg_stat': {'avg': 0., 'std': 0.},
      'histograms': [],
      'xlimit': [-3., 3.]
    },
    { 'name': 'Uniform',
      'generator': np.random.default_rng().uniform, 'params': {},   # uniform distribution
      'population_params': {'avg': 0., 'sigma': 1.}, \
      'write_params': lambda mu, sigma, n: {'low': mu-sigma, 'high': mu+sigma, 'size': n},
      'sample_avg': [],
      'sample_avg_stat': {'avg': 0., 'std': 0.},
      'histograms': [],
      'xlimit': [-2, 2.]
    },
]

n_gen = len(rgen_list)

In [10]:
samples = { 'gen': {} }
sample_size_max = 100
n_samples       = 1000
population_size = 10000
sample_size_v   = [ sample_size_max ]

#> Generate populations with different probability distribution and draw samples from them
for rgen in rgen_list:

  print(rgen['name'])

  samples['gen'][rgen['name']] = {}

  #> Generate population
  pop_params = rgen['population_params']
  gen_params = rgen['write_params'](pop_params['avg'], pop_params['sigma'], population_size)
  pop = rgen['generator'](**gen_params)
    
  #> Loop over sample size
  for sample_size in sample_size_v:

    samples['gen'][rgen['name']][str(sample_size)] = {
         'pdf': [], 'sample_avgs': [],
         'xlim': rgen['xlimit'],
         'sigma_pop_num': np.std(pop), 'sigma_pop': None, 'sigma2_ratio': None}
      
    #> Take samples and evaluate average
    i_e = 0
    for i_s in range(n_samples):
      sam = pop[i_e:i_e+sample_size]
      f, x = sample_to_pdf(sam, bins=20)

      samples['gen'][rgen['name']][str(sample_size)]['pdf'].append({'x': x, 'f': f})
      samples['gen'][rgen['name']][str(sample_size)]['sample_avgs'].append(np.mean(sam))

      i_e = i_e + sample_size


#> Statistics of the sample average
print("Distribution:sample_size - sigma2_ratio = sigma2_pop/sigma2_sample_avg - sample_mean: avg, std dev")
for rk, r in samples['gen'].items():
  print(f"{rk}:")
  #> Loop over sample size
  for sk, s in r.items():
    s['sample_avg_avg'] = np.mean(s['sample_avgs'])
    s['sample_avg_std'] = np.std(s['sample_avgs'])
    s['sigma2_ratio'] = s['sigma_pop_num']**2 / s['sample_avg_std']**2

    print(f"{rk}:{sk} - sigma2_ratio: {s['sigma2_ratio']} - sample_mean: avg, std: {s['sample_avg_avg']}, {s['sample_avg_std']}")

Normal
(10000,)
Uniform
(10000,)
Distribution:sample_size - sigma2_ratio = sigma2_pop/sigma2_sample_avg - sample_mean: avg, std dev
Normal:
Normal:200 - sigma2_ratio: nan - sample_mean: avg, std: nan, nan
Uniform:
Uniform:200 - sigma2_ratio: nan - sample_mean: avg, std: nan, nan


# Campioni ridotti e $t$-Student