In [4]:
from numpy.random import normal
import pandas as pd

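The maximum likelihood estimators (MLE) of the mean and variance of a Gaussian distribution are the sample mean, $\hat{\mu} = \frac{1}{N}\sum_{i=1}^{N} x_i$, and the "special" (biased) sample variance whose denominator is $N$, i.e., the number of samples: $\hat{\sigma}^2_{MLE} = \frac{1}{N}\sum_{i=1}^{N} (x_i - \hat{\mu})^2$.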

In [5]:
# can use third argument of normal function
def generateGaussian(mean=0, sigma=1, samples=10):
    """Draw values one at a time from a normal distribution.

    Parameters
    ----------
    mean : float
        Location passed to ``normal`` for every draw.
    sigma : float
        Scale (standard deviation) passed to ``normal`` for every draw.
    samples : int
        Number of values to draw.

    Returns
    -------
    list
        ``samples`` scalar draws, in the order they were generated.
    """
    # One scalar call per element keeps the result a plain Python list.
    return [normal(mean, sigma) for _ in range(samples)]

In [6]:
def MLEMean(samples):
    """Return the arithmetic mean of ``samples``.

    This is the maximum likelihood estimate of the mean: the sum of all
    values divided by the number of values.

    Parameters
    ----------
    samples : sequence of numbers
        Must support iteration and ``len``.

    Returns
    -------
    float
        Sum of the values divided by ``len(samples)``. An empty sequence
        raises ``ZeroDivisionError``.
    """
    total = sum(samples)
    count = len(samples)
    return total / count

In [7]:
def MLEVar(samples):
    """Return the maximum likelihood estimate of the variance of ``samples``.

    The squared deviations from the sample mean (as computed by ``MLEMean``)
    are summed and divided by N, the number of samples.

    Parameters
    ----------
    samples : sequence of numbers
        Must support iteration and ``len``.

    Returns
    -------
    float
        Sum of squared deviations divided by ``len(samples)``.
    """
    centre = MLEMean(samples)
    squared_dev_total = sum((value - centre) ** 2 for value in samples)
    return squared_dev_total / len(samples)



The unbiased estimators of the mean and variance of a Gaussian distribution are the sample mean and the sample variance with denominator $N - 1$ (Bessel's correction), respectively.

In [8]:
def UnbiasedMean(samples):
    """Return the unbiased estimate of the mean of ``samples``.

    The value is obtained by delegating to ``MLEMean``, so both estimators
    always return the same number for the same input.
    """
    estimate = MLEMean(samples)
    return estimate

In [9]:
def UnbiasedVar(samples):
    """Return the unbiased sample variance of ``samples``.

    The squared deviations from the sample mean (as computed by ``MLEMean``)
    are summed and divided by N - 1 instead of N.

    Parameters
    ----------
    samples : sequence of numbers
        Must support iteration and ``len``.

    Returns
    -------
    float
        Sum of squared deviations divided by ``len(samples) - 1``. A
        single-element sequence raises ``ZeroDivisionError``.
    """
    centre = MLEMean(samples)
    squared_dev_total = sum((value - centre) ** 2 for value in samples)
    degrees_of_freedom = len(samples) - 1
    return squared_dev_total / degrees_of_freedom

In [16]:

def calcDiff(N, runs=100, mean=0, variance=1):
    """Measure estimator errors over repeated Gaussian sampling experiments.

    For each of ``runs`` independent experiments, ``N`` samples are drawn
    with ``generateGaussian`` and the true parameters are compared with the
    MLE and unbiased estimates of mean and variance.

    Parameters
    ----------
    N : int
        Number of samples drawn in each experiment.
    runs : int, optional
        Number of experiments; every returned list has exactly this length.
    mean : float, optional
        True mean, used both to generate the samples and as the reference
        value for the mean errors.
    variance : float, optional
        True variance, used both to generate the samples (via its square
        root) and as the reference value for the variance errors.

    Returns
    -------
    tuple of four lists
        ``(mleMeanDiff, mleVarDiff, unbiasedMeanDiff, unbiasedVarDiff)``,
        each holding ``true value - estimate`` for every run.

    Raises
    ------
    ValueError
        If ``variance`` is negative.
    """
    if variance < 0:
        raise ValueError("variance must be non-negative")
    # The generator is parameterised by standard deviation, not variance.
    sigma = variance ** 0.5

    # Grown by appending so the lengths always equal `runs`; the previous
    # fixed-size buffers of 100 broke for runs > 100 and left None padding
    # for runs < 100.
    mleMeanDiff = []
    mleVarDiff = []
    unbiasedMeanDiff = []
    unbiasedVarDiff = []

    for _ in range(runs):
        samples = generateGaussian(mean=mean, sigma=sigma, samples=N)
        mleMean = MLEMean(samples)
        mleVar = MLEVar(samples)
        unbiasedMean = UnbiasedMean(samples)
        unbiasedVar = UnbiasedVar(samples)

        mleMeanDiff.append(mean - mleMean)
        mleVarDiff.append(variance - mleVar)

        unbiasedMeanDiff.append(mean - unbiasedMean)
        unbiasedVarDiff.append(variance - unbiasedVar)
    return mleMeanDiff, mleVarDiff, unbiasedMeanDiff, unbiasedVarDiff

In [17]:
# True parameters of the distribution the estimates are compared against.
mean = 0
variance = 1
std_dev = variance  # numerically equal here only because the variance is 1


In [18]:
# Tabulate the estimation errors of 100 runs with N = 10 samples each.
# `indices` labels the runs 1..100 and is reused by the later tables.
indices = list(range(1, 101))
(mleMeanDiff_10,
 mleVarDiff_10,
 unbiasedMeanDiff_10,
 unbiasedVarDiff_10) = calcDiff(N=10, runs=100)
df_10 = pd.DataFrame(
    data={
        'MLE_Diff_mean_10': mleMeanDiff_10,
        'MLE_Diff_var_10': mleVarDiff_10,
        'Unbiased_Diff_mean_10': unbiasedMeanDiff_10,
        'Unbiased_Diff_var_10': unbiasedVarDiff_10,
    },
    index=indices,
)
print("N = 10")
df_10

N = 10


Unnamed: 0,MLE_Diff_mean_10,MLE_Diff_var_10,Unbiased_Diff_mean_10,Unbiased_Diff_var_10
1,-0.147720,0.051172,-0.147720,-0.054254
2,-0.287259,0.359385,-0.287259,0.288205
3,0.474200,0.603266,0.474200,0.559185
4,-0.283418,0.462472,-0.283418,0.402746
5,-0.556625,-0.125223,-0.556625,-0.250248
6,-0.321343,-0.000114,-0.321343,-0.111238
7,0.066480,-0.306181,0.066480,-0.451312
8,-0.220426,-0.052026,-0.220426,-0.168918
9,0.064246,0.285308,0.064246,0.205898
10,-0.219013,-0.425598,-0.219013,-0.583998


In [19]:
# Tabulate the estimation errors of 100 runs with N = 100 samples each.
(mleMeanDiff_100,
 mleVarDiff_100,
 unbiasedMeanDiff_100,
 unbiasedVarDiff_100) = calcDiff(N=100, runs=100)
df_100 = pd.DataFrame(
    data={
        'MLE_Diff_mean_100': mleMeanDiff_100,
        'MLE_Diff_var_100': mleVarDiff_100,
        'Unbiased_Diff_mean_100': unbiasedMeanDiff_100,
        'Unbiased_Diff_var_100': unbiasedVarDiff_100,
    },
    index=indices,
)
print("N = 100")
df_100

N = 100


Unnamed: 0,MLE_Diff_mean_100,MLE_Diff_var_100,Unbiased_Diff_mean_100,Unbiased_Diff_var_100
1,-0.079807,0.174238,-0.079807,0.165897
2,0.029076,0.184862,0.029076,0.176629
3,-0.200785,0.119353,-0.200785,0.110458
4,0.108193,-0.262660,0.108193,-0.275414
5,-0.100435,-0.158191,-0.100435,-0.169890
6,-0.033148,0.172004,-0.033148,0.163641
7,-0.118076,0.001416,-0.118076,-0.008670
8,-0.018682,-0.117312,-0.018682,-0.128598
9,-0.012792,0.124556,-0.012792,0.115714
10,0.066028,0.046426,0.066028,0.036794


In [20]:
# Tabulate the estimation errors of 100 runs with N = 1000 samples each.
(mleMeanDiff_1000,
 mleVarDiff_1000,
 unbiasedMeanDiff_1000,
 unbiasedVarDiff_1000) = calcDiff(N=1000, runs=100)
df_1000 = pd.DataFrame(
    data={
        'MLE_Diff_mean_1000': mleMeanDiff_1000,
        'MLE_Diff_var_1000': mleVarDiff_1000,
        'Unbiased_Diff_mean_1000': unbiasedMeanDiff_1000,
        'Unbiased_Diff_var_1000': unbiasedVarDiff_1000,
    },
    index=indices,
)
print("N = 1000")
df_1000

N = 1000


Unnamed: 0,MLE_Diff_mean_1000,MLE_Diff_var_1000,Unbiased_Diff_mean_1000,Unbiased_Diff_var_1000
1,-0.036144,0.023178,-0.036144,0.022200
2,-0.032517,-0.004628,-0.032517,-0.005634
3,0.007890,-0.015487,0.007890,-0.016504
4,0.016345,0.005318,0.016345,0.004322
5,-0.021968,0.018245,-0.021968,0.017262
6,-0.036833,-0.041180,-0.036833,-0.042222
7,0.002822,-0.066421,0.002822,-0.067488
8,0.010168,0.008519,0.010168,0.007527
9,-0.027218,-0.003381,-0.027218,-0.004385
10,0.050837,0.001265,0.050837,0.000265


In [21]:
# Tabulate the estimation errors of 100 runs with N = 10000 samples each.
(mleMeanDiff_10000,
 mleVarDiff_10000,
 unbiasedMeanDiff_10000,
 unbiasedVarDiff_10000) = calcDiff(N=10000, runs=100)
df_10000 = pd.DataFrame(
    data={
        'MLE_Diff_mean_10000': mleMeanDiff_10000,
        'MLE_Diff_var_10000': mleVarDiff_10000,
        'Unbiased_Diff_mean_10000': unbiasedMeanDiff_10000,
        'Unbiased_Diff_var_10000': unbiasedVarDiff_10000,
    },
    index=indices,
)
print("N = 10000")
df_10000

N = 10000


Unnamed: 0,MLE_Diff_mean_10000,MLE_Diff_var_10000,Unbiased_Diff_mean_10000,Unbiased_Diff_var_10000
1,0.016572,-0.024951,0.016572,-0.025053
2,-0.004193,-0.024669,-0.004193,-0.024772
3,-0.021007,0.003535,-0.021007,0.003435
4,0.005856,-0.003851,0.005856,-0.003951
5,-0.010691,0.006399,-0.010691,0.006299
6,0.014589,-0.006036,0.014589,-0.006137
7,-0.000555,0.009449,-0.000555,0.009350
8,0.004148,-0.004851,0.004148,-0.004952
9,-0.014978,-0.020200,-0.014978,-0.020302
10,-0.016943,-0.003781,-0.016943,-0.003882


In [22]:
# Tabulate the estimation errors of 100 runs with N = 100000 samples each.
(mleMeanDiff_100000,
 mleVarDiff_100000,
 unbiasedMeanDiff_100000,
 unbiasedVarDiff_100000) = calcDiff(N=100000, runs=100)
df_100000 = pd.DataFrame(
    data={
        'MLE_Diff_mean_100000': mleMeanDiff_100000,
        'MLE_Diff_var_100000': mleVarDiff_100000,
        'Unbiased_Diff_mean_100000': unbiasedMeanDiff_100000,
        'Unbiased_Diff_var_100000': unbiasedVarDiff_100000,
    },
    index=indices,
)
print("N = 100000")
df_100000

N = 100000


Unnamed: 0,MLE_Diff_mean_100000,MLE_Diff_var_100000,Unbiased_Diff_mean_100000,Unbiased_Diff_var_100000
1,-0.002266,0.002909,-0.002266,0.002899
2,-0.000621,0.003170,-0.000621,0.003160
3,0.000913,0.004819,0.000913,0.004809
4,-0.004010,-0.004246,-0.004010,-0.004256
5,0.000293,-0.002944,0.000293,-0.002954
6,0.002679,0.002033,0.002679,0.002023
7,-0.003151,-0.006573,-0.003151,-0.006584
8,0.003728,-0.005558,0.003728,-0.005568
9,0.001038,0.003503,0.001038,0.003493
10,-0.003419,0.006210,-0.003419,0.006200


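We see that as N tends to infinity, the MLE of the variance tends to the unbiased variance estimate: the gap between the two shrinks as N grows. Also, as the number of samples increases, the errors of both the mean and the variance estimates shrink toward zero.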

In [33]:
# Write each error table to its own CSV file in the working directory.
for csv_name, table in (
    ('N_10.csv', df_10),
    ('N_100.csv', df_100),
    ('N_1000.csv', df_1000),
    ('N_10000.csv', df_10000),
    ('N_100000.csv', df_100000),
):
    table.to_csv(csv_name)