Notebook created: 2018-05-23 03:37:55  
Generated from: _build_jl/jl/lln_clt.rst  

In [None]:
#=

@author : Spencer Lyon <spencer.lyon@nyu.edu>
          Victoria Gregory <victoria.gregory@nyu.edu>

=#
using Plots
pyplot()
using Distributions
using LaTeXStrings

n = 100
srand(42)  # reproducible results

# == Arbitrary collection of distributions == #
# Each key doubles as a subplot title, so the constructor arguments have to
# agree with the parameters quoted in the key. Distributions.jl conventions:
#   * LogNormal(μ, σ): a single argument sets μ and leaves σ = 1, so the
#     LN(0, 1/2) entry must pass both parameters (1/2 is taken to be σ here;
#     NOTE(review): confirm it was not meant as the variance)
#   * Gamma(α, θ) and Exponential(θ): θ is the scale, i.e. 1 / rate
distributions = Dict("student's t with 10 degrees of freedom" => TDist(10),
                 "β(2, 2)" => Beta(2.0, 2.0),
                 "lognormal LN(0, 1/2)" => LogNormal(0.0, 0.5),
                 "γ(5, 1/2)" => Gamma(5.0, 2.0),
                 "poisson(4)" => Poisson(4),
                 "exponential with lambda = 1" => Exponential(1))

num_plots = 3
dist_data = zeros(num_plots, n)  # row i holds the n draws shown in subplot i
sample_means = []                # one vector of running sample means per subplot
dist_means = []                  # one constant vector at the population mean per subplot
titles = []                      # names of the selected distributions
for i = 1:num_plots
    # == Choose a randomly selected distribution == #
    # pop! removes the pick from the Dict, so no distribution is chosen twice;
    # the key list is therefore rebuilt on every pass
    dist_names = collect(keys(distributions))
    name = dist_names[rand(1:length(dist_names))]
    dist = pop!(distributions, name)

    # == Generate n draws from the distribution == #
    data = rand(dist, n)

    # == Compute sample mean at each n == #
    # Running sum divided by the sample size: every prefix mean in a single pass
    sample_mean = cumsum(data) ./ (1:n)

    dist_data[i, :] = data
    push!(sample_means, sample_mean)
    push!(dist_means, fill(mean(dist), n))
    push!(titles, name)
end

# == Plot == #
# Every draw is shown as a vertical stem from 0 up to its value. Each column of
# N / heights is one two-point series (foot and tip of a stem). Columns are
# interleaved by distribution: column k uses row mod1(k, num_plots) of dist_data.
# NOTE(review): this relies on Plots handing successive series to the subplots
# cyclically -- confirm against the Plots layout documentation.
N = repmat(reshape(repmat(1:n, 1, num_plots)', 1, n * num_plots), 2, 1)
heights = [zeros(1, n * num_plots); reshape(dist_data, 1, n * num_plots)]

# One stacked panel per sampled distribution
panels = (num_plots, 1)

# 1 x num_plots label row: the legend entry appears in the first panel only
first_only(lbl) = reshape(AbstractString[i == 1 ? lbl : "" for i in 1:num_plots],
                          1, num_plots)

plot(N, heights, layout=panels, label="", color=:grey, alpha=0.5)
plot!(1:n, dist_data', layout=panels, color=:grey, markershape=:circle,
        alpha=0.5, label="", linewidth=0)
plot!(1:n, sample_means, linewidth=3, alpha=0.6, color=:green, legend=:topleft,
      layout=panels, label=first_only(LaTeXString("\$\\bar{X}_n\$")))
plot!(1:n, dist_means, color=:black, linewidth=1.5, layout=panels,
      linestyle=:dash, grid=false, label=first_only(LaTeXString("\$\\mu\$")))
plot!(title=reshape(titles, 1, length(titles)))

In [None]:
# == Draw n observations from the standard Cauchy distribution == #
n = 200
dist = Cauchy()
srand(12)  # reproducible results
data = rand(dist, n)

"""
    plot_draws()

Plot the global sample `data` (of length `n`): one marker per draw, plus a thin
vertical stem joining each draw to zero.
"""
function plot_draws()
    caption = "$n observations from the Cauchy distribution"

    # Markers only: the line connecting the observations is given zero width
    plot(1:n, data, color=:blue, markershape=:circle,
         alpha=0.5, title=caption, legend=:none, linewidth=0)

    # Column i of stem_x / stem_y is the two-point segment (i, 0) -> (i, data[i])
    idx = linspace(1, n, n)
    stem_x = [idx idx]'
    stem_y = [zeros(1, n); data']
    plot!(stem_x, stem_y, linewidth=0.5, color=:blue)
end

plot_draws()

In [None]:
"""
    plot_means()

Plot the running sample mean of the first `n` entries of the global `data`,
together with a dashed horizontal reference line at zero.
"""
function plot_means()
    # == Compute sample mean at each n == #
    # Running sum divided by the sample size gives all prefix means in one pass
    sample_mean = cumsum(data[1:n]) ./ (1:n)

    # == Plot == #
    plot(1:n, sample_mean, color=:red,
         alpha=0.6, label=L"$\bar{X}_n$",
         linewidth=3, legendfont=font(12))
    plot!(1:n, zeros(n), color=:black,
          linewidth=1, linestyle=:dash, label="", grid=false)
end

plot_means()

In [None]:
srand(42)  # reproducible results
ns = [1, 2, 4, 8]
dom = 0:9

# == For each n: the Binomial(n, 0.5) probabilities on dom, and a panel title == #
pdfs = Any[pdf(Binomial(trials, 0.5), dom) for trials in ns]
titles = Any[LaTeXString("\$n = $trials\$") for trials in ns]

# == One bar chart per n; titles must be a 1 x length(ns) row == #
bar(dom, pdfs,
    layout=4, alpha=0.6,
    xlims=(-0.5, 8.5), ylims=(0, 0.55),
    xticks=dom, yticks=[0.0, 0.2, 0.4],
    legend=:none,
    title=reshape(titles, 1, length(titles)))

In [None]:
# == Set parameters == #
srand(42)  # reproducible results
n = 250    # Choice of n
k = 10000  # Number of draws of Y_n
# Exponential distribution, lambda = 1/2. Distributions.jl parameterizes
# Exponential by its scale 1/lambda, so a rate of 1/2 is Exponential(2.0).
dist = Exponential(2.0)
μ, s = mean(dist), std(dist)

# == Draw underlying RVs. Each row contains a draw of X_1,..,X_n == #
data = rand(dist, k, n)

# == Compute mean of each row, producing k draws of \bar X_n == #
sample_means = mean(data, 2)

# == Generate observations of Y_n == #
Y = sqrt(n) * (sample_means .- μ)

# == Plot == #
# Histogram of Y_n against the N(0, s^2) density, over +/- 3 standard deviations
xmin, xmax = -3 * s, 3 * s
histogram(Y, nbins=60, alpha=0.5, xlims=(xmin, xmax),
          norm=true, label="")
xgrid = linspace(xmin, xmax, 200)
plot!(xgrid, pdf.(Normal(0.0, s), xgrid), color=:black,
      linewidth=2, label=LaTeXString("\$N(0, \\sigma^2=$(s^2))\$"),
      legendfont=font(12))

In [None]:
using KernelDensity

beta_dist = Beta(2.0, 2.0)


"""
    gen_x_draws(k)

Return `k` draws from an equally weighted mixture of three shifted copies of
`beta_dist`, standardized with the sample's own mean and standard deviation.
"""
function gen_x_draws(k)
    # == Row r holds draws from beta_dist shifted by shifts[r] == #
    shifts = [-0.5, 0.6, -1.1]
    shifted = rand(beta_dist, 3, k) .+ shifts

    # == Set X[i] = shifted[j, i], where j is a random draw from {1, 2, 3} == #
    picks = rand(1:3, k)
    X = Float64[shifted[picks[i], i] for i in 1:k]

    # == Standardize: subtract the sample mean, divide by the sample std == #
    center, spread = mean(X), std(X)
    return (X .- center) ./ spread
end

nmax = 5
reps = 100000
ns = 1:nmax

srand(42)  # reproducible results

# == Form a matrix Z such that each column is reps independent draws of X == #
Z = Array{Float64}(reps, nmax)
for i=ns
    Z[:, i] = gen_x_draws(reps)
end

# == Take cumulative sum across columns == #
S = cumsum(Z, 2)

# == Divide j-th column by sqrt j == #
Y = S ./ sqrt.(ns)'

# == Plot == #
a, b = -3, 3
# NOTE(review): gs and xs are not referenced again in this cell
gs = 100
xs = linspace(a, b, gs)

# One 3D curve per n: x = KDE grid, y = n (constant), z = estimated density
x_vec = []
y_vec = []
z_vec = []
colors = []
for j=ns
    kde_est = kde(Y[:, j])
    _xs, ys = kde_est.x, kde_est.density
    push!(x_vec, collect(_xs))
    push!(y_vec, ys)
    push!(z_vec, fill(float(j), length(_xs)))
    # Opacity decreases with j: alpha = 1 - (j - 1) / nmax
    push!(colors, RGBA(0, 0, 0, 1 - (j - 1) / nmax))
end

plot(x_vec, z_vec, y_vec, color = reshape(colors,1,length(colors)), legend=:none)
plot!(xlims=(a,b), xticks=[-3; 0; 3], ylims=(1, nmax), yticks=ns, ylabel="n",
    xlabel = "\$ Y_n \$", zlabel = "\$ p(y_n) \$" , zlims=(0, 0.4), zticks=[0.2; 0.4])

In [None]:
# == Set parameters == #
srand(42)   # reproducible results
n = 250     # Choice of n
k = 100000  # Number of draws of Y_n
dist = Uniform(0, π/2)
μ, s = mean(dist), std(dist)

# Transformation applied to the sample mean, and its derivative
g = sin
g′ = cos

# == Draw underlying RVs. Each row contains a draw of X_1,..,X_n == #
data = rand(dist, k, n)

# == Compute mean of each row, producing k draws of \bar X_n == #
sample_means = mean(data, 2)

# == Scaled error sqrt(n) * (g(\bar X_n) - g(μ)) for every draw == #
error_obs = sqrt(n) .* (g.(sample_means) .- g(μ))

# == Plot == #
# Compare with N(0, (g'(μ) s)^2) over +/- 3 of its standard deviations
asymptotic_sd = g′(μ) * s
xmin = -3 * g′(μ) * s
xmax = -xmin
histogram(error_obs, nbins=60, alpha=0.5, normed=true, label="")
xgrid = linspace(xmin, xmax, 200)
# The closing parenthesis belongs inside the math delimiters of the label
plot!(xgrid, pdf.(Normal(0.0, asymptotic_sd), xgrid), color=:black,
      linewidth=2, label=LaTeXString("\$N(0, g'(\\mu)^2\\sigma^2)\$"),
      legendfont=font(12), xlims=(xmin, xmax), grid=false)

In [None]:
# == Set parameters == #
srand(42)  # reproducible results
n = 250
replications = 50000
dw = Uniform(-1, 1)
du = Uniform(-2, 2)
sw, su = std(dw), std(du)
vw, vu = sw^2, su^2
# Covariance matrix of the vector (W, W + U) when W and U are drawn independently
Σ = [vw vw; vw (vw + vu)]

# == Compute Σ^{-1/2} == #
Q = inv(sqrtm(Σ))

# == Generate observations of the normalized sample mean == #
# Column i holds sqrt(n) times the sample mean of (W, W + U) for replication i.
# Both distributions are centered at zero, so no mean is subtracted.
error_obs = Array{Float64}(2, replications)
for i=1:replications
    # == Generate one sequence of bivariate shocks == #
    W = rand(dw, n)
    U = rand(du, n)

    # == Construct the i-th observation of Y_n == #
    error_obs[1, i] = sqrt(n) * mean(W)
    error_obs[2, i] = sqrt(n) * mean(W .+ U)
end

# == Squared norm of each standardized observation == #
chisq_obs = vec(sum((Q * error_obs).^2, 1))

# == Plot == #
xmin, xmax = 0, 8
histogram(chisq_obs, nbins=50, normed=true, label="")
xgrid = linspace(xmin, xmax, 200)
plot!(xgrid, pdf.(Chisq(2), xgrid), color=:black,
      linewidth=2, label="Chi-squared with 2 degrees of freedom",
      legendfont=font(12), xlims=(xmin, xmax), grid=false)