Skip to content

Latest commit

 

History

History
204 lines (170 loc) · 6.66 KB

statistics.md

File metadata and controls

204 lines (170 loc) · 6.66 KB

# Statistics

# Stat.binmean example: the raw iris scatter (left) next to the same aesthetics
# with `Stat.binmean` applied (right).
using Gadfly, RDatasets
set_default_plot_size(21cm, 8cm)

# Load the data set once and share it between both panels instead of reading
# it from disk twice with identical arguments.
iris = dataset("datasets", "iris")

p1 = plot(iris, x="SepalLength", y="SepalWidth", Geom.point)
p2 = plot(iris, x="SepalLength", y="SepalWidth", Stat.binmean, Geom.point)
hstack(p1, p2)
# Stat.density example: densities evaluated from the distribution itself (left)
# versus densities estimated from random draws (right).
using DataFrames, Gadfly, Distributions
set_default_plot_size(21cm, 8cm)

x = -4:0.1:4
# One table per mean μ; the `u` column carries the label used for colouring.
Da = map(μ -> DataFrame(x=x, ymax=pdf.(Normal(μ), x), u="μ=$μ"), [-1, 1])
Db = map(μ -> DataFrame(x=randn(200) .+ μ, u="μ=$μ"), [-1, 1])

# Stack the per-mean tables into one long table per panel.
curves = reduce(vcat, Da)
draws = reduce(vcat, Db)

# Left panel: line plus ribbon from 0 up to the evaluated density.
p1 = plot(curves, x=:x, y=:ymax, ymin=[0.0], ymax=:ymax, color=:u,
    Geom.line, Geom.ribbon,
    Guide.ylabel("Density"),
    Theme(alphas=[0.6]),
    Guide.colorkey(title="", pos=[2.5,0.6]),
    Guide.title("Parametric PDF")
)

# Right panel: filled polygons from `Stat.density` on the sampled values.
p2 = plot(draws, x=:x, color=:u,
    Theme(alphas=[0.6]),
    Stat.density(bandwidth=0.5),
    Geom.polygon(fill=true, preserve_order=true),
    Coord.cartesian(xmin=-4, xmax=4, ymin=0, ymax=0.4),
    Guide.colorkey(title="", pos=[2.5,0.6]),
    Guide.title("Kernel PDF")
)

hstack(p1, p2)
# Stat.quantile_bars example: per-group density curves with bars drawn at the
# 0.05 and 0.95 quantiles.
using CategoricalArrays
using Gadfly
set_default_plot_size(14cm, 8cm)

n = 400
# Two equally sized groups shifted to -1 and +1. The block length is derived
# from `n` (it used to be a separate literal 200), and the sample length is
# taken from `group`, so changing `n` can no longer cause a length mismatch in
# the broadcast below.
group = repeat([-1, 1], inner=n ÷ 2)
x = randn(length(group)) .+ group

plot(x=x, color=categorical(group),
    Guide.colorkey(title="", pos=[3.6,0.7]),
    # Filled, semi-transparent density per group.
    layer(Stat.density, Geom.line, Geom.polygon(fill=true, preserve_order=true),
        alpha=[0.4]),
    # Segments marking the 5% and 95% quantiles of each group.
    layer(Stat.quantile_bars(quantiles=[0.05, 0.95]), Geom.segment),
    Guide.title("Density with bars showing the central 90% CI"),
    Guide.ylabel("Density"), Coord.cartesian(xmin=-4, xmax=4)
)
# Stat.dodge example: mean salary per rank and discipline, drawn as stacked
# vertical bars (left) and dodged horizontal bars (right), each with labels.
using DataFrames, Gadfly, RDatasets, Statistics
set_default_plot_size(21cm, 8cm)

salaries = dataset("car","Salaries")
# Rebind the column to the salaries divided by 1000.
salaries.Salary = salaries.Salary / 1000.0
salaries.Discipline = ["Discipline $(d)" for d in salaries.Discipline]

# Mean salary for every (Rank, Discipline) pair, plus a rounded text label.
by_rank_discipline = groupby(salaries, [:Rank, :Discipline])
df = combine(by_rank_discipline, :Salary => mean)
df.label = [string(round(Int, m)) for m in df.Salary_mean]

# Stacked bars with centred labels.
p1 = plot(df, x=:Discipline, y=:Salary_mean, color=:Rank,
    Scale.x_discrete(levels=["Discipline A", "Discipline B"]),
    label=:label,
    Geom.label(position=:centered),
    Stat.dodge(position=:stack),
    Geom.bar(position=:stack)
)

# Dodged horizontal bars with labels to the right of each bar.
p2 = plot(df, y=:Discipline, x=:Salary_mean, color=:Rank,
    Coord.cartesian(yflip=true),
    Scale.y_discrete,
    label=:label,
    Geom.label(position=:right),
    Stat.dodge(axis=:y),
    Geom.bar(position=:dodge, orientation=:horizontal),
    Scale.color_discrete(levels=["Prof", "AssocProf", "AsstProf"]),
    Guide.yticks(orientation=:vertical),
    Guide.ylabel(nothing)
)

hstack(p1, p2)
# Stat.func example: two sigmoid curves drawn from functions, overlaid with
# jittered sample points and error bars coloured by the same shift values.
using DataFrames, Gadfly
set_default_plot_size(14cm, 8cm)

"""
    sigmoid(x)

Return `1 / (1 + exp(-x))`, applied element-wise when `x` is a collection.
"""
function sigmoid(x)
    return 1 ./ (1 .+ exp.(-x))
end

npoints = 30
# Each point gets a random shift of 0 or 2, which is also its colour group.
gshift = rand([0,2], npoints)
x = range(-9, 9, length=npoints)
y = sigmoid(x .+ gshift)
ye = 0.2 .* rand(npoints)    # half-height of each error bar
df = DataFrame(x=x, y=y, ymin=y .- ye, ymax=y .+ ye, g=gshift)

# Sample points with error bars, jittered along x.
samples = layer(df, x=:x, y=:y, ymin=:ymin, ymax=:ymax, color=:g,
    Geom.point, Geom.yerrorbar, Stat.x_jitter(range=1))

plot(y=[sigmoid, x->sigmoid(x+2)], xmin=[-10], xmax=[10],
    Geom.line, Stat.func(100), color=[0,2],
    Guide.xlabel("x"),
    samples,
    Scale.color_discrete_manual("deepskyblue","yellow3", levels=[0,2]),
    Guide.colorkey(title="Function", labels=["Sigmoid(x)", "Sigmoid(x+2)"]),
    Theme(errorbar_cap_length=0mm, key_position=:inside)
)
# Stat.qq example: quantile-quantile plots of three samples against their own
# fitted Normal (left) and of one sample against two fitted distributions (right).
using Distributions, Gadfly, RDatasets
set_default_plot_size(21cm, 8cm)

iris = dataset("datasets", "iris")
geyser = dataset("datasets", "faithful")

# One fitted Normal per species, stored in column `d`.
df = combine(groupby(iris, :Species), :SepalLength => (v -> fit(Normal, v)) => :d)
# Normal and Uniform fits of the same eruption sample.
ds2 = fit.([Normal, Uniform], [geyser.Eruptions])

"""
    yeqx(x=4:6)

Return a layer carrying `x` and a gray `Geom.abline`, used as a reference line.
"""
function yeqx(x=4:6)
    return layer(x=x, Geom.abline(color="gray80"))
end

xylabs = [Guide.xlabel("Theoretical q"), Guide.ylabel("Sample q")]

p1 = plot(df, x=:d, y=iris[:,1], color=:Species,
    Stat.qq, yeqx(4:8), xylabs...,
    Guide.title("3 Samples, 1 Distribution")
)

p2 = plot(geyser, x=ds2, y=:Eruptions, color=["Normal","Uniform"],
    Stat.qq, yeqx(0:6), xylabs...,
    Guide.title("1 Sample, 2 Distributions"),
    Theme(discrete_highlight_color=c->nothing, alphas=[0.5], point_size=2pt)
)

hstack(p1, p2)
# Stat.smooth inside a subplot grid: salary against years of service for
# rows with Rank "Prof", one panel per discipline, coloured by sex.
using Compose, Gadfly, RDatasets
set_default_plot_size(21cm,8cm)

salaries = dataset("car","Salaries")
# Rebind the column to the salaries divided by 1000.
salaries.Salary = salaries.Salary / 1000.0
salaries.Discipline = ["Discipline $(d)" for d in salaries.Discipline]

# Keep only the rows whose rank is "Prof".
profs = salaries[salaries.Rank .== "Prof", :]

# `:lm` smooth drawn as a line with ribbons for the 0.95 and 0.99 levels.
trend = layer(Stat.smooth(method=:lm, levels=[0.95, 0.99]), Geom.line, Geom.ribbon)

p = plot(profs, x=:YrsService, y=:Salary, color=:Sex, xgroup=:Discipline,
    Geom.subplot_grid(Geom.point, trend),
    Scale.xgroup(levels=["Discipline A", "Discipline B"]),
    Guide.colorkey(title="", pos=[0.43w, -0.4h]),
    Theme(point_size=2pt, alphas=[0.5])
)
# Stat.smooth example: noisy points around y = x with an `:lm` smooth and
# unfilled ribbons for the 0.90 and 0.99 levels.
using DataFrames, Gadfly
set_default_plot_size(14cm, 8cm)

x = range(0.1, 4.9, length=30)
noise = randn(length(x))
D = DataFrame(x=x, y=x .+ noise)

# Smooth line plus ribbon outlines (fill disabled).
smoothed = layer(Stat.smooth(method=:lm, levels=[0.90,0.99]),
    Geom.line, Geom.ribbon(fill=false))

p = plot(D, x=:x, y=:y, Geom.point,
    smoothed,
    Theme(lowlight_color=c->"gray", line_style=[:solid, :dot])
)
# Stat.step example: 25 seeded random points drawn as a line with `Stat.step`.
using Gadfly, Random
set_default_plot_size(14cm, 8cm)

# Fixed seed so the example renders the same figure every time.
Random.seed!(1234)
xs = rand(25)
ys = rand(25)

plot(x=xs, y=ys, Stat.step, Geom.line)
# Stat.unidistribution example: two Normal distributions stored in a table
# column and drawn three ways (full ribbon, tail ribbons, three banded ribbons).
using DataFrames, Gadfly, Distributions
using Gadfly: w,h
set_default_plot_size(21cm, 8cm)

D = DataFrame(
    Dist=["Prior", "Posterior"],
    Density=[Normal(-0.22, 0.02), Normal(-0.29, 0.015)]
)

# Shared x-range and colour-key placement.
xcoord = Coord.cartesian(xmin=-0.4, xmax=-0.1)
gck = Guide.colorkey(title="", pos=[0.5w, -0.4h])

# Quantile intervals passed to `Stat.unidistribution` for the ribbon layers.
tails = [[0.0001, 0.05], [0.95, 0.9999]]
bands = [[0.0001, 0.1],[0.1, 0.9], [0.9, 0.9999]]

# Panel 1: line and ribbon, coloured by distribution.
p1 = plot(D, y=:Density, color=:Dist,
    Guide.title("color=:Dist"), gck,
    layer(Stat.unidistribution, Geom.line, Geom.ribbon, alpha=[0.8]),
    xcoord
)

# Panel 2: line everywhere, ribbon only over the two tail intervals.
p2 = plot(D, y=:Density, color=:Dist,
    layer(Stat.unidistribution, Geom.line),
    layer(Stat.unidistribution(tails), Geom.ribbon),
    Guide.ylabel(nothing), Guide.title("color=:Dist"), gck
)

# Panel 3: grouped by distribution, ribbons over three intervals with manual colours.
p3 = plot(D, y=:Density, group=:Dist, xcoord, gck,
    layer(Stat.unidistribution(bands), Geom.ribbon, alpha=[0.8]),
    Scale.color_discrete_manual("orange", "yellow", "coral"),
    Theme(lowlight_color=identity),
    Guide.title("group=:Dist"), Guide.ylabel(nothing)
)

hstack(p1, p2, p3)
# Stat.x_jitter example: 500 seeded points whose x values are integers 1-4,
# drawn with `Stat.x_jitter(range=0.5)`.
using Gadfly, Distributions, Random
set_default_plot_size(14cm, 8cm)

# Fixed seed so the example renders the same figure every time.
Random.seed!(1234)
xs = rand(1:4, 500)
ys = rand(500)

plot(x=xs, y=ys, Stat.x_jitter(range=0.5), Geom.point)
# Stat.xticks example: 10 seeded random points with x ticks placed at
# explicitly chosen positions.
using Gadfly, Random
set_default_plot_size(14cm, 8cm)

# Fixed seed so the example renders the same figure every time.
Random.seed!(1234)
xs = rand(10)
ys = rand(10)

plot(x=xs, y=ys, Stat.xticks(ticks=[0.0, 0.1, 0.9, 1.0]), Geom.point)