In [1]:
using Turing, Distributions, CSV, DataFrames, Optim, StatsBase

# Load the ice-cream sales dataset into a DataFrame. The path is relative, so
# it is resolved against the current working directory.
data = CSV.read("data/ice_cream_sales.csv", DataFrame);
# Show the column names so the columns used below can be checked by eye.
names(data)

│   exception = ErrorException("Required dependency DelimitedFiles [8bb1440f-4735-579b-a4ab-409b98df4dab] failed to load from a cache file.")
└ @ Base loading.jl:1818


6-element Vector{String}:
 "ID"
 "Temperature"
 "Is_Weekend"
 "Hours_Open"
 "Electricity_Usage"
 "Ice_Cream_Sales"

In [2]:
# Coerce every column used by the model to a concrete element type:
# continuous measurements become Float64, categorical/count columns become Int.
# Columns are processed in a fixed order, one `convert` per column.
for (column, T) in (
    :Temperature => Float64,
    :Is_Weekend => Int,
    :Hours_Open => Int,
    :Ice_Cream_Sales => Float64,
    :Electricity_Usage => Float64,
)
    data[!, column] = convert(Array{T}, data[!, column])
end

In [3]:
# Bind each column of `data` to its own top-level name; these vectors are the
# arguments handed to the model constructor further down.
temperature = data.Temperature
is_weekend = data.Is_Weekend
hours_open = data.Hours_Open
electricity_usage = data.Electricity_Usage
ice_cream_sales = data.Ice_Cream_Sales;

In [4]:
@model function ice_cream_sales_model(temperature, is_weekend, hours_open, electricity_usage, ice_cream_sales)
    # Structural model of ice-cream sales. Each argument is a per-row vector;
    # only the rows present in every vector are visited (shortest length wins).
    n = minimum(length, (temperature, is_weekend, hours_open, electricity_usage, ice_cream_sales))

    # Coefficient priors. They were started as (0, 1) and their centres were
    # then adjusted iteratively using Optim.jl.
    # `_ics` = ice-cream-sales equation, `_eu` = electricity-usage equation.
    α_ics ~ Normal(20, 1)
    α_eu ~ Normal(10, 1)
    β_temp_ics ~ Normal(30, 1)
    β_hours_ics ~ Normal(25, 1)
    β_weekend_ics ~ Normal(100, 1)
    β_temp_eu ~ Normal(2, 1)
    β_hours_eu ~ Normal(3, 1)
    β_eu_ics ~ Normal(-0.13, 1)

    for i in 1:n
        # Root variables: distributions that do not depend on anything else.
        temperature[i] ~ Normal(25, 2)
        is_weekend[i] ~ Bernoulli(2/7)

        # Opening hours depend on the day type: 10-11 on weekends, 8-10 otherwise.
        hours_open[i] ~ (is_weekend[i] == 1 ? DiscreteUniform(10, 11) : DiscreteUniform(8, 10))

        # Electricity usage is linear in temperature and opening hours.
        μ_eu = α_eu + temperature[i] * β_temp_eu + hours_open[i] * β_hours_eu
        electricity_usage[i] ~ Normal(μ_eu, 1)

        # The outcome: sales are linear in every upstream variable.
        μ_ics = α_ics + temperature[i] * β_temp_ics + hours_open[i] * β_hours_ics +
            is_weekend[i] * β_weekend_ics + electricity_usage[i] * β_eu_ics
        ice_cream_sales[i] ~ Normal(μ_ics, 3)
    end
end

ice_cream_sales_model (generic function with 2 methods)

In [5]:
# Build a Gibbs sampler from two component samplers:
#   * HMC(0.01, 10, ...) for the continuous quantities (per Turing's
#     HMC(ϵ, n_leapfrog, space...) signature: step size 0.01, 10 leapfrog steps);
#   * PG(10, ...) for the discrete quantities (particle Gibbs, 10 particles).
# NOTE(review): the symbols listed here are the model's data arguments, while
# the coefficients (α_*, β_*) are assigned to neither sampler. Presumably this
# is meant for runs where some of those arguments are passed as `missing` —
# confirm that every model parameter is covered wherever `sampler` is used.
cont_sampler = HMC(0.01, 10, :temperature, :electricity_usage, :ice_cream_sales)
disc_sampler = PG(10, :is_weekend, :hours_open)
sampler = Gibbs(cont_sampler, disc_sampler)

Gibbs{(:temperature, :electricity_usage, :ice_cream_sales, :is_weekend, :hours_open), Tuple{HMC{Turing.Essential.ForwardDiffAD{0}, (:temperature, :electricity_usage, :ice_cream_sales), AdvancedHMC.UnitEuclideanMetric}, PG{(:is_weekend, :hours_open), AdvancedPS.ResampleWithESSThreshold{typeof(AdvancedPS.resample_systematic), Float64}}}}((HMC{Turing.Essential.ForwardDiffAD{0}, (:temperature, :electricity_usage, :ice_cream_sales), AdvancedHMC.UnitEuclideanMetric}(0.01, 10), PG{(:is_weekend, :hours_open), AdvancedPS.ResampleWithESSThreshold{typeof(AdvancedPS.resample_systematic), Float64}}(10, AdvancedPS.ResampleWithESSThreshold{typeof(AdvancedPS.resample_systematic), Float64}(AdvancedPS.resample_systematic, 0.5))))

In [6]:
# Instantiate the model on the data vectors. This instance is the one handed
# to Optim.jl's `optimize` for the MAP fit.
model_optim = ice_cream_sales_model(temperature, is_weekend, hours_open, electricity_usage, ice_cream_sales)

DynamicPPL.Model{typeof(ice_cream_sales_model), (:temperature, :is_weekend, :hours_open, :electricity_usage, :ice_cream_sales), (), (), Tuple{Vector{Float64}, Vector{Int64}, Vector{Int64}, Vector{Float64}, Vector{Float64}}, Tuple{}, DynamicPPL.DefaultContext}(ice_cream_sales_model, (temperature = [24.273285037096446, 25.503474431148458, 24.37002405766209, 24.37749519735116, 26.632613529864656, 25.953476759663754, 23.280889235876757, 22.06142358898691, 20.77133033773803, 25.08756332406122  …  27.88968302198113, 22.020745012649975, 24.783201090136597, 21.12625085079417, 24.805501713106118, 20.97052730229581, 27.764905318715257, 21.868932876733844, 22.15767543558101, 24.99857631463805], is_weekend = [0, 0, 0, 0, 0, 1, 1, 0, 0, 0  …  0, 0, 0, 0, 0, 1, 1, 0, 0, 0], hours_open = [10, 9, 8, 10, 8, 11, 11, 8, 9, 10  …  9, 8, 10, 10, 9, 11, 11, 9, 8, 10], electricity_usage = [92.11831434065243, 83.91781748696985, 88.29061662682687, 85.5618654162385, 94.404976320194, 98.23792371386881, 86.591963

In [7]:
# Maximum a posteriori (MAP) estimate of the model coefficients, found with
# Optim.jl's accelerated gradient descent. The run is capped at 1,000,000
# iterations and prints a trace line every 10,000 iterations.
map_estimate = optimize(
    model_optim,
    MAP(),
    AcceleratedGradientDescent(),
    Optim.Options(; iterations=1_000_000, show_trace=true, show_every=10_000),
)

Iter     Function value   Gradient norm 
     0     1.192569e+06     7.053294e+05
 * time: 0.010756969451904297
 10000     1.880141e+04     4.414033e-02
 * time: 2.355782985687256
 20000     1.880141e+04     7.877583e-03
 * time: 4.214664936065674
 30000     1.880141e+04     5.545849e-03
 * time: 5.953790903091431
 40000     1.880141e+04     4.614464e-03
 * time: 7.709409952163696
 50000     1.880141e+04     2.173154e-03
 * time: 9.418855905532837
 60000     1.880141e+04     9.911990e-04
 * time: 11.069024801254272


ModeResult with maximized lp of -18801.41
8-element Named Vector{Float64}
A              │ 
───────────────┼──────────
:α_ics         │    20.601
:α_eu          │   9.70611
:β_temp_ics    │   30.2174
:β_hours_ics   │   25.7812
:β_weekend_ics │   98.5197
:β_temp_eu     │   2.01892
:β_hours_eu    │   3.02148
:β_eu_ics      │ -0.130024