In [1]:
using JLD
include("../data/synthetic.jl")
using .Synthetic
using TOML
using Gen
using PyPlot
using LinearAlgebra
using ProgressBars
using Statistics
using Distributions
using CSV
using Random
using DataFrames

In [2]:
# Fix the global RNG seed before any project code is loaded or run.
Random.seed!(1234)
# Project-local source files; each is expected to define the module that is
# brought into scope by the matching `using .Name` below.
include("../src/model.jl")
include("../src/inference.jl")
include("../data/processing_iso.jl")

# Leading dot = module defined in the current session (by the includes above),
# not an installed package.
using .Model
using .Inference
using .ProcessingISO

In [147]:
# Experiment number; it selects which TOML file under experiments/config/ISO is read.
experiment = 1
config_path = string("../experiments/config/ISO/", experiment, ".toml")
# Parsed configuration (nested Dict) used by the data-loading code.
config = TOML.parsefile(config_path)
"""
    logmeanexp(x)

Return `logsumexp(x) - log(length(x))`, i.e. the log of the arithmetic mean of
`exp.(x)` (assuming `logsumexp` returns `log(sum(exp.(x)))`).
"""
function logmeanexp(x)
    return logsumexp(x) - log(length(x))
end


# +
# Read the CSV named in the config and keep only the rows whose :IsWeekday
# column equals the string "TRUE".
df = DataFrame(CSV.File(config["paths"]["data"]))
weekday_df = df[df[!, :IsWeekday] .== "TRUE", :]

# Importance weights from the configured means/variances, then a resample of
# the weekday rows driven by those weights.
importanceWeights = generateImportanceWeights(
    config["new_means"], config["new_vars"], weekday_df
)
T, Y, SigmaU, regions_key = resampleData(
    config["subsample_params"]["nSamplesPerState"], importanceWeights, weekday_df
)
# -

# Rescale the resampled T and Y.
T = T / 100
Y = Y / 1000
println()
# Distinct region labels present in the resampled data.
regions = Set(regions_key)

# Per-region :DryBulbTemp and :RealTimeDemand from ALL weekday rows (not the
# resample), divided by the same constants as T and Y.
Ts = Dict()
Ys = Dict()

for region in regions
    # Row mask for this region, built once and reused for both columns.
    local in_region = weekday_df[!, :Region] .== region
    Ts[region] = weekday_df[in_region, :DryBulbTemp] / 100
    Ys[region] = weekday_df[in_region, :RealTimeDemand] / 1000
end




In [4]:
# Map every key of `importanceWeights` to a 1-based integer id, following the
# iteration order of `keys(importanceWeights)`.
objIdKey = Dict()
for (i, stats) in enumerate(keys(importanceWeights))
    objIdKey[stats] = i
end

# Integer object id for each resampled observation, looked up from that
# observation's own entry in `regions_key`. This replaces a hard-coded
# `fill(i, 25)` per key, which was only correct when exactly 25 samples were
# drawn per key and the samples arrived grouped in key order.
# NOTE(review): assumes `regions_key` holds one region label per element of T/Y
# (it is used as a per-sample mask elsewhere in this notebook) - confirm.
obj_label = [objIdKey[region] for region in regions_key]
# generate initial parameters from prior
# ty, xy, uy, ynoise
# Draw one value from `normal(mean, scale)`, record it at address :LS and
# return it.
@gen function generateLS(mean, scale)
    return @trace(normal(mean, scale), :LS)
end
# Draw one value from `inv_gamma(shape, scale)`, record it at address :Noise
# and return it.
@gen function generateNoise(shape, scale)
    return @trace(inv_gamma(shape, scale), :Noise)
end
# `Map` lifts the single-draw generators above to operate over argument
# vectors; the i-th draw is recorded under sub-address `i`.
MappedGenerateLS = Map(generateLS)
MappedGenerateNoise = Map(generateNoise)
# Number of objects = number of entries in `importanceWeights`.
nObj = length(importanceWeights)

# Random-walk proposal for :theta: a normal centred on the value currently in
# `trace`.
# NOTE: despite its name, `var` is handed to `normal` as the second argument,
# so it acts as the proposal's standard deviation.
@gen function thetaProposal(trace, var::Float64)
    @trace(normal(trace[:theta], var), :theta)
end

# Random-walk proposal for the i-th offset (:alpha => i => :LS): a normal
# centred on the value currently in `trace`.
# NOTE: `var` is passed as the second argument of `normal`, i.e. it is used as
# a standard deviation.
@gen function alphaProposal(trace, i::Int, var::Float64)
    addr = :alpha => i => :LS
    @trace(normal(trace[addr], var), addr)
end

# Random-walk proposal for :beta => i => :LS: a normal centred on the value
# currently in `trace`; `var` is used as the second argument of `normal`.
# NOTE(review): the LinearMLMOffset model in this notebook traces no :beta
# address, so this proposal is only usable with a model that does.
@gen function betaProposal(trace, i::Int, var::Float64)
    addr = :beta => i => :LS
    @trace(normal(trace[addr], var), addr)
end

# Proposal for :noise drawn from an inverse-gamma distribution.
# The shape/scale pair is chosen so that scale / (shape - 1) equals the current
# value and current^2 / (shape - 2) equals `var`; under the usual
# inverse-gamma(shape, scale) parameterisation these are the mean and variance.
@gen function NoiseProposal(trace, var::Float64)
    current = trace[:noise]

    shape = current^2 / var + 2
    scale = (shape - 1) * current

    @trace(inv_gamma(shape, scale), :noise)
end

DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Any, Float64], ##NoiseProposal#1912, Bool[0, 0], false)

In [5]:
# Linear model with a shared slope and one additive offset per object.
#
# Arguments
#   ts        - input values, one per observation
#   obj_label - object id of each observation (converted with `Int`)
#   nObj      - number of objects, i.e. number of offsets to draw
#
# Random choices
#   :theta             - slope, normal(0, 1)
#   :alpha => k => :LS - offset of object k, normal(0.0, 10.0) via MappedGenerateLS
#   :noise             - inv_gamma(4.0, 4.0); passed as the second argument of
#                        each observation's `normal`
#   "y-$i"             - i-th observation, normal(theta * ts[i] + alpha[obj], noise)
@gen function LinearMLMOffset(ts::Vector{Float64}, obj_label, nObj)
    theta = @trace(normal(0, 1), :theta)
    alpha = @trace(MappedGenerateLS(fill(0.0, nObj), fill(10.0, nObj)), :alpha)
    sigma = @trace(inv_gamma(4.0, 4.0), :noise)
    for (i, t) in enumerate(ts)
        offset = alpha[Int(obj_label[i])]
        @trace(normal(theta * t + offset, sigma), "y-$i")
    end
end

DynamicDSLFunction{Any}(Dict{Symbol,Any}(), Dict{Symbol,Any}(), Type[Array{Float64,1}, Any, Any], ##LinearMLMOffset#1913, Bool[0, 0, 0], false)

In [6]:
# Constrain every observation address "y-$i" of the model to the matching
# entry of Y.
constraints = choicemap()
for (idx, observed) in enumerate(Y)
    constraints["y-$idx"] = observed
end

# Number of MH sweeps; the choice map after every sweep is stored (no burn-in
# or thinning is applied here).
n_run = 5000
PosteriorSamples = []
trace = first(generate(LinearMLMOffset, (T, obj_label, nObj), constraints))
for iter in tqdm(1:n_run)
    # One sweep, in fixed order: slope, noise, then each per-object offset.
    trace = first(mh(trace, thetaProposal, (0.5,)))
    trace = first(mh(trace, NoiseProposal, (0.5,)))
    for k in 1:nObj
        trace = first(mh(trace, alphaProposal, (k, 0.5)))
    end
    push!(PosteriorSamples, get_choices(trace))
end

100.00%┣███████████████████████████████████████████████████████▉┫ 5000/5000 00:33<00:00, 149.57 it/s]


In [59]:
# evaluation
# Placeholders: nothing in this block reads them; they are kept only so the
# names stay defined for any other cell that may refer to them.
Ycfs, mask, doT = [], nothing, nothing
# preds[region][doT] collects one predictive Normal per posterior sample.
preds = Dict()

avg, noises = nothing, []
avg_noises = []
# Grid of intervention values at which predictions are made.
doTs = [doT for doT in 0.2:0.01:0.8]
for region in regions
    preds[region] = Dict()
    for doT in doTs
        preds[region][doT] = []
    end
end

# Read (theta, per-object alpha, noise) out of each posterior choice map once,
# rather than once per doT value.
posteriorParams = map(1:n_run) do j
    sample = PosteriorSamples[j]
    (sample[:theta], [sample[:alpha => k => :LS] for k in 1:nObj], sample[:noise])
end

for (i, doT) in tqdm(enumerate(doTs))
    # Loop variables are destructured locally, so no notebook globals are
    # overwritten and no unused per-iteration arrays are allocated.
    for (theta, alpha, noise) in posteriorParams
        for region in regions
            # Predictive distribution for this region at this doT under sample j;
            # entries are appended in posterior-sample order.
            push!(preds[region][doT], Normal(theta * doT + alpha[objIdKey[region]], noise))
        end
    end
end

100.00%┣█████████████████████████████████████████████████████████████▉┫ 61/61 00:11<00:00, 5.28 it/s]


In [44]:
# Reference ("truth") curve per region: for every doT, a mean and a variance
# computed from that region's full Ts/Ys via the expressions
#   mean = kTTs' * (kTT \ y)        var = kTsTs - kTTs' * (kTT \ kTTs)
# NOTE(review): `rbfKernelLog` / `processCov` are project helpers; presumably
# they build an RBF kernel matrix with scale `yScale` and, when given, noise
# `yNoise` - confirm against their definitions.
truthIntMeans = Dict()
truthIntVars = Dict()
yNoise = 0.2
yScale = 1.
LS = 0.25
for (i, region) in tqdm(enumerate(regions))
    kTT = processCov(rbfKernelLog(Ts[region], Ts[region], LS), yScale, yNoise)
    # This solve does not depend on doT, so do it once per region instead of
    # once per grid point.
    local kTTinvY = kTT \ Ys[region]
    means = []
    vars = []
    for doT in doTs
        kTTs = processCov(rbfKernelLog(Ts[region], [doT], LS), yScale, nothing)
        kTsTs = processCov(rbfKernelLog([doT], [doT], LS), yScale, nothing)
        push!(means, (kTTs' * kTTinvY)[1])
        push!(vars, (kTsTs - kTTs' * (kTT \ kTTs))[1])
    end
    truthIntMeans[region] = means
    truthIntVars[region] = vars
   
end

100.00%┣████████████████████████████████████████████████████████████████┫ 6/6 00:04<00:00, 1.29 it/s]


In [176]:
# estIntLogLikelihoods[region][doT] collects, for each retained posterior
# sample, the log-likelihood of the reference mean under that sample's
# predictive distribution. estIntSamples is initialised with the same shape but
# is not filled in this block.
estIntSamples = Dict()
estIntLogLikelihoods = Dict()

# Per-region boolean mask over `regions_key` (not read in this block).
indecesDict = Dict()

for region in regions
    indecesDict[region] = regions_key .== region
    
    estIntSamples[region] = Dict()
    estIntLogLikelihoods[region] = Dict()
    
    for doT in doTs
        estIntSamples[region][doT] = []
        estIntLogLikelihoods[region][doT] = []
    end
end

# Each preds[region][doT] list holds exactly n_run entries, so the last usable
# index is tied to n_run rather than repeating its value as a literal.
nOuter = n_run
burnIn = 1000
stepSize = 100
# Retained posterior samples: indices burnIn, burnIn + stepSize, ..., up to nOuter.
for i in burnIn:stepSize:nOuter
    for (j, doT) in enumerate(doTs)
        for region in regions
            # j indexes the doT grid, matching the order truthIntMeans was built in.
            truth = truthIntMeans[region][j]
            dist = preds[region][doT][i]
            truthLogLikelihood = loglikelihood(dist, [truth])
            push!(estIntLogLikelihoods[region][doT], truthLogLikelihood)  
        end
    end
end

In [177]:
# Score per region: for every doT take logmeanexp over the retained
# log-likelihoods, then average those values over the doT grid.
scores = Dict()

for region in regions
    # Left fold starting from 0 keeps the same summation order as a running `+=`.
    scores[region] = foldl(
        +,
        (logmeanexp([Real(llh) for llh in estIntLogLikelihoods[region][doT]]) for doT in doTs);
        init=0,
    ) / length(doTs)
end

In [179]:
# Evaluate `scores` so the notebook displays the per-region results.
scores

Dict{Any,Any} with 6 entries:
  "NH"   => 0.152335
  "CT"   => -0.687135
  "NEMA" => -0.190515
  "RI"   => 0.192545
  "ME"   => 0.23525
  "VT"   => 0.271347