In [1]:
# Run the script in parallel
using Distributed

# Start from a clean slate: drop any workers left over from an earlier run of this cell.
# With no workers attached, `workers()` returns `[1]`, and asking `rmprocs` to remove
# process 1 only produces a warning, so the call is skipped in that case.
if nprocs() > 1
    rmprocs(workers())
end

# Number of worker processes to start; change this to the number of cores you want to use.
n_workers = 0
addprocs(n_workers)

[33m[1m└ [22m[39m[90m@ Distributed /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.6/Distributed/src/cluster.jl:1041[39m


Int64[]

In [2]:
# Activate the project environment on every process and install any dependencies
# that are missing, so that all processes load the same package versions.
@everywhere begin
    using Pkg
    # "." is resolved against each process's current working directory.
    Pkg.activate(".")
    # NOTE(review): with several workers, every process runs `instantiate` at the
    # same time — confirm this is safe for a shared depot.
    Pkg.instantiate()
    #Pkg.status()
end

[32m[1m  Activating[22m[39m environment at `~/github/DifferentiableUserModels-JT/Project.toml`


In [3]:
@everywhere begin
    using ArgParse
    using BSON
    using Distributions
    using Flux
    using Stheno
    using Tracker
    using Printf
end

In [4]:
@everywhere include(joinpath(@__DIR__, "NeuralProcesses.jl/src/NeuralProcesses.jl"))

[33m[1m└ [22m[39m[90m@ Base.Docs docs/Docs.jl:240[39m


In [5]:
@everywhere using .NeuralProcesses

In [6]:
#@everywhere begin
    #include(joinpath(@__DIR__, "NeuralProcesses.jl/src/NeuralProcesses.jl"))
    #include("NeuralProcesses.jl/src/NeuralProcesses.jl")    
    #using .NeuralProcesses
#end


In [7]:

# parser = ArgParseSettings()
# @add_arg_table! parser begin
#     "--gen"
#         help = "Experiment setting: gridworld, menu_search, h_menu_search"
#         arg_type = String
#         default = "menu_search"
#     "--n_traj"
#         help = "Number of context trajectories. Setting to 0 randomizes between 1 and 8."
#         arg_type = Int
#         default = 0
#     "--n_epochs"
#         help = "Number of training epochs."
#         arg_type = Int
#         default = 50
#     "--n_batches"
#         help = "Number of batches."
#         arg_type = Int
#         default = 25
#     "--batch_size"
#         help = "Batch size."
#         arg_type = Int
#         default = 4
#     "--params"
#         help = "Return params?"
#         arg_type = Bool
#         default = false
#     "--p_bias"
#         help = "Probability of generating a sample with biased model"
#         arg_type = Float64
#         default = 0.0
#     "--bson"
#         help = "Directly specify the file to save the model to and load it from."
#         arg_type = String
#     "--epsilon"
#         help = "Value for epsilon."
#         arg_type = Float64
# end
# args = parse_args(parser)

In [8]:
# Stand-in for the command-line parser: the notebook only needs its default values.
@everywhere begin
    # Return a fresh dictionary mapping each experiment option name to its default.
    get_default_args() = Dict{String,Any}(
        "gen" => "menu_search",
        "n_traj" => 0,
        "n_epochs" => 50,
        "n_batches" => 25,
        "batch_size" => 4,
        "params" => false,
        "p_bias" => 0.0,
        "bson" => "",
        "epsilon" => 0.0,
    )

    # Settings shared by the remaining cells, defined on every process.
    args = get_default_args()
end

In [9]:
# Don't bother initializing the model
# println("Initializing model...")

# model = anp_ex2(
#     dim_embedding=128,
#     num_encoder_heads=8,
#     num_encoder_layers=6,
#     num_decoder_layers=6,
#     args=args
# ) |> gpu


In [10]:
# Don't bother initializing the loss
# println("Initializing loss...")

# loss(xs...) = np_elbo(
#     xs...,
#     num_samples=5,
#     fixed_σ_epochs=3
# )

In [11]:
# Build the data generator on every process.
@everywhere begin
    println("Initializing data generator")

    # Both bounds are 10, so these always yield 10 context and 10 target points.
    num_context = Distributions.DiscreteUniform(10, 10)
    num_target = Distributions.DiscreteUniform(10, 10)

    # Redundant input distributions; they are only supplied because the
    # DataGenerator definition requires them.
    x_context = Distributions.Uniform(-2, 2)
    x_target = Distributions.Uniform(-2, 2)

    batch_size = args["batch_size"]

    data_gen = NeuralProcesses.DataGenerator(
        SearchEnvSampler(args);
        batch_size=batch_size,
        x_context=x_context,
        x_target=x_target,
        num_context=num_context,
        num_target=num_target,
        σ²=1e-8,
    )

    println("Data gen initialized")
end

Initializing data generator
Data gen initialized


In [13]:
@everywhere begin
    # Settings that are normally defined alongside the training loop. Only the two
    # needed for data generation are set here; the rest remain commented out:
    #   tasks_per_epoch   = 2^5
    #   total_epochs      = args["n_epochs"]
    #   starting_epoch    = 0
    #   batches_per_epoch = div(tasks_per_epoch, data_gen.batch_size)
    experiment = "menu_search"
    batches = args["n_batches"]

    # Report the settings used for this run.
    @printf "Batch size:           %-6d\n" data_gen.batch_size
    @printf "Number of batches     %-6d\n" batches
end

Batch size:           4     
Number of batches     25    


In [None]:
# # Use the data generator
# for batch_n in 1:batches-1
#     # Warmup epoch
#     if batch_n == starting_epoch
#         n_mini_batches = 1
#     else
#         n_mini_batches = batches_per_epoch
#     end
#     # Generate data
#     data = gen_batch(data_gen, n_mini_batches; eval=false)

#     if experiment == "menu_search"
#         BSON.bson("data/ex2/"*string(batch_n)*".bson", data=data)
#     end
# end

In [None]:


# Generate the batches in parallel and save each one to its own BSON file.

# The loop body reads these on whichever process runs it, so they are defined
# everywhere. The values are the ones used by the (commented-out) training setup:
# tasks_per_epoch = 2^5 and starting_epoch = 0.
@everywhere begin
    starting_epoch = 0
    tasks_per_epoch = 2^5
    # Divide out batch size to get the number of mini-batches per epoch.
    batches_per_epoch = div(tasks_per_epoch, data_gen.batch_size)
end

# Make sure the output directory exists before any process tries to write into it.
mkpath("data/ex2")

# `@distributed` without a reducer returns immediately; `@sync` makes the cell wait
# until every batch has been generated and written.
# NOTE(review): the range stops at `batches - 1`, so one fewer file than `batches`
# is produced — confirm this is intended.
@sync @distributed for batch_n in 1:batches-1
    @printf("Running batch:               %-6d\n", batch_n)

    # Warmup epoch uses a single mini-batch; every other batch uses the full count.
    n_mini_batches = batch_n == starting_epoch ? 1 : batches_per_epoch

    # Generate data
    data = gen_batch(data_gen, n_mini_batches; eval=false)

    if experiment == "menu_search"
        filename = "data/ex2/" * string(batch_n) * ".bson"
        # BSON.bson opens the path for writing itself, so the file does not need to
        # be created beforehand.
        BSON.bson(filename, data=data)
        @printf("Finished batch:               %-6d\n", batch_n)
    end
end



In [34]:
# Generate a small sample on the main process for interactive inspection.
# The second argument is the value passed as `n_mini_batches` in the generation loop;
# the trailing `;` suppresses the cell output.
data = gen_batch(data_gen, 3; eval=false);

In [40]:
# Unpack the second entry of `data` into its four components.
xc,yc,xt,yt = data[2];
# The size is computed but not shown because of the trailing `;`.
size(xt);
# Last expression of the cell, so this is the value that gets displayed.
batch_size

4

In [24]:
# Draw one value from the context-size distribution.
# NOTE(review): this rebinds the global `num_context` on the main process from the
# DiscreteUniform defined in the data-generator cell to a sampled integer — confirm
# the rebinding is intended (a different variable name would avoid the clash).
num_context = rand(Distributions.DiscreteUniform(10, 10))

10

In [14]:
using HDF5
# Add multiple pieces of metadata to the dataset

# Attributes describing how `data` was generated; they are attached to every
# mini-batch group written to the HDF5 file.
metadata = Dict(
    "gen_type" => "SearchEnvSampler / menu_search",
    "eval" => false,
    "batch_size" => batch_size,
    # Taken from the data itself so it always matches what is written.
    "n_minibatches" => length(data),
    # This is what happens when n_traj is set to 0 in the args dictionary.
    "n_traj" => "random(1-8)",
    "noise_variance" => 1e-8,
    "p_bias" => args["p_bias"],
    "epsilon" => args["epsilon"],
)



"""
    create_hdf5_ex2(data, filename, batch_size, metadata)

Write every mini-batch in `data` to the HDF5 file `filename`, overwriting the file if
it already exists.

Mini-batch number `i` is stored in a group named `mini_batch_i` holding four datasets,
`xc`, `yc`, `xt` and `yt`, taken from positions 1 to 4 of that mini-batch. Every
key/value pair in `metadata` is written as an attribute of each group.

`batch_size` is accepted but not used by the function body.
"""
function create_hdf5_ex2(data, filename, batch_size, metadata)
    # Dataset names, in the order the arrays appear inside each mini-batch.
    dataset_names = ("xc", "yc", "xt", "yt")

    # Mode "w" replaces any existing file.
    h5open(filename, "w") do fid
        for (i, d) in enumerate(data)
            # One group per mini-batch.
            g = create_group(fid, "mini_batch_$i")

            for (slot, name) in enumerate(dataset_names)
                g[name] = d[slot]
            end

            # Attach the same metadata to every group.
            for (key, value) in metadata
                write_attribute(g, key, value)
            end
        end
    end
end



create_hdf5 (generic function with 1 method)

In [42]:


# Display the target-size distribution stored on the data generator.
data_gen.num_target

DiscreteUniform(a=10, b=10)

In [15]:
# Write the sampled mini-batches and their metadata to an HDF5 file.
filename = "data/ex2/batch1.hdf"
# Make sure the output directory exists before the file is opened for writing.
mkpath(dirname(filename))
# The writer defined in this notebook is `create_hdf5_ex2`, which takes four arguments.
create_hdf5_ex2(data, filename, batch_size, metadata)

In [None]:
# Don't bother training the model
# println("Proceeding to training loop...")

# mkpath("models/"*string(args["bson"]))

# train_model!(
#         model,
#         loss,
#         data_gen,
#         ADAM(5e-4),
#         bson=args["bson"],
# 	experiment=args["gen"],
#         starting_epoch=0,
#         tasks_per_epoch=2^5,
#         batches=args["n_batches"],
# 	total_epochs=args["n_epochs"],
#         epsilon=args["epsilon"]
#     )