In [1]:
import StatsBase: predict
import Base: getindex, show
import MLBase: Kfold
using MLMetrics
using SparseRegression


Use "abstract type AvgMode end" instead.

Use "abstract type AbstractBinary end" instead.

Use "abstract type AbstractMultiClass end" instead.

Use "MultiClass{T,N}(...) where {T,N}" instead.


In [2]:
function Fakedata(N,d)
    n_obs = 100
    x = randn((n_obs,d))
    y = sum(x*randn(d),2)
    
    hcat(x,y)
end

Fakedata (generic function with 1 method)

In [3]:
Fakedata(1000,3)

100×4 Array{Float64,2}:
  0.0973409  -0.426206   -2.20467     1.15517  
  1.09764    -2.23121    -1.22836    -2.37771  
 -0.0911682   2.04924    -0.969256    4.05808  
 -0.149748   -2.73146    -0.570739   -3.91302  
 -0.223951    1.1719      0.788278    1.18538  
 -1.48807    -1.0224     -0.914728   -1.10797  
  0.854177   -0.355109    1.13253    -1.36962  
  0.910098    1.13904     0.275291    1.73251  
  0.497604    0.238259    0.499603    0.0450758
  0.829776   -0.268876   -0.193977   -0.140337 
 -2.43997     1.57523    -0.898356    2.87821  
 -0.67792     0.364261   -1.22959     1.49173  
  0.183108   -0.392075    0.771805   -1.23491  
  ⋮                                            
  1.82227     0.456899    0.400614    0.681478 
  0.190407    1.18768    -0.711358    2.51324  
  1.10039     0.393971    0.0269222   0.777559 
  0.663291   -0.501412   -0.278055   -0.468015 
 -0.650657   -0.0870616  -0.425518    0.111399 
  0.428484   -0.592906   -0.182926   -0.729092 
 -0.840298   -0.

In [4]:
immutable Task 
    task_type::String
    target::Int
    features::Array{Int}
end

function Task(;task_type="regression", target=nothing, data=nothing)
    if target == nothing || data == nothing
        throw("Requires target and data to be set")
    end
    
    features = size(data,2)
    features = deleteat!( collect(1:features), target)
    
    Task(task_type, target, features)
end

immutable Learner
    name::String
    parameters::Union{Void,Dict{Any}}
    Learner(learner::String) = new(learner, Dict())
    Learner(learner::String, parameters::Dict{Any}) = new(learner, parameters)
end

function show(io::IO,l::Learner)
    println("Learner: $(l.name)")
    for (key, value) in l.parameters
       println(" ▁ ▂ ▃ $key: $value") 
    end
end

immutable Resampling
    method::String
    iterations::Int
    Resampling() = new("KFold", 3)
end

abstract type Parameter end

immutable DiscreteParameter <: Parameter 
    name::String
    values::Array{Any}
    DiscreteParameter(;name=nothing,values=nothing) = new(name, values)
end

immutable ContinuousParameter <: Parameter
    name::String
    lower::Real
    upper::Real
    transform::Function
    ContinuousParameter(;name=nothing, lower=nothing, upper=nothing, transform=nothing) = new(name, lower, upper, transform)
end


immutable ParametersSet
   parameters::Array{Parameter}
end

getindex(p::ParametersSet, i::Int64) = p.parameters[i]

immutable MLRModel{T}
    model::T
    parameters
end

In [5]:
#### ABSTRACT FUNCTIONS ####

function MLRModel(learner::Learner, task::Task, data)
    # Calls function with name "makeModelname"
    f_name = learner.name
    f_name = "make" * titlecase(f_name)
    
    f = getfield(Main, Symbol(f_name))
    f(learner, task, data)
end    

function learnᵧ(learner::Learner, task::Task, data)
    modelᵧ = MLRModel(learner, task, data)
    learnᵧ!(modelᵧ, learner=learner, task=task, data=data)
    modelᵧ
end

learnᵧ (generic function with 1 method)

In [6]:
### TRANSITION ###
function makeRidge(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        push!(parameters, get_λ(learner.parameters, data))
        model = SModel(data[:, task.features], data[:, task.target], L2DistLoss(), L2Penalty(), parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

function makeGlm(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        if get(learner.parameters, "λ", false) !== false
            # Add λ
            push!(parameters, get_λ(learner.parameters, task))
        end
        if get(learner.parameters, "penalty", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["penalty"])
        end
        if get(learner.parameters, "loss", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["loss"])
        end
        model = SModel(data[:, task.features], data[:, task.target], parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

makeGlm (generic function with 1 method)

In [7]:
#### MODEL WRAPPERS ####
using SparseRegression

function get_λ(parameters, task)
    if get(parameters, "λ", false) == false
        lambda = fill(0.0, task.features)
    elseif typeof(parameters["λ"]) <: Real
        lambda = fill(parameters["λ"], length(task.features) )
    elseif typeof(parameters["λ"]) <: Vector{Float64}
        lambda = copy(parameters["λ"])
    end
    lambda
end


function predictᵧ(modelᵧ::MLRModel{<:SModel}; data=data, task=task)
    predict(modelᵧ.model, data[:, task.features])
end

function learnᵧ!(modelᵧ::MLRModel{<:SModel}; learner=nothing::Learner, data=nothing::Matrix{Real}, task=nothing::Task)
    learn!(modelᵧ.model)
end

learnᵧ! (generic function with 1 method)

In [8]:
function update_parameters!(array, range) 
    array[1] += 1
    for i in 1:length(array)
        if array[i] > range[i][end]
            array[i+1] += 1
            array[i] = range[i][1]
        end
    end
end

function parameters_dictionary(ps::ParametersSet, array, discrete_dictionary)
    dict = Dict()
    for i in 1:length(array)
        if typeof(ps[i]) <: ContinuousParameter
            dict[ps[i].name] = ps[i].transform( convert(Float64, array[i]) )
        else
            dict[ps[i].name] = discrete_dictionary[ps[i].name][array[i]]
        end
    end
    dict
end

function get_samples(sampler::Resampling, n_obs::Int64)
    trainᵢ = []
    testᵢ = []
    if sampler.method == "KFold"
        kfold = Kfold(n_obs, sampler.iterations)
        for train in kfold
            push!(trainᵢ, collect(train))
            push!(testᵢ, setdiff(1:n_obs, trainᵢ[end]))
        end
    end  
    trainᵢ, testᵢ
end

function tune(;learner=nothing::Learner, task=nothing::Task, data=nothing::Matrix{Real}, 
                parameters_set=nothing::ParametersSet, sampler=Resampling()::Resampling, 
                measure=nothing::Function)
    
    n_parameters = length(parameters_set.parameters)
    n_obs        = size(data,1)
        
    parameters_array = Array{Any}(n_parameters)
    parameters_range = Array{Tuple}(n_parameters)
    
    # For discrete parameters, the range is set to 1:n_discrete_values
    # The discrete dictionary variable allows to connect this range to 
    # the actual discrete value
    discrete_dictionary = Dict()
    
    total_parameters = 1
    
    # Prepare parameters
    for i in 1:n_parameters
        if typeof(parameters_set[i]) <: ContinuousParameter 
            lower = parameters_set[i].lower
            upper = parameters_set[i].upper
            parameters_array[i] = lower
            parameters_range[i] = Tuple(lower:upper)
            params = length(lower:upper)
        else
            parameters_array[i] = 1
            parameters_range[i] = Tuple(1:length(parameters_set[i].values))
            discrete_dictionary[parameters_set[i].name] = parameters_set[i].values
            params = length(parameters_set[i].values)
        end
        total_parameters *= params
    end
    
    
    
    # Loop over parameters
    for i in 1:total_parameters
        # Set new parametersparameters_set[i].values
        update_parameters!(parameters_array, parameters_range) 
        pd = parameters_dictionary(parameters_set, parameters_array, discrete_dictionary)

        # Update learner with new parameters
        lrn = Learner(learner.name, pd)
                
        # Get training/testing validation sets
        trainⱼ, testⱼ = get_samples(sampler, n_obs)
        
        scores = []
        for j in 1:length(trainⱼ)  
            modelᵧ = learnᵧ(lrn, task, data[trainⱼ[j], :])
            preds = predictᵧ(modelᵧ, data=data[testⱼ[j],:], task=task)
            
            score = measure( data[testⱼ[j], task.target], preds)
            push!(scores, score)
        end
        println("Trained:")
        println(lrn)
        println("Average CV accuracy: $(mean(scores))\n")
    end
end

tune (generic function with 1 method)

In [16]:
ps = ParametersSet([
    ContinuousParameter(
        name = "λ",
        lower = -4,
        upper = 1,
        transform = x->10^x
    )
        ,
    DiscreteParameter(
        name = "penalty",
        values = [L1Penalty(), L2Penalty()]
    )
])

data = Fakedata(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn = Learner("glm")
learnᵧ(lrn,task,data)
tune(learner=lrn, task=task, data=data, parameters_set=ps, measure=mean_squared_error)

[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mConverged after 18 iterations: [-0.68388, -1.38216, 0.878894]
[39m

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L1Penalty
 ▁ ▂ ▃ λ: 0.001

Average CV accuracy: 3.3280955130486892e-6

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L1Penalty
 ▁ ▂ ▃ λ: 0.01

Average CV accuracy: 0.0003592596315147756

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L1Penalty
 ▁ ▂ ▃ λ: 0.1

Average CV accuracy: 0.04028597046451981

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L1Penalty
 ▁ ▂ ▃ λ: 1.0

Average CV accuracy: 2.294579016910561

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L1Penalty
 ▁ ▂ ▃ λ: 10.0

Average CV accuracy: 2.9276403421561685

Trained:
Learner: glm


[1m[36mINFO: [39m[22m[36mConverged after 18 iterations: [-0.683888, -1.38238, 0.879054]
[39m[1m[36mINFO: [39m[22m[36mConverged after 13 iterations: [-0.684047, -1.38228, 0.87909]
[39m[1m[36mINFO: [39m[22m[36mConverged after 17 iterations: [-0.672395, -1.37159, 0.870453]
[39m[1m[36mINFO: [39m[22m[36mConverged after 17 iterations: [-0.673906, -1.37175, 0.867849]
[39m[1m[36mINFO: [39m[22m[36mConverged after 16 iterations: [-0.676896, -1.37378, 0.869332]
[39m[1m[36mINFO: [39m[22m[36mConverged after 22 iterations: [-0.543055, -1.30089, 0.749015]
[39m[1m[36mINFO: [39m[22m[36mConverged after 23 iterations: [-0.59643, -1.24141, 0.769539]
[39m[1m[36mINFO: [39m[22m[36mConverged after 15 iterations: [-0.599445, -1.27581, 0.780547]
[39m[1m[36mINFO: [39m[22m[36mConverged after 12 iterations: [-0.0, -0.304757, 0.0]
[39m[1m[36mINFO: [39m[22m[36mConverged after 15 iterations: [-0.0, -0.333514, 0.0]
[39m[1m[36mINFO: [39m[22m[36mConverged 

 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 0.0001

Average CV accuracy: 3.604703416597752e-8

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 0.001

Average CV accuracy: 3.5925834875100503e-6

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 0.01

Average CV accuracy: 0.0003889354181530369

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 0.1

Average CV accuracy: 0.0375478154462754

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 1.0

Average CV accuracy: 0.8877517449758807

Trained:
Learner: glm
 ▁ ▂ ▃ penalty: L2Penalty
 ▁ ▂ ▃ λ: 10.0

Average CV accuracy: 2.4757423479712064



[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m

LoadError: [91mBoundsError: attempt to access 2-element Array{Any,1} at index [3][39m

In [10]:
data = FakeData(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn  = Learner("ridge")

train = 1:80
test  = 81:100


modelᵧ = learnᵧ(lrn, task, data[train,:])
pred = predictᵧ(modelᵧ, data=data[test,:], task=task)

mean_squared_error(data[test,task.target],pred)

LoadError: [91mUndefVarError: FakeData not defined[39m

In [30]:
using Dagger
model = SModel(data[:, task.features], data[:, task.target])
fun(a,b,c)=SModel(a,b,c)
dc1=delayed(fun)(data[:, task.features], data[:, task.target])
dc2=delayed(learn!)(model)
typeof(dc1.inputs)

Tuple{Array{Float64,2},Array{Float64,1}}

In [32]:
# maybe data provider and maybe copy dagger graph mutliple times???

3-element Array{Float64,1}:
 0.1
 0.1
 0.1

In [12]:

# What is the alternative
g(x)=x
setfield!(x->x^2,:g,Main)

1.0