In [1]:
using JuMP, Gurobi

lambda = 0.01
mu = 0.00001

rets = readcsv("./rets.csv")
rets = rets[2:length(rets[:,1]), 2]

data = readcsv("./full_norm_pct.csv")
dates = data[2:length(data[:,1]), 1] #extract date column
data = data[:, 2:length(data[1,:])]
cols = data[1,:] #extract column titles
data = data[2:length(data[:,1]),:] #remove column titles

data = convert(Array{Float64},data)
rets = convert(Array{Float64},rets)

num_factors = length(data[1,:])
num_dates = length(dates)

m = Model(solver = GurobiSolver(OutputFlag = 0))

@variable(m, b[1:num_factors])
@variable(m, yt[1:num_dates])
@variable(m, t[1:num_factors])
@variable(m, maxt)
@constraint(m, [i in 1:num_dates], yt[i] == dot(b, data[i, :]))
@constraint(m, t .>= b)
@constraint(m, t .>= -b)
@constraint(m, [i in 1:num_factors], maxt >= t[i])
@objective(m, Min, sum( (rets - yt).^2 ) + lambda * maxt + mu * sum(t)) #LASSO with L-Infinite Regularization
# @objective(m, Min, sum( (rets - yt).^2 ) + mu * maxt) #L-Infinite Regularization
# @objective(m, Min, sum( (rets - yt).^2 ) + lambda * sum(t)) #LASSO
# @objective(m, Min, sum( (rets - yt).^2 ) + lambda * sum(b.^2))
# @objective(m, Min, sum( (rets - yt).^2 ))

solve(m)

selection = []

bs = getvalue(b)
count = 0
for i in 1:length(bs)
    if bs[i] > 0.0000001
        print(bs[i], "\t", cols[i],"\n")
        count += 1
        push!(selection, i)
    end
end
print(count,"\n")

sel_data = data[:, selection]
num_factors = length(sel_data[1,:])

m2 = Model(solver = GurobiSolver(OutputFlag = 0))

@variable(m2, b2[1:num_factors])
@variable(m2, yt2[1:num_dates])
@constraint(m2, [i in 1:num_dates], yt2[i] == dot(b2, sel_data[i, :]))
@objective(m2, Min, sum( (rets - yt2).^2 ) + lambda * sum(b2.^2))

solve(m2)

print("\n\n\n\n")
bs2 = getvalue(b2)
count = 0
for i in 1:length(bs2)
    if bs2[i] > 0.0000001
        print(bs2[i], "\t\n")
        count += 1
    end
end
print(count)

Academic license - for non-commercial use only
0.0011759247814741188	debt
0.001176379876343023	debtusd
0.0011763428882005291	ebitdausd
0.0011760615672456337	epsusd
0.0011756745194993299	equityusd
0.001176365102263119	evebit
0.0011763909144036763	grossmargin
0.00047536625334277966	investmentsc
0.0011762452646387346	ncfcommon
0.0011755777254571168	ncfi
0.0003844290660398657	ncfinv
0.000977196246159535	netincdis
0.001176191143616711	netincnci
0.0003095008904610131	payables
0.001176399094804994	pe
0.00018843827851626203	prefdivis
0.0011675827528454283	revenueusd
4.1632847106380803e-7	sharefactor
0.0005278349398877406	sps
0.0011754952358894615	taxexp
20
Academic license - for non-commercial use only




0.0017851584881661288	
0.003674232877735314	
0.0022548143017249063	
0.0020480713305107098	
0.00011698191037164823	
0.0010645910523629798	
0.005740175467731956	
0.004745915100783196	
0.000745539099854532	
0.0023025906519179833	
1.6288406720170137e-5	
0.0007870906865632969	
0.00547826630714621

In [2]:
bs = getvalue(b)
count = 0
for i in 1:length(bs)
    if bs[i] > 0.0000001
        print(bs[i], "\t", cols[i],"\n")
        count += 1
    end
end
print(count)

0.0011759247814741188	debt
0.001176379876343023	debtusd
0.0011763428882005291	ebitdausd
0.0011760615672456337	epsusd
0.0011756745194993299	equityusd
0.001176365102263119	evebit
0.0011763909144036763	grossmargin
0.00047536625334277966	investmentsc
0.0011762452646387346	ncfcommon
0.0011755777254571168	ncfi
0.0003844290660398657	ncfinv
0.000977196246159535	netincdis
0.001176191143616711	netincnci
0.0003095008904610131	payables
0.001176399094804994	pe
0.00018843827851626203	prefdivis
0.0011675827528454283	revenueusd
4.1632847106380803e-7	sharefactor
0.0005278349398877406	sps
0.0011754952358894615	taxexp
20

In [3]:
sel_data

7×20 Array{Float64,2}:
 -0.00103938   4.06405    0.156267    …  -1.14286  -4.03807    2.05337  
  0.30437      6.46051    0.00650794      0.0       0.245311   0.498002 
 -0.634001    -0.279962  -3.18678         0.0      -1.1883    -2.03745  
 -2.86162     -0.494445  -1.13603         0.0       2.1999     0.89586  
 -0.676859    -2.1649    -1.48907         0.0       1.08407   -0.244727 
  3.36258      1.34137   -0.886255    …   0.0      -0.399574  -1.8382   
  0.0807311   -0.166374  47.4322          0.0      -2.67365    0.0910096

In [4]:
num_factors

20

In [232]:
getobjectivevalue(m)

0.020081852971495825

In [268]:
data[:,[1,20,37]]

7×3 Array{Float64,2}:
  0.0434216   0.106106   -0.716398
 -0.673924   -0.2201      4.23729 
 -2.1544     -0.597726   -0.476165
  1.75225    -1.36557     0.574577
 -0.70383     6.28081    -4.20583 
  0.141441   -0.0405702  -0.926255
 -0.386225    0.556266    0.91608 