In [None]:
# Getting data from aerospace database.
# CSV is used below to read the airfoil files; Statistics provides `mean`, which the
# error computations further down rely on.
using CSV, DataFrames, Statistics, Random
# Project-local helpers. `bin_to_leaves` is called later in this file and is presumably
# defined in bin_to_leaves.jl; what augment.jl provides is not visible here — TODO confirm.
include("augment.jl")
include("bin_to_leaves.jl")

In [None]:
# Optional extra cores: launch two additional Julia worker processes.
# NOTE(review): every execution of this cell calls `addprocs(2)` again, so re-running it
# keeps adding workers. Presumably the workers are meant to speed up the model fits
# below — confirm.
using Distributed
addprocs(2)

In [None]:
# Load the airfoil data. Column names are supplied explicitly through `header`.
# `CSV.read(path; ...)` without a sink argument errors on current CSV.jl releases, so the
# files are parsed with `CSV.File` and materialized with `DataFrame`, which works on both
# old and new versions. The frames are converted to plain matrices right away, so the old
# `copycols` flag is no longer needed.
columns = ["Re", "thick", "M", "C_L"];
X = DataFrame(CSV.File("airfoil_X.csv", header=columns, delim=","));
Y = DataFrame(CSV.File("airfoil_Y.csv", header=["C_D"], delim=","));
X = Matrix(X);
Y = Matrix(Y);
# Grid values of the four input variables.
# NOTE(review): `Re` and `M` are rebound to test-set columns in the error-metrics cell
# further down, so these grids are only valid until that cell runs.
Re = collect(10000:5000:35000);
thick = [100, 110, 120, 130, 140, 145];
M = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
cl = collect(0.35:0.05:0.70);

In [None]:
# Quick 3-D scatter of the raw data: columns 3 and 4 of X (named "M" and "C_L" when the
# file was read) against the single response column of Y.
using Plots
plt = Plots.scatter(X[:, 3], X[:, 4], Y[:, 1]; markersize=2)
display(plt)

In [None]:
# Split the rows into training (80%) and test (20%) sets with a fixed seed, then show the
# dimensions of the training features.
(train_X, train_y), (test_X, test_y) = IAI.split_data(
    :regression, Matrix(X), Matrix(Y);
    seed=1, train_proportion=0.8
);
size(train_X)

In [None]:
# First model: a deep optimal regression tree fitted on the training split.
# (All variables have already been log-transformed.)
lnr = IAI.OptimalTreeRegressor(;
    random_seed=1,
    max_depth=7,
    cp=1e-8,
    minbucket=0.02,
    regression_sparsity=:all,
    regression_lambda=0.0001
)
IAI.fit!(lnr, train_X, train_y)

In [None]:
# Cross-validation of best tree.
# Tune `max_depth` and `minbucket` with k-fold cross-validation on the training data only.
# The test set must stay out of the search: using it as the validation set would select
# hyperparameters on the same data that is later reported as "Test MSE", biasing that
# number optimistically.
grids = IAI.GridSearch(lnr, max_depth = [2,3], minbucket=[0.03,0.06,0.09,0.12,0.15])
IAI.fit_cv!(grids, train_X, train_y, n_folds=5)

In [None]:
# Seeing best learner: rebind `lnr` to the learner selected by the grid search above.
lnr = IAI.get_learner(grids)

In [None]:
# How about trying sparse hyperplane splits?
# Same tuning as for the axis-parallel tree, but the learner is configured with
# `hyperplane_config=(sparsity=2,)`.
lnr = IAI.OptimalTreeRegressor(random_seed=1, max_depth=3, cp=0.001, minbucket=0.05, 
    hyperplane_config=(sparsity=2,), regression_sparsity=:all, regression_lambda = 0.0001, fast_num_support_restarts =10)
grids = IAI.GridSearch(lnr, max_depth = [2,3], minbucket=[0.03,0.06,0.09,0.12,0.15])
# Select hyperparameters by k-fold cross-validation on the training data only. Validating
# on the test set would leak it into model selection and bias the test error reported
# afterwards.
IAI.fit_cv!(grids, train_X, train_y, n_folds=5)

In [None]:
# Seeing best learner: rebind `lnr` to the learner selected by the grid search above.
lnr = IAI.get_learner(grids)

In [None]:
# MSE errors of the selected tree, in the same (log) space as the data.
# `IAI.score(...; criterion=:mse)` returns a normalized, higher-is-better score, so
# `1 - score` is a relative error rather than a mean squared error. The MSE is therefore
# computed directly from the predictions, which makes it comparable with the posynomial
# MSE printed below. `vec` makes the subtraction independent of whether the targets are
# stored as a vector or as an n-by-1 matrix.
println("Training MSE:", mean(abs2, vec(IAI.predict(lnr, train_X)) .- vec(train_y)))
println("Test MSE:", mean(abs2, vec(IAI.predict(lnr, test_X)) .- vec(test_y)))
# MSE error of global posynomial
# The test data is mapped back from log space with `exp` before the posynomial is
# evaluated; the error itself is again measured in log space.
# NOTE(review): this rebinds the globals `Re` and `M`, which held grid values after the
# data-loading cell.
Re = exp.(test_X[:,1]); thickness = exp.(test_X[:,2]); M = exp.(test_X[:,3]); C_L = exp.(test_X[:,4]); C_D = exp.(test_y);
CDp = 0.0470226 .* (Re).^-0.388166 .* thickness.^0.782129 .* (M).^-0.339824 .* (C_L).^0.94829 +
    190.63 .* (Re).^-0.218175 .* thickness.^3.94137 .* (M).^19.2346 .* (C_L).^1.14997 +
    1.62158 .* (Re).^-0.549562 .* thickness.^1.2895 .* (M).^3.03057 .* (C_L).^1.77464 +
    2.91642e-12 .* (Re).^1.18062 .* thickness.^-1.75547 .* (M).^0.105431 .*(C_L).^-1.4407;
# The four-term sum above is raised to the power 1/1.64722 to obtain the C_D estimate.
CDp = CDp.^(1/1.64722);
MSEposy = sum((log.(C_D)-log.(CDp)).^2)/size(C_D,1)
println("Test MSE of global posynomial: ", MSEposy)

In [None]:
# 3-D scatter of the tree's predictions over the full data set, with each point colored
# by the index returned from `bin_to_leaves` for it.
using Plots
leaf_index, all_leaves = bin_to_leaves(lnr, X);
predictions = IAI.predict(lnr, X)
plt = Plots.scatter(X[:, 3], X[:, 4], predictions; zcolor=leaf_index, markersize=2)
display(plt)

In [None]:
# Let's try convex regression... this is the MIO approach for HW2
include("convexRegress.jl");
# The model is fitted on the full data set (X, Y), and `thetas` is compared against
# that same Y below, so the reported error is an in-sample error, not a held-out test
# error. The printed label says so explicitly.
thetas, ksis = convexRegress(Y,X,10,1/1000, 1e-4)
println("In-sample MSE of convex regression: ", mean((thetas-Y).^2))

In [None]:
# Overlay the convex-regression values (`thetas`) on the observed data in one 3-D scatter.
plt = Plots.scatter(X[:, 3], X[:, 4], Y[:, 1]; markersize=2)
plt = Plots.scatter!(plt, X[:, 3], X[:, 4], thetas; markersize=2)
display(plt)