# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
import pandas as pd
import timeit
import time
import tensorflow as tf
from joblib import dump, load
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

# Functions

In [None]:
def poly_svm(train_data):
    degrees = [1,2,3,4]
    times = []
    regs = []
    for degree in degrees:
        t0 = time.time()
        reg = SVR(kernel="poly", degree = degree)
        reg.fit(train_data[:,:-1], train_data[:,-1])
        t1 = time.time()
        times.append(t1-t0)
        regs.append(reg)
        print("Done degree {}".format(degree))
    return times, regs

def poly_svm_boost(train_data, M_train_data, data_type):
    degrees = [1,2,3,4]
    times = []
    M_regs = []
    g_regs = []
    for degree in degrees:
        t0 = time.time()
        reg = SVR(kernel="poly", degree = degree)
        reg.fit(M_train_data[:,:-1], M_train_data[:,-1])
        M = reg.predict(train_data[:,:2])
        train_data[:,-1] = train_data[:,-1]/M
        reg_g = SVR(kernel="poly", degree = degree)
        if data_type == 'le':
            reg_g.fit(train_data[:,1:-1], train_data[:,-1])
        elif data_type == 'g':
            reg_g.fit(train_data[:,:-1], train_data[:,-1])
        else:
            raise Exception("Invalid data type")
        t1 = time.time()
        times.append(t1-t0)
        M_regs.append(reg)
        g_regs.append(reg_g)
        print("Done degree {}".format(degree))
    
    return times, M_regs, g_regs

def rbf_svm(train_data):
    t0 = time.time()
    reg = SVR(kernel="rbf")
    reg.fit(train_data[:,:-1], train_data[:,-1])
    t1 = time.time()
    return t1-t0, reg

def rbf_svm_boost(train_data, M_train_data, data_type):
    t0 = time.time()
    reg = SVR(kernel="rbf")
    reg.fit(M_train_data[:,:-1], M_train_data[:,-1])
    M = reg.predict(train_data[:,:2])
    train_data[:,-1] = train_data[:,-1]/M
    reg_g = SVR(kernel="rbf")
    if data_type == 'le':
        reg_g.fit(train_data[:,1:-1], train_data[:,-1])
    elif data_type == 'g':
        reg_g.fit(train_data[:,:-1], train_data[:,-1])
    else:
        raise Exception("Invalid data type")
    t1 = time.time()
    return t1-t0, reg, reg_g

def NN(data):
    X_train = data[:,:-1]
    y_train = data[:,-1]
    #create model
    model = Sequential()

    #get number of columns in training data
    n_cols = X_train.shape[1]

    #add model layers
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='relu'))

    model.compile(optimizer='adam', loss='mse')
    early_stopping_monitor = EarlyStopping(patience=10)
    history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, callbacks=[early_stopping_monitor])

    return model

# RN Dataset

## $a/c \le 1$

In [None]:
d1_le1 = np.load("data/1_RN_data_ac_le1.npy")
d1_le1.shape

### No Boosting

In [None]:
def no_boosting(data, data_name):
    ###################################
    ########## No boosting ############
    ###################################
    # Poly SVM
    times, regs = poly_svm(data)
    # saving models
    i = 1
    for reg in regs:
        dump(reg, 'results/no_boosting/run_2/models/poly/{}_poly_degree_{}.joblib'.format(data_name,
                                                                                     i))
        i += 1
    np.save("results/no_boosting/run_2/times/{}_poly_times.npy".format(data_name), times)

    # RBF SVM
    rbf_time, rbf_reg = rbf_svm(data)
    # saving models
    dump(rbf_reg, 'results/no_boosting/run_2/models/rbf/{}_rbf.joblib'.format(data_name))
    np.save("results/no_boosting/run_2/times/{}_rbf_times.npy".format(data_name), rbf_time)
    
#     # NN 
#     model = NN(data)
#     model.save('results/no_boosting/run_2/models/NN/{}.h5'.format(data_name))
    return times, regs, rbf_time, rbf_reg

In [None]:
times, regs, rbf_time, rbf_reg = no_boosting(d1_le1, "d1_le1")

### Boosting

In [None]:
def boosting(full_data, M_data, data_name, data_type):
    ###################################
    ############ Boosting #############
    ###################################
    # Poly SVM
    times, M_regs, g_regs = poly_svm_boost(full_data, M_data, data_type)
    
    # saving models
    for i in range(4):
        dump(M_regs[i], 'results/boosting/models/poly/M_{}_poly_degree_{}.joblib'.format(data_name,i))
        dump(g_regs[i], 'results/boosting/models/poly/g_{}_poly_degree_{}.joblib'.format(data_name,i))

    np.save("results/boosting/times/{}_poly_times.npy".format(data_name), times)

    # RBF SVM
    rbf_times, rbf_M_reg, rbf_g_reg = rbf_svm_boost(full_data, M_data, data_type)
    
    # saving models
    dump(rbf_M_reg, 'results/boosting/models/rbf/M_{}_rbf.joblib'.format(data_name))
    dump(rbf_g_reg, 'results/boosting/models/rbf/g_{}_rbf.joblib'.format(data_name))

    np.save("results/boosting/times/{}_rbf_times.npy".format(data_name), rbf_times)
    
#     # NN 
#     model = NN(data)
#     model.save('results/no_boosting/run_2/models/NN/{}.h5'.format(data_name))
    return times, M_regs, g_regs, rbf_times, rbf_M_reg, rbf_g_reg

In [None]:
data = []
for row in d1_le1:
    if (row[2] > 1.5 and row[2] < 1.65):
        data.append(np.delete(row, 2))
M_data = np.array(data)

boost_times, M_regs, g_regs, rbf_boost_times, rbf_M_regs, rbf_g_regs = boosting(d1_le1,
                                                                                M_data, 
                                                                                "d1_le1", 
                                                                                "le")

### Results

In [None]:
# Loading test dataset
df = pd.read_csv("../../data/3_datasets_new/test/1_RN_data_TEST.csv")
df.drop(["c/b", "F"], axis=1, inplace=True)
d1_test = df.to_numpy()
d1_test_le = d1_test[d1_test[:,0]<=1]
d1_test_g = d1_test[d1_test[:,0]>1]
d1_test.shape, d1_test_g.shape, d1_test_le.shape

In [None]:
def evaluate(reg, X, Y):
    results = reg.predict(X)
    mse = np.sum((results - Y)**2)/len(data)
    return mse, results

algos = ["Poly 1", "Poly 2", "Poly 3", "Poly 4", "rbf"]
no_boost_mse = []
boost_mse = []

# Poly
for i in range(4):
    # NO BOOST
    mse, _ = evaluate(regs[i], d1_test_le[:,:-1], d1_test_le[:,-1])
    no_boost_mse.append(mse)
    
    # BOOST
    _, M = evaluate(M_regs[i], d1_test_le[:,:2], d1_test_le[:,-1])
    _, g = evaluate(g_regs[i], d1_test_le[:,1:-1], d1_test_le[:,-1])
    boost_mse.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))
    
# RBF
# NO BOOST
mse, _ = evaluate(rbf_reg, d1_test_le[:,:-1], d1_test_le[:,-1])
no_boost_mse.append(mse)

# BOOST
_, M = evaluate(rbf_M_regs, d1_test_le[:,:2], d1_test_le[:,-1])
_, g = evaluate(rbf_g_regs, d1_test_le[:,1:-1], d1_test_le[:,-1])
boost_mse.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))

In [None]:
plt.plot(algos, no_boost_mse, label="No Boosting")
plt.plot(algos, boost_mse, label="Boosting")
plt.legend()
plt.semilogy()

# $a/c >1$

In [None]:
d1_le1 = np.load("data/1_RN_data_ac_g1.npy")
d1_le1.shape

In [None]:
# No Boosting
times, regs, rbf_time, rbf_reg = no_boosting(d1_le1, "d1_g1")

# Boosting
data = []
for row in d1_le1:
    if (row[2] > 1.5 and row[2] < 1.65):
        data.append(np.delete(row, 2))
M_data = np.array(data)

boost_times, M_regs, g_regs, rbf_boost_times, rbf_M_regs, rbf_g_regs = boosting(d1_le1,
                                                                                M_data, 
                                                                                "d1_g1", 
                                                                                "g")

In [None]:
# Loading test dataset
df = pd.read_csv("../../data/3_datasets_new/test/1_RN_data_TEST.csv")
df.drop(["c/b", "F"], axis=1, inplace=True)
d1_test = df.to_numpy()
d1_test_le = d1_test[d1_test[:,0]<=1]
d1_test_g = d1_test[d1_test[:,0]>1]
d1_test.shape, d1_test_g.shape, d1_test_le.shape

In [None]:
algos = ["Poly 1", "Poly 2", "Poly 3", "Poly 4", "rbf"]
no_boost_mse_1 = []
boost_mse_1 = []

# Poly
for i in range(4):
    # NO BOOST
    mse, _ = evaluate(regs[i], d1_test_le[:,:-1], d1_test_le[:,-1])
    no_boost_mse_1.append(mse)
    
    # BOOST
    _, M = evaluate(M_regs[i], d1_test_le[:,:2], d1_test_le[:,-1])
    _, g = evaluate(g_regs[i], d1_test_le[:,:-1], d1_test_le[:,-1])
    boost_mse_1.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))
    
# RBF
# NO BOOST
mse, _ = evaluate(rbf_reg, d1_test_le[:,:-1], d1_test_le[:,-1])
no_boost_mse_1.append(mse)

# BOOST
_, M = evaluate(rbf_M_regs, d1_test_le[:,:2], d1_test_le[:,-1])
_, g = evaluate(rbf_g_regs, d1_test_le[:,:-1], d1_test_le[:,-1])
boost_mse_1.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))

In [None]:
plt.plot(algos, no_boost_mse, label="No Boosting (a/c<=1)")
plt.plot(algos, boost_mse, label="Boosting (a/c<=1)")
plt.plot(algos, no_boost_mse_1, label="No Boosting (a/c>1)")
plt.plot(algos, boost_mse_1, label="Boosting (a/c>1)")
plt.legend()
plt.semilogy()

## $0< a/c <2$

In [None]:
# Loading test dataset
df = pd.read_csv("../../data/3_datasets_new/test/1_RN_data_TEST.csv")
df.drop(["c/b", "F"], axis=1, inplace=True)
d_test = df.to_numpy()

# Testing
MSE = []
for i in range(1, 5):
    reg = load('../Test_1/poly_svm/models/d1/d1_poly_degree_{}.joblib'.format(i))
    results = reg.predict(d_test[:,:-1])
    mse = np.sum((results - d_test[:,-1])**2)/len(d_test)
    print("Mean squared error: ", mse)
    MSE.append(mse)
    
reg = load('../Test_1/rbf_svm/d1_rbf.joblib')
results = reg.predict(d_test[:,:-1])
mse = np.sum((results - d_test[:,-1])**2)/len(d_test)
print("Mean squared error: ", mse)
MSE.append(mse)

In [None]:
plt.plot(algos, no_boost_mse, label="No Boosting (a/c<=1)")
plt.plot(algos, boost_mse, label="Boosting (a/c<=1)")
plt.plot(algos, no_boost_mse_1, label="No Boosting (a/c>1)")
plt.plot(algos, boost_mse_1, label="Boosting (a/c>1)")
plt.plot(algos, MSE, label="All points")
plt.legend()
plt.semilogy()

# RN Eqn dataset

### a/c <= 1

In [None]:
d1_le1 = np.load("data/2_RN_eqn_ac_le1.npy")

# No Boosting
times, regs, rbf_time, rbf_reg = no_boosting(d1_le1, "d2_le1")

# Boosting
data = []
for row in d1_le1:
    if (row[2] > 1.5 and row[2] < 1.65):
        data.append(np.delete(row, 2))
M_data = np.array(data)

boost_times, M_regs, g_regs, rbf_boost_times, rbf_M_regs, rbf_g_regs = boosting(d1_le1,
                                                                                M_data, 
                                                                                "d2_le1", 
                                                                                "le")

In [None]:
# Loading test dataset
df = pd.read_csv("../../data/3_datasets_new/test/")
df.drop(["c/b", "F"], axis=1, inplace=True)
d1_test = df.to_numpy()
d1_test_le = d1_test[d1_test[:,0]<=1]
d1_test_g = d1_test[d1_test[:,0]>1]
d1_test.shape, d1_test_g.shape, d1_test_le.shape

algos = ["Poly 1", "Poly 2", "Poly 3", "Poly 4", "rbf"]
no_boost_mse = []
boost_mse = []

# Poly
for i in range(4):
    # NO BOOST
    mse, _ = evaluate(regs[i], d1_test_le[:,:-1], d1_test_le[:,-1])
    no_boost_mse.append(mse)
    
    # BOOST
    _, M = evaluate(M_regs[i], d1_test_le[:,:2], d1_test_le[:,-1])
    _, g = evaluate(g_regs[i], d1_test_le[:,1:-1], d1_test_le[:,-1])
    boost_mse.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))
    
# RBF
# NO BOOST
mse, _ = evaluate(rbf_reg, d1_test_le[:,:-1], d1_test_le[:,-1])
no_boost_mse.append(mse)

# BOOST
_, M = evaluate(rbf_M_regs, d1_test_le[:,:2], d1_test_le[:,-1])
_, g = evaluate(rbf_g_regs, d1_test_le[:,1:-1], d1_test_le[:,-1])
boost_mse.append(np.sum((M*g - d1_test_le[:,-1])**2)/len(d1_test_le[:,-1]))

### a/c > 1

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
import pandas as pd
import timeit
import time
import tensorflow as tf
from joblib import dump, load
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

def poly_svm(train_data):
    degrees = [1,2,3,4]
    times = []
    regs = []
    for degree in degrees:
        t0 = time.time()
        reg = SVR(kernel="poly", degree = degree)
        reg.fit(train_data[:,:-1], train_data[:,-1])
        t1 = time.time()
        times.append(t1-t0)
        regs.append(reg)
        print("Done degree {}".format(degree))
    return times, regs

def poly_svm_boost(train_data, M_train_data, data_type):
    degrees = [1,2,3,4]
    times = []
    M_regs = []
    g_regs = []
    for degree in degrees:
        t0 = time.time()
        reg = SVR(kernel="poly", degree = degree)
        reg.fit(M_train_data[:,:-1], M_train_data[:,-1])
        M = reg.predict(train_data[:,:2])
        train_data[:,-1] = train_data[:,-1]/M
        reg_g = SVR(kernel="poly", degree = degree)
        if data_type == 'le':
            reg_g.fit(train_data[:,1:-1], train_data[:,-1])
        elif data_type == 'g':
            reg_g.fit(train_data[:,:-1], train_data[:,-1])
        else:
            raise Exception("Invalid data type")
        t1 = time.time()
        times.append(t1-t0)
        M_regs.append(reg)
        g_regs.append(reg_g)
        print("Done degree {}".format(degree))

    return times, M_regs, g_regs

def rbf_svm(train_data):
    t0 = time.time()
    reg = SVR(kernel="rbf")
    reg.fit(train_data[:,:-1], train_data[:,-1])
    t1 = time.time()
    return t1-t0, reg

def rbf_svm_boost(train_data, M_train_data, data_type):
    t0 = time.time()
    reg = SVR(kernel="rbf")
    reg.fit(M_train_data[:,:-1], M_train_data[:,-1])
    M = reg.predict(train_data[:,:2])
    train_data[:,-1] = train_data[:,-1]/M
    reg_g = SVR(kernel="rbf")
    if data_type == 'le':
        reg_g.fit(train_data[:,1:-1], train_data[:,-1])
    elif data_type == 'g':
        reg_g.fit(train_data[:,:-1], train_data[:,-1])
    else:
        raise Exception("Invalid data type")
    t1 = time.time()
    return t1-t0, reg, reg_g

def no_boosting(data, data_name):
    ###################################
    ########## No boosting ############
    ###################################
    # Poly SVM
    times, regs = poly_svm(data)
    # saving models
    i = 1
    for reg in regs:
        dump(reg, 'results/no_boosting/run_2/models/poly/{}_poly_degree_{}.joblib'.format(data_name,
                                                                                     i))
        i += 1
    np.save("results/no_boosting/run_2/times/{}_poly_times.npy".format(data_name), times)

    # RBF SVM
    rbf_time, rbf_reg = rbf_svm(data)
    # saving models
    dump(rbf_reg, 'results/no_boosting/run_2/models/rbf/{}_rbf.joblib'.format(data_name))
    np.save("results/no_boosting/run_2/times/{}_rbf_times.npy".format(data_name), rbf_time)
    return times, regs, rbf_time, rbf_reg

def boosting(full_data, M_data, data_name, data_type):
    ###################################
    ############ Boosting #############
    ###################################
    # Poly SVM
    times, M_regs, g_regs = poly_svm_boost(full_data, M_data, data_type)

    # saving models
    for i in range(4):
        dump(M_regs[i], 'results/boosting/models/poly/M_{}_poly_degree_{}.joblib'.format(data_name,i))
        dump(g_regs[i], 'results/boosting/models/poly/g_{}_poly_degree_{}.joblib'.format(data_name,i))

    np.save("results/boosting/times/{}_poly_times.npy".format(data_name), times)

    # RBF SVM
    rbf_times, rbf_M_reg, rbf_g_reg = rbf_svm_boost(full_data, M_data, data_type)

    # saving models
    dump(rbf_M_reg, 'results/boosting/models/rbf/M_{}_rbf.joblib'.format(data_name))
    dump(rbf_g_reg, 'results/boosting/models/rbf/g_{}_rbf.joblib'.format(data_name))

    np.save("results/boosting/times/{}_rbf_times.npy".format(data_name), rbf_times)
    return times, M_regs, g_regs, rbf_times, rbf_M_reg, rbf_g_reg

def evaluate(reg, X, Y):
    results = reg.predict(X)
    mse = np.sum((results - Y)**2)/len(X)
    return mse, results

def testing(d_le1, d_g1, d_test, data_name):
    ################# a/c <= 1###########################
    # No Boosting
    times, regs, rbf_time, rbf_reg = no_boosting(d_le1, data_name+"_le1")
    # Boosting
    data = []
    for row in d_le1:
        if (row[2] > 1.5 and row[2] < 1.65):
            data.append(np.delete(row, 2))
    M_data = np.array(data)
    boost_times, M_regs, g_regs, rbf_boost_times, rbf_M_regs, rbf_g_regs = boosting(d_le1,M_data,data_name+"_le1","le")
    # Testing
    d_test_le = d_test[d_test[:,0]<=1]
    no_boost_mse = []
    boost_mse = []
    # Poly
    for i in range(4):
        # NO BOOST
        mse, _ = evaluate(regs[i], d_test_le[:,:-1], d_test_le[:,-1])
        no_boost_mse.append(mse)

        # BOOST
        _, M = evaluate(M_regs[i], d_test_le[:,:2], d_test_le[:,-1])
        _, g = evaluate(g_regs[i], d_test_le[:,1:-1], d_test_le[:,-1])
        boost_mse.append(np.sum((M*g - d_test_le[:,-1])**2)/len(d_test_le[:,-1]))

    # RBF
    # NO BOOST
    mse, _ = evaluate(rbf_reg, d_test_le[:,:-1], d_test_le[:,-1])
    no_boost_mse.append(mse)

    # BOOST
    _, M = evaluate(rbf_M_regs, d_test_le[:,:2], d_test_le[:,-1])
    _, g = evaluate(rbf_g_regs, d_test_le[:,1:-1], d_test_le[:,-1])
    boost_mse.append(np.sum((M*g - d_test_le[:,-1])**2)/len(d_test_le[:,-1]))

    ################# a/c > 1###########################
    # No Boosting
    times, regs, rbf_time, rbf_reg = no_boosting(d_g1, data_name+"_g1")
    # Boosting
    data = []
    for row in d_g1:
        if (row[2] > 1.5 and row[2] < 1.65):
            data.append(np.delete(row, 2))
    M_data = np.array(data)
    boost_times, M_regs, g_regs, rbf_boost_times, rbf_M_regs, rbf_g_regs = boosting(d_g1,M_data,data_name+"_g1","g")
    # Testing
    d_test_g = d_test[d_test[:,0]>1]
    no_boost_mse_g = []
    boost_mse_g = []
    # Poly
    for i in range(4):
        # NO BOOST
        mse, _ = evaluate(regs[i], d_test_g[:,:-1], d_test_g[:,-1])
        no_boost_mse_g.append(mse)

        # BOOST
        _, M = evaluate(M_regs[i], d_test_g[:,:2], d_test_g[:,-1])
        _, g = evaluate(g_regs[i], d_test_g[:,1:-1], d_test_g[:,-1])
        boost_mse_g.append(np.sum((M*g - d_test_g[:,-1])**2)/len(d_test_g[:,-1]))

    # RBF
    # NO BOOST
    mse, _ = evaluate(rbf_reg, d_test_g[:,:-1], d_test_g[:,-1])
    no_boost_mse_g.append(mse)

    # BOOST
    _, M = evaluate(rbf_M_regs, d_test_g[:,:2], d_test_g[:,-1])
    _, g = evaluate(rbf_g_regs, d_test_g[:,1:-1], d_test_g[:,-1])
    boost_mse_g.append(np.sum((M*g - d_test_g[:,-1])**2)/len(d_test_g[:,-1]))

    return boost_mse, no_boost_mse, boost_mse_g, no_boost_mse_g

In [5]:
d1_le1 = np.load("data/1_RN_data_ac_le1.npy")
d1_g1 = np.load("data/1_RN_data_ac_g1.npy")
df = pd.read_csv("../../data/3_datasets_new/test/1_RN_data_TEST.csv")
df.drop(["c/b", "F"], axis=1, inplace=True)
d1_test = df.to_numpy()

boost_mse, no_boost_mse, boost_mse_g, no_boost_mse_g = testing(d_le1=d1_le1, d_g1=d1_g1, 
                                                               d_test=d1_test, data_name="d1")

Done degree 1
Done degree 2
Done degree 3
Done degree 4
Done degree 1
Done degree 2
Done degree 3
Done degree 4
Done degree 1
Done degree 2
Done degree 3
Done degree 4
Done degree 1
Done degree 2
Done degree 3
Done degree 4


ValueError: X has 2 features, but SVR is expecting 3 features as input.