# Imports and dataset loading

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.multioutput import MultiOutputRegressor
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from keras.wrappers.scikit_learn import KerasRegressor
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import autokeras as ak
import tensorflow as tf
import pandas as pd

# Largest amplifier count that has a dataset file; the smallest one is 2.
maxAmpNumber = 7

gainArray = []
lossArray = []

originalDataset1 = []
originalDataset2 = []

# One DataFrame per amplifier count; position 0 holds the 2-amplifier data.
binaryDataset1 = []
binaryDataset2 = []

# NOTE: the loop variable keeps the name `i` on purpose: it stays bound at
# module level after the loop, where later cells can still read it.
for i in range(2, maxAmpNumber + 1):
    originalDataset1 = pd.read_csv('d1/{}amp/dataset-{}.txt'.format(i, i), sep = ',', header = None)
    originalDataset2 = pd.read_csv('d2/{}amp/dataset-{}.txt'.format(i, i), sep = ',', header = None)

    for rawFrame, binaryFrames in ((originalDataset1, binaryDataset1),
                                   (originalDataset2, binaryDataset2)):
        binaryRows = []

        for line in rawFrame.values:
            # Columns 0-39 collapse to a 0/1 flag (non-zero -> 1); every
            # column from 40 onwards is copied through unchanged.
            myList = [0 if j == 0 else 1 for j in line[:40]]
            myList.extend(line[40:])
            binaryRows.append(myList)

        binaryFrames.append(pd.DataFrame(binaryRows))

def loadDataset(ampIndex, datasetIndex):
    '''
    Build the feature matrix and the gain/loss targets for one amplifier count.

    ampIndex     -> number of amplifiers, one of [2, 3, 4, 5, 6, 7]
    datasetIndex -> 0 selects binaryDataset1 (full dataset); any other value
                    selects binaryDataset2 (optimized dataset)

    Returns (X, y). X holds columns 0-39 of the selected table. y holds the
    2 * ampIndex columns that follow them, laid out as consecutive
    (gain, loss) pairs, one pair per amplifier.

    Raises ValueError when ampIndex is below 2: the tables are stored starting
    at the 2-amplifier case, so a smaller value would silently index from the
    end of the list.
    '''
    if ampIndex < 2:
        raise ValueError("ampIndex must be at least 2, got {}".format(ampIndex))

    binaryDataset = binaryDataset1 if datasetIndex == 0 else binaryDataset2
    values        = binaryDataset[ampIndex - 2].values

    # The number of target columns is derived from the argument itself, not
    # from a module-level variable, so the function works no matter which
    # cells have been executed before it.
    targetEnd = 40 + 2 * ampIndex

    X = np.array(values[:, :40])
    y = np.array(values[:, 40:targetEnd])
    return X, y
    
def getGainError(value1, value2):
    '''Return the squared difference between two gain values.'''
    difference = value1 - value2
    return difference ** 2

def getLossError(value1, value2):
    '''Return the squared difference between two loss values.'''
    difference = value1 - value2
    return difference ** 2



# Model setup

In [2]:
def initModels(ampNumber):
    '''
    Create one fresh, unfitted regressor per amplifier.

    Every entry is a MultiOutputRegressor wrapping its own default
    KNeighborsRegressor, so no estimator instance is shared between entries.
    Returns a list with ampNumber elements.
    '''
    return [MultiOutputRegressor(KNeighborsRegressor()) for _ in range(ampNumber)]

# Cross validation function

In [3]:
def crossValidate(regressor, X, y, folds = 5):
    '''
    Run a contiguous k-fold cross validation and report per-fold RMSE.

    regressor -> object exposing fit(X, y) and predict(X); it is refitted
                 from scratch on every fold
    X         -> 2-D array of samples (rows) by features (columns)
    y         -> 2-D array whose column 0 is the gain and column 1 the loss
    folds     -> number of consecutive, unshuffled folds

    Returns (gainRmse, lossRmse), two arrays with one RMSE value per fold.

    Raises ValueError when there are too few samples for every fold to hold at
    least one test sample.
    '''
    sampleCount = X.shape[0]
    foldSize    = int(math.ceil(sampleCount / folds))

    # With a ceil-based fold size the trailing folds can end up empty; detect
    # that before fitting anything instead of reporting a meaningless error.
    if (folds - 1) * foldSize >= sampleCount:
        raise ValueError(
            "cannot split {} samples into {} non-empty folds".format(sampleCount, folds))

    gainErrors = []
    lossErrors = []

    for i in range(folds):
        sliceBegin = i * foldSize
        sliceEnd   = min((i + 1) * foldSize, sampleCount)
        heldOut    = np.s_[sliceBegin: sliceEnd]

        regressor.fit(np.delete(X, heldOut, 0), np.delete(y, heldOut, 0))

        prediction = np.asarray(regressor.predict(X[heldOut]))
        expected   = np.asarray(y[heldOut])

        # Average over the samples actually held out: the last fold may be
        # shorter than foldSize, and dividing by foldSize would understate
        # its error.
        testSize = sliceEnd - sliceBegin

        # The error helpers are applied element-wise to whole columns.
        gainErrors.append(np.sum(getGainError(prediction[:, 0], expected[:, 0])) / testSize)
        lossErrors.append(np.sum(getLossError(prediction[:, 1], expected[:, 1])) / testSize)

    return np.sqrt(np.array(gainErrors)), np.sqrt(np.array(lossErrors))

In [4]:
def trainModel(ampNumber, datasetIndex, models):
    '''
    Cross validate one model per amplifier and print the per-fold errors.

    ampNumber    -> number of amplifiers in the link
    datasetIndex -> dataset selector forwarded to loadDataset
    models       -> sequence with at least ampNumber regressors; models[i] is
                    evaluated on the (gain, loss) column pair of amplifier i

    Returns (linkGainErrors, linkLossErrors): two lists holding, for every
    amplifier in order, the mean of its per-fold gain and loss errors.
    '''
    linkGainErrors = []
    linkLossErrors = []

    # The data depends only on the arguments, so it is loaded once rather
    # than once per amplifier.
    X, y = loadDataset(ampNumber, datasetIndex)

    for i in range(ampNumber):
        # Targets are laid out as consecutive (gain, loss) pairs.
        gainErrors, lossErrors = crossValidate(models[i], X, y[:, i * 2: i * 2 + 2])

        print(gainErrors, "=> %0.2f (+/- %0.2f)" % (np.mean(gainErrors), gainErrors.std() * 2))
        print(lossErrors, "=> %0.2f (+/- %0.2f)\n" % (np.mean(lossErrors), lossErrors.std() * 2))

        linkGainErrors.append(np.mean(gainErrors))
        linkLossErrors.append(np.mean(lossErrors))

    # Return the per-amplifier summary, not just the fold errors of whichever
    # amplifier happened to be processed last.
    return linkGainErrors, linkLossErrors

# Plotting functions

In [5]:
def plotErrorbar(x, y, yerr1, yerr12, ampNumber):
    '''
    Draw two error-bar series over the same points and save the figure.

    x, y      -> point coordinates shared by both series
    yerr1     -> error bars labelled 'Full dataset'
    yerr12    -> error bars labelled 'Optimized dataset'
    ampNumber -> number of amplifiers; sets the x ticks (1..ampNumber) and
                 the output file name

    The figure is written to "graphs/graph-<ampNumber>amps". Nothing is
    returned.
    '''
    ticks = list(range(1, ampNumber + 1))
    fig, ax = plt.subplots()

    ax.errorbar(x, y, yerr = yerr1, capsize = 10, label = 'Full dataset')
    # The second series must use the `yerr12` parameter; there is no other
    # error array in scope.
    ax.errorbar(x, y, yerr = yerr12, capsize = 10, capthick = 4, label = 'Optimized dataset')
    ax.set_xticks(ticks)

    # Set titles, legend, axis labels, etc.
    ax.set_title('Error for ' + str(len(x)) + ' amplifiers')
    ax.set_xlabel('Amplifier')
    ax.set_ylabel('RMSE')
    ax.legend()

    fig.savefig("graphs/graph-{}amps".format(ampNumber))

# Putting it all together

In [6]:
# Evaluate every amplifier count on both datasets (0 = full, 1 = optimized).
for ampNumber in range(2, maxAmpNumber + 1):
    for datasetIndex in range(2):
        print("#{} amps. {} dataset".format(ampNumber, "Full" if datasetIndex == 0 else "Otimized"))
        gainArray = []
        lossArray = []

        # Build exactly one model per amplifier of the current link. The size
        # must come from `ampNumber`, not from a loop variable left over from
        # an earlier cell.
        models = initModels(ampNumber)
        trainModel(ampNumber, datasetIndex, models)

#2 amps. Full dataset
[5.75351998 3.4563191  3.56341869 2.98505464 4.46420585] => 4.04 (+/- 1.96)
[2.65928769 4.94594402 4.02367012 3.15075294 2.09423656] => 3.37 (+/- 2.02)

[3.85313044 2.18304129 1.07542337 2.11086429 4.08665969] => 2.66 (+/- 2.28)
[3.33910262 1.19236416 0.89231187 2.3485965  1.34328309] => 1.82 (+/- 1.80)

#2 amps. Otimized dataset
[0.         0.76376262 0.8819171  1.22247472 0.8537499 ] => 0.74 (+/- 0.81)
[4.87100263 5.13906606 3.22249041 3.19965276 2.68245327] => 3.82 (+/- 1.98)

[0.78244631 0.93333333 0.92074849 0.67986927 4.50924975] => 1.57 (+/- 2.95)
[3.37457816 2.44744765 0.47022453 1.2489996  1.32664992] => 1.77 (+/- 2.04)

#3 amps. Full dataset
[4.96904285 3.97756482 3.15178087 4.531254   1.35958345] => 3.60 (+/- 2.55)
[2.54896573 6.0925466  3.29423296 4.99129169 3.17016198] => 4.02 (+/- 2.63)

[2.48598994 4.35776009 1.48726958 6.21100977 5.72547133] => 4.05 (+/- 3.64)
[3.33405345 4.57789038 4.07917982 2.87643331 3.33939109] => 3.64 (+/- 1.21)

[1.4506983  

[0. 0. 0. 0. 0.] => 0.00 (+/- 0.00)
[0.         0.         0.         0.         0.82416338] => 0.16 (+/- 0.66)

