# Imports and dataset loading

In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

# Notebook-wide state. trainModel appends to strategyArray / gainArray /
# lossArray; ampNumber is the number of (gain, loss) column pairs that follow
# the first 40 columns of the dataset.
ampNumber     = 4
models        = []
strategyArray = []
gainArray     = []
lossArray     = []

# Read the comma-separated, header-less dataset and shuffle its rows with a
# fixed seed so every run sees the same order.
originalDataSet = pd.read_csv('dataset.txt', sep = ',', header = None).sample(frac = 1, random_state = 5)

# Collapse each of the first 40 columns to a 0/1 flag (non-zero -> 1) and keep
# the remaining columns untouched.
binaryDataSet = pd.DataFrame([
    [int(cell != 0) for cell in row[:40]] + list(row[40:])
    for row in originalDataSet.values
])

# One scaler shared by every gain column and one shared by every loss column,
# both mapping onto [-1, 1].
gainScaler = MinMaxScaler(feature_range = (-1, 1))
lossScaler = MinMaxScaler(feature_range = (-1, 1))

# Pool the values of all gain columns (even offsets past column 40) and of all
# loss columns (odd offsets) so each scaler is fitted on the combined range.
gainScalerAux = [
    value
    for offset in range(0, ampNumber * 2, 2)
    for value in binaryDataSet.values[:, 40 + offset]
]
lossScalerAux = [
    value
    for offset in range(0, ampNumber * 2, 2)
    for value in binaryDataSet.values[:, 40 + offset + 1]
]

gainScaler.fit(np.array(gainScalerAux).reshape(-1, 1))
lossScaler.fit(np.array(lossScalerAux).reshape(-1, 1))

def loadDataset():
    """Return the model inputs and the normalised targets.

    The first 40 columns of ``binaryDataSet`` are the features. For each of
    the ``ampNumber`` amplifiers the following column pair is scaled with
    ``gainScaler`` (first column) and ``lossScaler`` (second column).

    Returns:
        (features, result): two arrays sliced from the same stacked matrix,
        ``features`` holding its first 40 columns and ``result`` the scaled
        gain/loss columns in their original order.
    """
    raw    = binaryDataSet.values
    pieces = [raw[:, :40]]

    for offset in range(0, ampNumber * 2, 2):
        gainColumn = np.array(raw[:, 40 + offset]).reshape(-1, 1)
        lossColumn = np.array(raw[:, 40 + offset + 1]).reshape(-1, 1)

        pieces.append(gainScaler.transform(gainColumn))
        pieces.append(lossScaler.transform(lossColumn))

    # Stack once; the features are sliced from the stacked matrix (not from
    # the raw values) so they share its dtype.
    stacked = np.hstack(pieces)

    return np.array(stacked[:, :40]), np.array(stacked[:, 40:])

def invertGainNorm(value):
    """Undo the gain normalisation for one value and round the result.

    The value is placed at the head of a six-row column (the other five rows
    are zero filler), passed through ``gainScaler.inverse_transform`` and only
    the first row is kept and rounded with the built-in ``round``.
    """
    column   = np.array([value] + [0] * 5).reshape(-1, 1)
    restored = gainScaler.inverse_transform(column)
    return round(restored[0][0])

def invertLossNorm(value):
    """Undo the loss normalisation for one value and round the result.

    The value is placed at the head of a six-row column (the other five rows
    are zero filler), passed through ``lossScaler.inverse_transform`` and only
    the first row is kept and rounded with the built-in ``round``.
    """
    column   = np.array([value] + [0] * 5).reshape(-1, 1)
    restored = lossScaler.inverse_transform(column)
    return round(restored[0][0])
    
def getGainError(value1, value2):
    """Absolute gap between two normalised gains after mapping both back with invertGainNorm."""
    first  = invertGainNorm(value1)
    second = invertGainNorm(value2)
    return abs(first - second)

def getLossError(value1, value2):
    """Absolute gap between two normalised losses after mapping both back with invertLossNorm."""
    first  = invertLossNorm(value1)
    second = invertLossNorm(value2)
    return abs(first - second)



# Model setup

In [2]:
def initModels():
    """Build one untrained regressor per amplifier.

    Each entry is a ``MultiOutputRegressor`` wrapping its own
    ``KNeighborsRegressor``; both classes come from scikit-learn and must be
    imported in the notebook's import cell.

    Returns:
        list: ``ampNumber`` independent regressor instances.
    """
    return [MultiOutputRegressor(KNeighborsRegressor()) for _ in range(ampNumber)]

# Cross validation function

In [3]:
def crossValidate(regressor, features, result, folds = 5):
    """Run a contiguous k-fold evaluation and return per-fold gain/loss errors.

    The rows are cut into ``folds`` consecutive slices of
    ``ceil(n_rows / folds)`` rows. For every slice the regressor is fitted on
    the remaining rows and scored on the slice: column 0 of the prediction is
    compared with column 0 of ``result`` through ``getGainError`` and column 1
    through ``getLossError``.

    Args:
        regressor: object exposing ``fit`` and ``predict``. A model whose
            class (or a base class) is named ``Sequential`` is fitted with
            ``epochs = 500, verbose = 0``.
        features: 2-D array of inputs, one row per sample.
        result: 2-D array of targets with at least two columns.
        folds: number of slices to evaluate.

    Returns:
        (gainErrors, lossErrors): two arrays with one averaged error per
        non-empty fold.
    """
    foldSize   = math.ceil(features.shape[0] / folds)
    gainErrors = []
    lossErrors = []

    # Keras is not imported by this notebook, so a Sequential model is
    # recognised by class name instead of isinstance (which would raise
    # NameError on the missing symbol).
    isSequential = any(klass.__name__ == "Sequential" for klass in type(regressor).__mro__)

    # NOTE(review): the same regressor object is re-fitted on every fold;
    # whether a Keras model starts from fresh weights each time is not
    # guaranteed by this code - confirm.
    for i in range(folds):
        sliceBegin = i * foldSize
        sliceEnd   = (i + 1) * foldSize

        X_test = features[sliceBegin: sliceEnd]
        y_test = result[sliceBegin: sliceEnd]

        # When folds does not divide the data evenly the trailing slices can
        # be empty; there is nothing to score, so skip them.
        if len(X_test) == 0:
            continue

        X_train = np.delete(features, np.s_[sliceBegin: sliceEnd], 0)
        y_train = np.delete(result, np.s_[sliceBegin: sliceEnd], 0)

        if isSequential:
            regressor.fit(X_train, y_train, epochs = 500, verbose = 0)
        else:
            regressor.fit(X_train, y_train)

        prediction = regressor.predict(X_test)

        gainError = 0
        lossError = 0

        for predicted, expected in zip(prediction, y_test):
            gainError += getGainError(predicted[0], expected[0])
            lossError += getLossError(predicted[1], expected[1])

        # Average over the rows actually scored: the last slice may hold
        # fewer than foldSize rows.
        testSize = len(X_test)

        # NOTE(review): only one (gain, loss) pair is scored per call, so the
        # division by ampNumber looks like it scales the error down by the
        # number of amplifiers - confirm this is intended.
        gainErrors.append((gainError / ampNumber) / testSize)
        lossErrors.append((lossError / ampNumber) / testSize)

    return np.array(gainErrors), np.array(lossErrors)

In [4]:
def trainModel(models):
    """Cross-validate the amplifier models as a cascade and record the errors.

    The first model is evaluated on the plain features. Every later model is
    evaluated on the features with the previous model's predictions over the
    whole dataset appended as extra columns. Each stage prints its per-fold
    errors with mean and two standard deviations, and appends them (plus a
    ``"KNN - Amp N"`` label) to the module-level ``gainArray``,
    ``lossArray`` and ``strategyArray``.

    Args:
        models: sequence with one regressor per amplifier.

    Returns:
        (gainErrors, lossErrors) of the last stage that was evaluated.
    """
    features, result = loadDataset()

    def evaluateStage(stage, stageInputs):
        """Cross-validate one stage, print and store its errors, and return them."""
        stageTargets  = result[:, stage * 2: stage * 2 + 2]
        gains, losses = crossValidate(models[stage], stageInputs, stageTargets)

        print(gains, "=> %0.2f (+/- %0.2f)" % (np.mean(gains), gains.std() * 2))
        print(losses, "=> %0.2f (+/- %0.2f)\n" % (np.mean(losses), losses.std() * 2))

        gainArray.append(gains)
        lossArray.append(losses)
        strategyArray.append("KNN - Amp {}".format(stage + 1))

        return gains, losses

    gainErrors, lossErrors = evaluateStage(0, features)
    prediction             = models[0].predict(features)

    for stage in range(1, ampNumber):
        # Inputs of this stage: original features plus the previous stage's output.
        stageInputs            = np.hstack((features, prediction))
        gainErrors, lossErrors = evaluateStage(stage, stageInputs)
        prediction             = models[stage].predict(stageInputs)

    return gainErrors, lossErrors

# Plotting functions

In [5]:
def plotDistribution(models):
    """Fit the model cascade on a 70/30 split and scatter expected vs predicted points.

    The first model is fitted on the training features; each later model is
    fitted on the training features with the previous model's training
    predictions appended, and predicts the test set from the test features
    with the previous model's test predictions appended. One subplot per
    amplifier shows the de-normalised (gain, loss) pairs of the test targets
    and of the test predictions.

    Args:
        models: sequence with one regressor per amplifier; they are fitted
            in place.

    Returns:
        (y_test, test_results): the held-out targets and the list of test
        predictions, one array per amplifier.
    """
    features, result                 = loadDataset()
    X_train, X_test, y_train, y_test = train_test_split(features, result, test_size = 0.3, random_state = 5)

    train_results = []
    test_results  = []

    for i in range(ampNumber):
        if i == 0:
            trainInputs, testInputs = X_train, X_test
        else:
            # Later stages also see what the previous stage predicted.
            trainInputs = np.hstack((X_train, train_results[i - 1]))
            testInputs  = np.hstack((X_test, test_results[i - 1]))

        models[i].fit(trainInputs, y_train[:, i * 2: i * 2 + 2])
        train_results.append(models[i].predict(trainInputs))
        test_results.append(models[i].predict(testInputs))

    fig   = plt.figure(figsize = (15, 5))
    vGain = np.vectorize(invertGainNorm)
    vLoss = np.vectorize(invertLossNorm)

    # Two columns; at least two rows so the layout for up to four amplifiers
    # stays 2x2, with extra rows added when ampNumber is larger.
    gridCols = 2
    gridRows = max(2, math.ceil(ampNumber / gridCols))

    for i in range(ampNumber):
        ax = fig.add_subplot(gridRows, gridCols, i + 1)

        yGain    = vGain(y_test[:, i * 2])
        yLoss    = vLoss(y_test[:, i * 2 + 1])
        testGain = vGain(test_results[i][:, 0])
        testLoss = vLoss(test_results[i][:, 1])

        ax.scatter(yGain, yLoss)
        ax.scatter(testGain, testLoss, c = "m")
        ax.set_title("Amplifier {}".format(i + 1))

    fig.suptitle("Test Error", fontsize = 16)
    fig.text(0.5, 0.04, 'Gain', ha='center', va='center', fontsize = 14)
    fig.text(0.06, 0.5, 'Loss', ha='center', va='center', rotation='vertical', fontsize = 14)
    fig.legend(["expected", "predicted"])

    plt.show()
    return y_test, test_results

In [6]:
def plotGainError(yTest, predicted):
    """Draw one box plot per amplifier of the absolute gain error on the test set.

    Args:
        yTest: 2-D array of expected targets; column ``2 * i`` is read as the
            gain of amplifier ``i`` (an earlier debug note recorded shape
            (1053, 8) - presumably rows x 2 * ampNumber).
        predicted: sequence of per-amplifier prediction arrays whose column 0
            is read as the predicted gain.
    """
    fig, axis = plt.subplots(figsize = (10, 5))

    data = [
        [getGainError(a, b) for a, b in zip(predicted[i][:, 0], yTest[:, i * 2])]
        for i in range(ampNumber)
    ]

    # strategyArray is filled as a side effect of trainModel, so it may be
    # empty (fresh kernel) or hold several runs. Use the most recent
    # ampNumber labels and fall back to generic ones when they are missing,
    # so the label count always matches the number of boxes.
    labels = list(strategyArray[-ampNumber:])
    if len(labels) != ampNumber:
        labels = ["Amp {}".format(i + 1) for i in range(ampNumber)]

    axis.boxplot(data)
    axis.set_title("Test Gain Error", fontsize = 16)
    axis.set_xticklabels(labels)
    axis.set_ylabel("Absolute error (dB)", fontsize = 14)

    plt.show()

In [7]:
def plotLossError(yTest, predicted):
    """Draw one box plot per amplifier of the absolute loss error on the test set.

    Args:
        yTest: 2-D array of expected targets; column ``2 * i + 1`` is read as
            the loss of amplifier ``i``.
        predicted: sequence of per-amplifier prediction arrays whose column 1
            is read as the predicted loss.
    """
    fig, axis = plt.subplots(figsize = (10, 5))

    data = [
        [getLossError(a, b) for a, b in zip(predicted[i][:, 1], yTest[:, i * 2 + 1])]
        for i in range(ampNumber)
    ]

    # strategyArray is filled as a side effect of trainModel, so it may be
    # empty (fresh kernel) or hold several runs. Use the most recent
    # ampNumber labels and fall back to generic ones when they are missing,
    # so the label count always matches the number of boxes.
    labels = list(strategyArray[-ampNumber:])
    if len(labels) != ampNumber:
        labels = ["Amp {}".format(i + 1) for i in range(ampNumber)]

    axis.boxplot(data)
    axis.set_title("Test Loss Error", fontsize = 16)
    axis.set_xticklabels(labels)
    axis.set_ylabel("Absolute error (dB)", fontsize = 14)

    plt.show()

In [8]:
def plotLinkTestError(yTest, predicted):
    """Box-plot the absolute gain and loss errors pooled over every amplifier.

    The gain columns (even indices) and loss columns (odd indices) of
    ``yTest`` are concatenated amplifier by amplifier, and likewise columns 0
    and 1 of each entry of ``predicted``. The mean and median of both error
    lists are printed before plotting.

    Args:
        yTest: 2-D array of expected targets.
        predicted: sequence of per-amplifier prediction arrays with the
            predicted gain in column 0 and the predicted loss in column 1.
    """
    fig, axis = plt.subplots(figsize = (10, 5))

    amps = range(ampNumber)

    yTestGain     = np.hstack([yTest[:, i * 2] for i in amps])
    yTestLoss     = np.hstack([yTest[:, i * 2 + 1] for i in amps])
    predictedGain = np.hstack([np.array(predicted[i][:, 0]).flatten() for i in amps])
    predictedLoss = np.hstack([np.array(predicted[i][:, 1]).flatten() for i in amps])

    gainData = [getGainError(p, e) for p, e in zip(predictedGain, yTestGain)]
    lossData = [getLossError(p, e) for p, e in zip(predictedLoss, yTestLoss)]

    print("Loss error mean: {}. Loss error median: {}".format(np.mean(lossData), np.median(lossData)))
    print("Gain error mean: {}. Gain error median: {}".format(np.mean(gainData), np.median(gainData)))

    # Box order must follow the tick labels below: gain first, then loss.
    axis.boxplot([gainData, lossData])
    axis.set_title("Link Test Error", fontsize = 16)
    axis.set_xticklabels(["Gain", "Loss"])
    axis.set_ylabel("Absolute error (dB)", fontsize = 14)

    plt.show()

In [9]:
def plotLearningCurves(ampNumber, model, X, y, cv):
    """Plot training and validation MSE against training-set size for one model.

    Uses scikit-learn's ``learning_curve`` (imported in the notebook's import
    cell) with six training sizes spread from 10% to 100% and
    ``neg_mean_squared_error`` scoring; the scores are negated so the curves
    show MSE. The sizes and both mean curves are also printed.

    Args:
        ampNumber: zero-based amplifier index, used only for the title
            (shown as ``ampNumber + 1``). It shadows the module-level name
            inside this function.
        model: estimator handed to ``learning_curve``.
        X: inputs handed to ``learning_curve``.
        y: targets handed to ``learning_curve``.
        cv: cross-validation setting handed to ``learning_curve``.
    """
    fig, axis = plt.subplots(figsize = (10, 5))

    train_sizes, train_scores, test_scores = learning_curve(
        model, X, y, cv = cv,
        train_sizes = np.linspace(0.1, 1.0, 6),
        scoring = "neg_mean_squared_error")

    # Scores come back as negative MSE; flip the sign, then average over the
    # cross-validation runs (axis 1).
    train_scores_mean = np.mean(-train_scores, axis = 1)
    test_scores_mean  = np.mean(-test_scores, axis = 1)

    print("Size splits: {}".format(train_sizes))
    print("Train scores: {} ".format(train_scores_mean))
    print("Test scores: {} ".format(test_scores_mean))

    # Plot learning curve
    axis.plot(train_sizes, train_scores_mean, 'o-', color = "r", label = "Training score")
    axis.plot(train_sizes, test_scores_mean, 'o-', color = "g", label = "Test score")
    axis.set_title("Learning Error - Amplifier {}".format(ampNumber + 1), fontsize = 16)
    axis.set_xlabel("Train size", fontsize = 14)
    axis.set_ylabel("MSE", fontsize = 14)
    axis.legend(loc = "best")

    plt.show()

# Putting it all together

In [10]:
# Benchmark LazyRegressor's built-in set of regressors once per amplifier:
# each pass takes one (gain, loss) column pair of the normalised targets,
# splits the data 70/30 with a fixed seed and stores the returned score table
# in individualPredictions (one entry per amplifier, in column order).
X, y                  = loadDataset()
individualPredictions = []

# i walks the even column offsets, so y[:, i: i + 2] is one amplifier's pair.
for i in range(0, ampNumber * 2, 2):
    
    reg = LazyRegressor(verbose = 0, ignore_warnings = False)

    X_train, X_test, y_train, y_test = train_test_split(X, y[:, i: i + 2], test_size = 0.3, random_state = 0)
    # NOTE: this rebinds the module-level name `models` to the first value
    # returned by reg.fit; only `predictions` is kept below.
    # NOTE(review): the recorded output of this cell shows many estimators
    # failing with "y should be a 1d array" because the target has two
    # columns, so the comparison covers only the models that accept a
    # 2-column target - confirm that is acceptable.
    models, predictions              = reg.fit(X_train, X_test, y_train, y_test)
    
    individualPredictions.append(predictions)

 12%|█▏        | 5/43 [00:00<00:00, 46.71it/s]

AdaBoostRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
BayesianRidge model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
ElasticNetCV model failed to execute
For multi-task outputs, use MultiTaskElasticNetCV


 21%|██        | 9/43 [00:00<00:01, 21.14it/s]

GammaRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 37%|███▋      | 16/43 [00:01<00:03,  8.43it/s]

GeneralizedLinearRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
GradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HistGradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HuberRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 56%|█████▌    | 24/43 [00:02<00:02,  9.43it/s]

LarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoCV model failed to execute
For multi-task outputs, use MultiTaskLassoCV
LassoLarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoLarsIC model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LinearSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 74%|███████▍  | 32/43 [00:03<00:01,  7.28it/s]

NuSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
OrthogonalMatchingPursuitCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PassiveAggressiveRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PoissonRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


100%|██████████| 43/43 [00:03<00:00, 10.92it/s]
  5%|▍         | 2/43 [00:00<00:02, 19.95it/s]

SGDRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
SVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'
TweedieRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
XGBRegressor model failed to execute
only size-1 arrays can be converted to Python scalars
LGBMRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
AdaBoostRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
BayesianRidge model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
ElasticNetCV model failed to execute
For multi-task outputs, use MultiTaskElasticNetCV


 21%|██        | 9/43 [00:00<00:01, 18.19it/s]

GammaRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 37%|███▋      | 16/43 [00:01<00:03,  8.26it/s]

GeneralizedLinearRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
GradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HistGradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HuberRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 56%|█████▌    | 24/43 [00:01<00:01, 10.33it/s]

LarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoCV model failed to execute
For multi-task outputs, use MultiTaskLassoCV
LassoLarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoLarsIC model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LinearSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 70%|██████▉   | 30/43 [00:04<00:02,  5.05it/s]

NuSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
OrthogonalMatchingPursuitCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PassiveAggressiveRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PoissonRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


100%|██████████| 43/43 [00:04<00:00,  8.77it/s]
  0%|          | 0/43 [00:00<?, ?it/s]

SGDRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
SVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'
TweedieRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
XGBRegressor model failed to execute
only size-1 arrays can be converted to Python scalars
LGBMRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
AdaBoostRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


  9%|▉         | 4/43 [00:00<00:01, 36.48it/s]

BayesianRidge model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
ElasticNetCV model failed to execute
For multi-task outputs, use MultiTaskElasticNetCV


 21%|██        | 9/43 [00:00<00:01, 23.29it/s]

GammaRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 37%|███▋      | 16/43 [00:01<00:03,  8.45it/s]

GeneralizedLinearRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
GradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HistGradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HuberRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 56%|█████▌    | 24/43 [00:01<00:01,  9.83it/s]

LarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoCV model failed to execute
For multi-task outputs, use MultiTaskLassoCV
LassoLarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoLarsIC model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LinearSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 77%|███████▋  | 33/43 [00:03<00:01,  8.52it/s]

NuSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
OrthogonalMatchingPursuitCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PassiveAggressiveRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PoissonRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 93%|█████████▎| 40/43 [00:03<00:00,  9.10it/s]

SGDRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
SVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'


100%|██████████| 43/43 [00:03<00:00, 11.07it/s]
  0%|          | 0/43 [00:00<?, ?it/s]

TweedieRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
XGBRegressor model failed to execute
only size-1 arrays can be converted to Python scalars
LGBMRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
AdaBoostRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 14%|█▍        | 6/43 [00:00<00:02, 18.41it/s]

BayesianRidge model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
ElasticNetCV model failed to execute
For multi-task outputs, use MultiTaskElasticNetCV


 23%|██▎       | 10/43 [00:01<00:05,  6.52it/s]

GammaRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 37%|███▋      | 16/43 [00:02<00:05,  5.13it/s]

GeneralizedLinearRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
GradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HistGradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
HuberRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 56%|█████▌    | 24/43 [00:02<00:02,  7.36it/s]

LarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoCV model failed to execute
For multi-task outputs, use MultiTaskLassoCV
LassoLarsCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LassoLarsIC model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
LinearSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


 70%|██████▉   | 30/43 [00:03<00:01,  7.24it/s]

NuSVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
OrthogonalMatchingPursuitCV model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PassiveAggressiveRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
PoissonRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.


100%|██████████| 43/43 [00:05<00:00,  8.45it/s]

SGDRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
SVR model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'
TweedieRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.
XGBRegressor model failed to execute
only size-1 arrays can be converted to Python scalars
LGBMRegressor model failed to execute
y should be a 1d array, got an array of shape (2455, 2) instead.





In [11]:
# Show the RMSE column of the score tables collected for the first two
# amplifiers, with a blank line between them.
for ampIndex in (0, 1):
    if ampIndex > 0:
        print()
    print(individualPredictions[ampIndex]['RMSE'])

Model
GaussianProcessRegressor              0.41
ExtraTreesRegressor                   0.41
DecisionTreeRegressor                 0.41
ExtraTreeRegressor                    0.41
RandomForestRegressor                 0.41
MLPRegressor                          0.41
BaggingRegressor                      0.41
LinearRegression                      0.42
TransformedTargetRegressor            0.42
RidgeCV                               0.42
Ridge                                 0.42
Lars                                  0.42
KNeighborsRegressor                   0.44
OrthogonalMatchingPursuit             0.45
ElasticNet                            0.47
Lasso                                 0.47
LassoLars                             0.47
DummyRegressor                        0.47
KernelRidge                           0.71
RANSACRegressor              2591086168.72
Name: RMSE, dtype: float64

Model
BaggingRegressor                        0.38
RandomForestRegressor                   0.38
GaussianPr