# Imports and dataset loading

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import pandas as pd

import os
path      = os.getcwd()
ampNumber = int(os.path.basename(path)[0])

originalDataset = pd.read_csv('../datasets/dataset-{}.txt'.format(ampNumber), sep = ',', header = None)
binaryDataset   = []
    
for line in originalDataset.values:
    myList = [1 if j != 0 else 0 for j in line[:40]]
    myList.extend(line[40:])
    binaryDataset.append(myList)

binaryDataset = pd.DataFrame(binaryDataset)

def loadDataset(): 
    dataset = binaryDataset.values[:, :40]
    
    for i in range(0, ampNumber * 2, 2):
        gain    = np.array(binaryDataset.values[:, 40 + i])
        gain    = gain.reshape(-1, 1)
        dataset = np.hstack((dataset, gain))
        
        loss    = np.array(binaryDataset.values[:, 40 + i + 1])
        loss    = loss.reshape(-1, 1)
        dataset = np.hstack((dataset, loss))
                
    X, y = np.array(dataset[:, :40]), np.array(dataset[:, 40:])
    return X, y



# Model setup

In [2]:
summaryList = []

#1 regressor per amplifier
X, y = loadDataset()

for j in range(0, ampNumber * 2):
    reg = LazyRegressor(verbose = 0, ignore_warnings = False)

    X_train, X_test, y_train, y_test = train_test_split(X, y[:, j], test_size = 0.3, random_state = 0)
    models, summary                  = reg.fit(X_train, X_test, y_train, y_test)
    
    summaryList.append(summary)

 16%|█▋        | 7/43 [00:00<00:00, 65.66it/s]

AdaBoostRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
BayesianRidge model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
ElasticNetCV model failed to execute
For multi-task outputs, use MultiTaskElasticNetCV


 21%|██        | 9/43 [00:00<00:01, 32.39it/s]

GammaRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.


 37%|███▋      | 16/43 [00:00<00:01, 16.19it/s]

GeneralizedLinearRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
GradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
HistGradientBoostingRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
HuberRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.


 56%|█████▌    | 24/43 [00:01<00:01, 16.48it/s]

LarsCV model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
LassoCV model failed to execute
For multi-task outputs, use MultiTaskLassoCV
LassoLarsCV model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
LassoLarsIC model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
LinearSVR model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.


 77%|███████▋  | 33/43 [00:02<00:01,  7.34it/s]

NuSVR model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
OrthogonalMatchingPursuitCV model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
PassiveAggressiveRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
PoissonRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.


100%|██████████| 43/43 [00:03<00:00, 14.05it/s]

SGDRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
SVR model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'
TweedieRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.
XGBRegressor model failed to execute

LGBMRegressor model failed to execute
y should be a 1d array, got an array of shape (920, 4) instead.





In [4]:
print(summaryList)
finalList = []

'''
if i % 2 ==0 -> it's a gain regressor
else         -> it's a loss regressor
'''

for i in range(0, ampNumber * 2, 2): 
    finalList.append([])

    for j in range(min(len(summaryList[i]['RMSE']), len(summaryList[i + 1]['RMSE']))):
        if summaryList[i]['RMSE'].index[j] in summaryList[i + 1]['RMSE']: #checking if gain model exists in loss dataframe
            
            '''
            summaryList[i]['RMSE'].index[j] -> j model's name for regressor i 
            summaryList[i]['RMSE'][j]       -> j model's RMSE for regressor i
            summaryList[i + 1]['RMSE'][j]   -> j model's RMSE for regressor i + 1
            '''
            
            line = [
                summaryList[i]['RMSE'].index[j], 
                round(summaryList[i]['RMSE'][j], 3),
                round(summaryList[i + 1]['RMSE'][j], 3)
                   ]
            
            finalList[-1].append(line)
    
#finalList[i][j]       -> [name, gain RMSE, loss RMSE] for model #(j + 1) of amp #(i + 1)
print(finalList[0][0]) #  [name, gain RMSE, loss RMSE] for model #1 of amp #1
print(finalList[1][3]) #  [name, gain RMSE, loss RMSE] for model #4 of amp #2

[                            R-Squared  RMSE  Time Taken
Model                                                  
Lars                             0.02  1.29        0.10
OrthogonalMatchingPursuit        0.02  1.29        0.03
MLPRegressor                     0.02  1.29        1.39
RidgeCV                          0.02  1.29        0.02
BaggingRegressor                 0.01  1.29        0.04
Ridge                            0.01  1.29        0.01
ExtraTreeRegressor               0.01  1.29        0.01
DecisionTreeRegressor            0.01  1.29        0.01
ExtraTreesRegressor              0.01  1.29        0.12
GaussianProcessRegressor         0.01  1.29        0.37
RandomForestRegressor            0.01  1.29        0.16
RANSACRegressor                  0.01  1.30        0.03
TransformedTargetRegressor       0.01  1.30        0.03
LinearRegression                 0.01  1.30        0.03
LassoLars                       -0.00  1.29        0.03
ElasticNet                      -0.00  1.29    

IndexError: list index out of range

# Draw table

In [None]:
def sort_gain_loss(item):
    return item[1] + item[2]

def prepTable(ampIndex):
    rowLabels = [] # Nomes das linhas
    colLabels = [] # Nomes das colunas   
    values    = [] # Valores das células
    
    colLabels = ["Amp {} Gain".format(ampIndex + 1), "Amp {} Loss".format(ampIndex + 1), "Avg"] 
    
    # TODO: Ordenar esta lista de forma decrescente pela soma de ganho e perda (data[i][1] + data[i][2])
    data = finalList[ampIndex] 
    data.sort(reverse = False, key = sort_gain_loss)
    for entry in data:
        model = entry[0]
        gain  = entry[1]
        loss  = entry[2]
        avg   = round((gain + loss) / 2, 3)
        
        rowLabels.append(model)
        values.append([gain, loss, avg])    
    
    return rowLabels, colLabels, values

In [None]:
def drawTable(ampIndex):
    rows, cols, values = prepTable(ampIndex)
    
    plt.figure(figsize = (15, 5))
    plt.axis("off")

    plt.table(cellText  = values,
             rowLabels = rows,
             colLabels = cols, 
             loc="upper center")
    
    plt.savefig("amp{}-table".format(ampIndex + 1))

In [None]:
for i in range(ampNumber):
    drawTable(i)