# Imports 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

from lazypredict.Supervised import LazyRegressor
from sklearn.model_selection import train_test_split
import pandas as pd

import os
path      = os.getcwd()
ampNumber = int(os.path.basename(path)[0])

originalDataset = pd.read_csv('../../datasets/dataset-{}.txt'.format(ampNumber), sep = ',', header = None)
binaryDataset   = []



# Dataset balancing

In [2]:
def datasetBalancing(dataset):
    newDataset      = []
    linesPerChannel = []
    currentChannel  = 2

    # Finding channel minimum target 
    count = 0
    for line in dataset.values:
        channels = np.count_nonzero(line[0: 40], axis=0)

        if currentChannel == channels:
            count += 1
        else:
            linesPerChannel.append(count)

            currentChannel = channels
            count          = 0

    linesPerChannel.append(count)   
    
    print(linesPerChannel)
    
    guide = np.array(linesPerChannel)
    guide = guide[guide != 0]
    
    showGuides(guide)
    
    target     = min(guide)
    newDataset = cutDown(dataset, guide, target)
    
    return newDataset

def cutDown(dataset, guide, target):
    newDataset   = []
    currentEntry = 0

    for entry in guide:
        #print(currentEntry, currentEntry + target)

        subset = dataset.values[currentEntry : currentEntry + target]
        newDataset.extend(subset)
        currentEntry += entry
    
    return newDataset

def showGuides(guide):
    channels = [2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 40]
    
    for c, g in zip(channels, guide):
        print("{} chs: {} entries".format(c, g))

# Data proccessing

In [3]:
balancedDataset = datasetBalancing(originalDataset)

for line in balancedDataset:
    myList = [1 if j != 0 else 0 for j in line[:40]]
    myList.extend(line[40:])
    binaryDataset.append(myList)

binaryDataset = pd.DataFrame(binaryDataset)

def loadDataset(): 
    dataset = binaryDataset.values[:, :40]
    
    for i in range(0, ampNumber * 2, 2):
        gain    = np.array(binaryDataset.values[:, 40 + i])
        gain    = gain.reshape(-1, 1)
        dataset = np.hstack((dataset, gain))
        
        loss    = np.array(binaryDataset.values[:, 40 + i + 1])
        loss    = loss.reshape(-1, 1)
        dataset = np.hstack((dataset, loss))
                
    X, y = np.array(dataset[:, :40]), np.array(dataset[:, 40:])
    return X, y

[200, 179, 219, 199, 209, 219, 1239, 1238, 976, 179, 809, 919, 219]
2 chs: 200 entries
3 chs: 179 entries
4 chs: 219 entries
5 chs: 199 entries
6 chs: 209 entries
7 chs: 219 entries
8 chs: 1239 entries
9 chs: 1238 entries
10 chs: 976 entries
15 chs: 179 entries
20 chs: 809 entries
30 chs: 919 entries
40 chs: 219 entries


# Model setup

In [None]:
summaryList = []

#1 regressor per amplifier
X, y = loadDataset()

for j in range(0, ampNumber * 2):
    reg = LazyRegressor(verbose = 0, ignore_warnings = False)

    X_train, X_test, y_train, y_test = train_test_split(X, y[:, j], test_size = 0.3, random_state = 0)
    models, summary                  = reg.fit(X_train, X_test, y_train, y_test)
    
    summaryList.append(summary)

 74%|███████▍  | 32/43 [00:08<00:05,  2.15it/s]

RANSACRegressor model failed to execute
RANSAC could not find a valid consensus set. All `max_trials` iterations were skipped because each randomly chosen sub-sample failed the passing criteria. See estimator attributes for diagnostics (n_skips*).


 88%|████████▊ | 38/43 [00:09<00:01,  3.49it/s]

StackingRegressor model failed to execute
__init__() missing 1 required positional argument: 'estimators'


100%|██████████| 43/43 [00:09<00:00,  4.45it/s]
 21%|██        | 9/43 [00:00<00:02, 12.87it/s]

GammaRegressor model failed to execute
Some value(s) of y are out of the valid range for family GammaDistribution


 86%|████████▌ | 37/43 [00:06<00:00,  6.49it/s]

In [None]:
finalList = []

'''
if i % 2 ==0 -> it's a gain regressor
else         -> it's a loss regressor
'''

for i in range(0, ampNumber * 2, 2): 
    myDict = {}
    finalList.append(myDict)
    
    for j in range(min(len(summaryList[i]['RMSE']), len(summaryList[i + 1]['RMSE']))):
        if summaryList[i]['RMSE'].index[j] in summaryList[i + 1]['RMSE']: #checking if gain model exists in loss dataframe
            
            '''
            summaryList[i]['RMSE'].index[j] -> j model's name for regressor i 
            summaryList[i]['RMSE'][j]       -> j model's RMSE for regressor i
            summaryList[i + 1]['RMSE'][j]   -> j model's RMSE for regressor i + 1
            '''
        
            finalList[-1][summaryList[i]['RMSE'].index[j]] = [summaryList[i]['RMSE'][j], summaryList[i + 1]['RMSE'][j]]

#finalList[i][modelName] -> [gain RMSE, loss RMSE] for modelName of amp #(i + 1)

# Draw table

In [None]:
def sort_gain_loss(item):
    return item[1] + item[2]

def prepTable(ampIndex):    
    rowLabels = [] # Nomes das linhas
    colLabels = [] # Nomes das colunas   
    values    = [] # Valores das células
    sumList   = [] # Soma dos valores
    
    colLabels = ["Amp {} Gain".format(ampIndex + 1), "Amp {} Loss".format(ampIndex + 1), "Avg"] 
    
    # TODO: Ordenar esta lista de forma decrescente pela soma de ganho e perda (data[i][1] + data[i][2])
    data = finalList[ampIndex] 
    data = dict(sorted(data.items(), key = lambda x : x[1][0] + x[1][1]))
    
    for key in data:
        model = key
        gain  = data[key][0]
        loss  = data[key][1]
        total = gain + loss
        
        rowLabels.append(model)
        values.append([gain, loss, total])    
        sumList.append(total)
    
    return rowLabels, colLabels, values, sumList

In [None]:
rows, cols, values, sumList = prepTable(0)
commonRegressors = rows

for i in range(1, ampNumber): 
    rows, cols, values, sumList = prepTable(i)
    commonRegressors = set.intersection(set(commonRegressors), set(rows))