In [1]:
import ipynb.fs.full.training as training
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.misc as misc

In [30]:
import numpy as np
import pandas as pd

In [5]:
from sklearn.preprocessing import MinMaxScaler

### ENSEMBLE CLASS

In [33]:
class create_ensemble():
    """Collection of trained models that can all predict on the same dataset.

    Every entry in ``self.models`` is either a single model or a list/tuple
    of cross-validation models. The only things this class relies on are
    that each model exposes ``predict(dataset)`` and a ``name`` attribute.
    """

    def __init__(self, _settings):
        """Store the ensemble settings and start with no models."""

        # INSTANCE STATE (EACH ENSEMBLE OWNS ITS OWN MODEL LIST)
        self.settings = _settings
        self.models = []

    # ADD A MODEL TO THE ENSEMBLE
    def add_model(self, model):
        """Register a model, or a list/tuple of CV models, with the ensemble.

        Raises:
            ValueError: if an empty list/tuple is given, since there would
                be nothing to average and no name to label the column with.
        """
        if isinstance(model, (list, tuple)) and len(model) == 0:
            raise ValueError('cannot add an empty group of CV models')

        self.models.append(model)

    # PREDICT WITH ALL ENSEMBLE MODELS
    def predict(self, dataset):
        """Predict with every registered model.

        Returns:
            A pandas DataFrame with one column per registered entry, in
            insertion order. A single model contributes its predictions
            under ``model.name``. A group of CV models contributes the
            element-wise average of its members' predictions under the
            first member's name. Entries sharing a name overwrite each
            other's column.
        """

        # CREATE NEW DATAFRAME
        dataframe = pd.DataFrame()

        # LOOP THROUGH MODELS
        for model in self.models:

            # FETCH AVERAGE PREDICTION FROM CV MODELS
            if isinstance(model, (list, tuple)):
                collection = [sub_model.predict(dataset) for sub_model in model]

                # AVERAGE ACROSS SUB MODELS (AXIS 0), KEEPING ONE VALUE PER ROW
                dataframe[model[0].name] = np.mean(collection, axis=0)

            # OTHERWISE, PREDICT NORMALLY
            else:
                dataframe[model.name] = model.predict(dataset)

        return dataframe

### REGRESSION ENSEMBLE

In [21]:
def regression(primary_dataset, config):
    """Train every requested regression model on time-series folds.

    Reads the model list from ``config['regression_ensemble']['models']``
    and the fold count from ``config['splitting']['validation_folds']``.
    Each model is trained once per fold of ``primary_dataset['train']``
    via ``training.start``.

    Returns:
        A list with one dict per requested model, in request order:
        ``name`` ('<model name>_<index>'), ``predictions`` and ``labels``
        (the per-fold results concatenated in fold order) and ``models``
        (the per-fold trained models).
    """

    # REQUESTED MODELS
    models = config['regression_ensemble']['models']

    # ENSEMBLE CONTAINER
    ensemble = [None] * len(models)

    # LOOP THROUGH MODELS
    for index, item in enumerate(models):

        # MODEL PROPS
        name, settings = misc.key_value(item)

        # ONLY FORWARD A WINDOW WHEN THE MODEL ACTUALLY DEFINES ONE.
        # SETTINGS WITHOUT A 'morph' -> 'window' ENTRY (E.G. ONLY
        # HYPERPARAMETERS) FALL BACK TO THE DEFAULT SPLIT INSTEAD OF
        # RAISING A KeyError.
        split_options = {}
        if settings:
            try:
                split_options['window'] = settings['morph']['window']
            except (KeyError, TypeError):
                pass

        # FOLD CONTAINER
        folds = splitting.timeseries(
            primary_dataset['train'],
            config['splitting']['validation_folds'],
            **split_options
        )

        # FOLD DATA
        temp_predictions = []
        temp_labels = []
        temp_models = []

        # LOOP THROUGH FOLDS
        for fold_index, fold in enumerate(folds):

            # PRINT A MESSAGE
            print('TRAINING {} FOLD #{}'.format(name.upper(), fold_index + 1))

            # TRAIN & PREDICT WITH THE MODEL
            model, predictions, labels = training.start(fold, name, settings)

            # APPEND TO COLLECTIONS
            temp_models.append(model)
            temp_predictions.append(predictions)
            temp_labels.append(labels)

        # APPEND RESULTS TO ENSEMBLE
        # NOTE: np.concatenate RAISES ValueError WHEN THERE ARE NO FOLDS
        ensemble[index] = {
            'name': '{}_{}'.format(name, index),
            'predictions': np.concatenate(temp_predictions),
            'labels': np.concatenate(temp_labels),
            'models': temp_models
        }

        # PRINT A SPACE WHEN NECESSARY
        if index < len(models) - 1:
            print()

    return ensemble

### CLASSIFIER ENSEMBLE

In [4]:
def classifier(secondary_dataset, config):
    """Train every requested classification model on the scaled dataset.

    Reads the model list from ``config['classification_ensemble']['models']``.
    For each model the dataset is scaled with ``normalize`` (a helper that is
    not defined in this section -- assumed to live elsewhere in the notebook)
    and then passed to ``training.start``.

    Returns:
        A list with one dict per requested model, in request order:
        ``name`` ('<model name>_<index>'), ``predictions``, ``labels``
        (taken from the unscaled ``secondary_dataset['test']['labels']``),
        ``scaler`` (as returned by ``normalize``) and ``model`` (the trained
        model, kept so the entry can be reused together with its scaler).
    """

    # REQUESTED MODELS
    models = config['classification_ensemble']['models']

    # ENSEMBLE CONTAINER
    ensemble = [None] * len(models)

    # LOOP THROUGH MODELS
    for index, item in enumerate(models):

        # MODEL PARAMS
        name, settings = misc.key_value(item)

        # PRINT A MESSAGE
        print('TRAINING {} MODEL'.format(name.upper()))

        # SCALE THE DATASET FEATURES
        scaled_dataset, scaler = normalize(secondary_dataset)

        # TRAIN & PREDICT WITH THE MODEL
        model, predictions = training.start(scaled_dataset, name, settings)

        # APPEND RESULTS TO ENSEMBLE
        # THE TRAINED MODEL IS STORED ALONGSIDE ITS SCALER -- PREVIOUSLY IT
        # WAS DISCARDED, UNLIKE THE REGRESSION ENSEMBLE WHICH KEEPS 'models'
        ensemble[index] = {
            'name': '{}_{}'.format(name, index),
            'predictions': predictions,
            'labels': secondary_dataset['test']['labels'],
            'scaler': scaler,
            'model': model
        }

    return ensemble