# *Modern Deep Learning for Tabular Data*, Chapter 11

**Multi-Model Arrangement**

This notebook contains the complementary code discussed in Chapter 11 of *Modern Deep Learning for Tabular Data*.

External Kaggle links to datasets used in this notebook:
- [Wildfire Satellite Data](https://www.kaggle.com/datasets/washingtongold/wildfire-satellite-data)

You can download these datasets from Kaggle, or import this notebook into Kaggle and attach the datasets there.

---

## Imports

In [None]:
# data management
import numpy as np                   # for linear algebra
import pandas as pd                  # for tabular data manipulation and processing

# machine learning
import sklearn                       # for data prep and classical ML
import tensorflow
import tensorflow as tf              # for deep learning
from tensorflow import keras         # for deep learning
from tensorflow.keras import layers as L  # for neural network layers (used as L.*)

# data visualization and graphics
import matplotlib.pyplot as plt      # for visualization fundamentals
import seaborn as sns                # for pretty visualizations
import cv2                           # for image manipulation

# misc
from tqdm.notebook import tqdm       # for progress bars
import math                          # for calculation
import sys                           # for system manipulation
import os                            # for file manipulation
import hyperopt                      # for meta-optimization

---

## Preparation

Loading the data.

In [None]:
from sklearn.model_selection import train_test_split as tts

# Read the fire archive and keep only its first 100,000 rows.
data = pd.read_csv('../input/wildfire-satellite-data/fire_archive_M6_156000.csv').iloc[:100_000]

# Discard the acquisition date/time and instrument columns, then swap the two
# string-coded columns for 0/1 codes (values outside each mapping become NaN).
data = data.drop(columns=['acq_date', 'acq_time', 'instrument'])
data = data.assign(
    satellite=data['satellite'].map({'Terra': 0, 'Aqua': 1}),
    daynight=data['daynight'].map({'N': 0, 'D': 1}),
)

# 'type' is the prediction target; every remaining column is a feature.
y = data['type']
X = data.drop(columns='type')

# 80/20 train/validation split with a fixed seed.
X_train, X_valid, y_train, y_valid = tts(X, y, train_size=0.8, random_state=42)

Preparing sample learners.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, accuracy_score

# Base learners keyed by a short name. The randomised learners get a fixed
# random_state (the same seed as the train/validation split) so that a
# Restart & Run All reproduces the scores printed below.
models = {'lr': LogisticRegression(),
          'dtc': DecisionTreeClassifier(random_state=42),
          'rfc': RandomForestClassifier(random_state=42),
          'gbc': GradientBoostingClassifier(random_state=42),
          'abc': AdaBoostClassifier(random_state=42),
          'mlpc': MLPClassifier(random_state=42)}

# Fit every learner on the training split.
for name, learner in models.items():
    print(f'Fitting {name}')
    learner.fit(X_train, y_train)

# Report validation accuracy and macro-F1 for every learner.
for name, learner in models.items():
    valid_pred = learner.predict(X_valid)   # predict once, reuse for both metrics
    print(name)
    # scikit-learn metrics take (y_true, y_pred) in that order
    print(accuracy_score(y_valid, valid_pred))
    print(f1_score(y_valid, valid_pred, average='macro'))

---

## Average Ensembling

In [None]:
class AverageEnsemble:
    """Majority-vote ensemble over a dictionary of fitted classifiers.

    Every learner casts one vote per sample for the class it predicts, and the
    ensemble returns the class that collected the most votes.

    Parameters
    ----------
    modeldic : dict
        Maps a learner name to an object whose ``predict(x)`` returns one
        integer class label per sample. Labels are used directly as column
        indices, so they must lie in ``range(num_classes)``.
    """
    def __init__(self, modeldic):
        self.modeldic = modeldic

    def predict(self, x, num_classes = 4):
        """Return the most-voted class label for each sample in ``x``.

        Parameters
        ----------
        x : sized collection of samples
            Passed unchanged to every learner's ``predict``.
        num_classes : int, default 4
            Number of distinct class labels (width of the vote table).

        Returns
        -------
        numpy.ndarray
            One class index per sample; ties go to the lowest class index
            because ``np.argmax`` returns the first maximum.
        """
        n_samples = len(x)
        votes = np.zeros((n_samples, num_classes))
        rows = np.arange(n_samples)
        for learner in self.modeldic.values():
            labels = np.ravel(learner.predict(x))
            # Each row index occurs exactly once, so a single indexed add
            # replaces the per-sample Python loop without losing any vote.
            votes[rows, labels] += 1
        return np.argmax(votes, axis=1)
    
# Score the ensemble on the held-out validation split: the individual learners
# are scored on X_valid, and training-split scores would be inflated by
# learners that have memorised the training data.
ensemble = AverageEnsemble(models)
pred = ensemble.predict(X_valid)
# scikit-learn metrics take (y_true, y_pred) in that order
print(f1_score(y_valid, pred, average='macro'))
print(accuracy_score(y_valid, pred))

---

## Weighted Average Ensembling

Defining a weighted average ensemble model.

In [None]:
class WeightedAverageEnsemble:
    """Weighted-vote ensemble over a dictionary of fitted classifiers.

    Every learner votes for the class it predicts, and that vote counts with
    the learner's weight. The ensemble returns the class with the largest
    weighted total.

    Parameters
    ----------
    modeldic : dict
        Maps a learner name to an object whose ``predict(x)`` returns one
        integer class label per sample. Labels are used directly as column
        indices, so they must lie in ``range(num_classes)``.
    modelweights : mapping
        Maps each learner name in ``modeldic`` to its numeric vote weight.
    """
    def __init__(self, modeldic, modelweights):
        self.modeldic = modeldic
        self.modelweights = modelweights

    def predict(self, x, num_classes = 4):
        """Return the class with the highest weighted vote for each sample.

        Parameters
        ----------
        x : sized collection of samples
            Passed unchanged to every learner's ``predict``.
        num_classes : int, default 4
            Number of distinct class labels (width of the vote table).

        Returns
        -------
        numpy.ndarray
            One class index per sample; ties go to the lowest class index
            because ``np.argmax`` returns the first maximum.

        Raises
        ------
        KeyError
            If a learner name has no entry in ``modelweights``.
        """
        n_samples = len(x)
        votes = np.zeros((n_samples, num_classes))
        rows = np.arange(n_samples)
        for name, learner in self.modeldic.items():
            labels = np.ravel(learner.predict(x))
            # Each row index occurs exactly once, so a single indexed add
            # replaces the per-sample Python loop without losing any vote.
            votes[rows, labels] += self.modelweights[name]
        return np.argmax(votes, axis=1)

Deriving the optimal weights for classical machine learning learners.

In [None]:
from hyperopt import hp, tpe, fmin

# define the search space: one weight per learner, each drawn from a normal
# prior with mean 1 and standard deviation 0.4
space = {model:hp.normal(model, mu = 1, sigma = 0.4) for model in models}

# define objective function
def obj_func(params):
    """Return the negated validation macro-F1 of the ensemble weighted by ``params``.

    ``params`` maps each learner name to its sampled weight. The score is
    negated because ``fmin`` minimises its objective.
    """
    ensemble = WeightedAverageEnsemble(models, params)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return -f1_score(y_valid, ensemble.predict(X_valid),
                     average='macro')

# perform minimization procedure
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=500)
print(best)   # show the weights found by the search


Defining a family of neural network learners.

In [None]:
# Five networks that share the same interface: one input value per feature
# column of X_train, and a 4-unit softmax output.

# modelA: two hidden layers of 16 units.
modelA = keras.Sequential([
    L.Input((X_train.shape[1],)),
    L.Dense(16, activation='relu'),
    L.Dense(16, activation='relu'),
    L.Dense(4, activation='softmax'),
], name='modelA')

# modelB: four hidden layers of 16 units.
modelB = keras.Sequential([
    L.Input((X_train.shape[1],)),
    L.Dense(16, activation='relu'),
    L.Dense(16, activation='relu'),
    L.Dense(16, activation='relu'),
    L.Dense(16, activation='relu'),
    L.Dense(4, activation='softmax'),
], name='modelB')

# modelC: a shared first layer feeding two branches (three layers and two
# layers deep) whose outputs are concatenated before the softmax.
inp = L.Input((X_train.shape[1],))
dense = L.Dense(16, activation='relu')(inp)
branch1a = L.Dense(16, activation='relu')(dense)
branch1b = L.Dense(16, activation='relu')(branch1a)
branch1c = L.Dense(8, activation='relu')(branch1b)
branch2a = L.Dense(8, activation='relu')(dense)
branch2b = L.Dense(8, activation='relu')(branch2a)
concat = L.Concatenate()([branch1c, branch2b])
out = L.Dense(4, activation='softmax')(concat)
modelC = keras.Model(inputs=inp, outputs=out, name='modelC')

# modelD: project to 64 units, view them as an 8x8 single-channel grid, and
# run 2-D convolutions over that grid.
modelD = keras.Sequential([
    L.Input((X_train.shape[1],)),
    L.Dense(64, activation='relu'),
    L.Reshape((8, 8, 1)),
    L.Conv2D(8, (3, 3), padding='same', activation='relu'),
    L.Conv2D(8, (3, 3), padding='same', activation='relu'),
    L.MaxPooling2D(2, 2),
    L.Conv2D(16, (3, 3), padding='same', activation='relu'),
    L.Conv2D(16, (3, 3), padding='same', activation='relu'),
    L.Flatten(),
    L.Dense(16, activation='relu'),
    L.Dense(4, activation='softmax'),
], name='modelD')

# modelE: same idea as modelD, but the 64 units are viewed as a length-64
# single-channel sequence and processed with 1-D convolutions.
modelE = keras.Sequential([
    L.Input((X_train.shape[1],)),
    L.Dense(64, activation='relu'),
    L.Reshape((64, 1)),
    L.Conv1D(8, 3, padding='same', activation='relu'),
    L.Conv1D(8, 3, padding='same', activation='relu'),
    L.MaxPooling1D(2),
    L.Conv1D(16, 3, padding='same', activation='relu'),
    L.Conv1D(16, 3, padding='same', activation='relu'),
    L.Flatten(),
    L.Dense(16, activation='relu'),
    L.Dense(4, activation='softmax'),
], name='modelE')

# NOTE: this rebinds `models`; from here on it refers to the neural networks,
# not to the classical learners that were stored under the same name earlier.
models = {'modelA': modelA,
          'modelB': modelB,
          'modelC': modelC,
          'modelD': modelD,
          'modelE': modelE}

# Save an architecture diagram for each network, then compile and train it.
for name, net in models.items():
    tensorflow.keras.utils.plot_model(net, show_shapes=True, dpi=400, to_file=f'{name}.png')
    # `metrics` is passed as a list: it matches metaModel.compile later in
    # this notebook, and newer Keras releases reject a bare string here.
    net.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    net.fit(X_train, y_train, epochs=30)

# Validation macro-F1 and accuracy for each network.
for name, net in models.items():
    # predict() returns per-class probabilities; argmax turns them into labels
    predictions = np.argmax(net.predict(X_valid), axis=1)
    # scikit-learn metrics take (y_true, y_pred) in that order
    f1 = f1_score(y_valid, predictions, average='macro')
    acc = accuracy_score(y_valid, predictions)
    print(f'{name}: F1 - {f1}, Acc - {acc}')



class WeightedAverageEnsemble:
    """Weighted ensemble over learners that output class scores or class labels.

    This definition replaces the earlier class of the same name once its cell
    has run. To stay compatible with that class it accepts both prediction
    formats:

    * 2-D predictions of shape ``(n_samples, num_classes)`` (for example the
      softmax output of the Keras models) are multiplied by the learner's
      weight and summed;
    * 1-D predictions (one integer class label per sample) are counted as a
      single vote worth the learner's weight.

    Parameters
    ----------
    modeldic : dict
        Maps a learner name to an object exposing ``predict(x)``.
    modelweights : mapping
        Maps each learner name in ``modeldic`` to its numeric weight.
    """
    def __init__(self, modeldic, modelweights):
        self.modeldic = modeldic
        self.modelweights = modelweights

    def predict(self, x, num_classes = 4):
        """Return the class with the highest weighted total for each sample.

        Parameters
        ----------
        x : sized collection of samples
            Passed unchanged to every learner's ``predict``.
        num_classes : int, default 4
            Number of classes; 2-D predictions must have this many columns.

        Returns
        -------
        numpy.ndarray
            One class index per sample; ties go to the lowest class index
            because ``np.argmax`` returns the first maximum.

        Raises
        ------
        KeyError
            If a learner name has no entry in ``modelweights``.
        """
        n_samples = len(x)
        votes = np.zeros((n_samples, num_classes))
        rows = np.arange(n_samples)
        for name, learner in self.modeldic.items():
            predictions = np.asarray(learner.predict(x))
            weight = self.modelweights[name]
            if predictions.ndim == 1:
                # hard labels: one weighted vote per sample (row indices are
                # unique, so the indexed add cannot drop a vote)
                votes[rows, predictions] += weight
            else:
                # class scores: weighted sum of the score matrices
                votes += weight * predictions
        return np.argmax(votes, axis=1)
    
from hyperopt import hp, fmin, tpe

# define the search space: one weight per network, each drawn from a normal
# prior with mean 1 and standard deviation 0.75 (sigma = 0.5 is the
# alternative setting that was previously left commented out here)
space = {model:hp.normal(model, mu = 1, sigma = 0.75) for model in models}

# define objective function
def obj_func(params):
    """Return the negated validation macro-F1 of the ensemble weighted by ``params``.

    ``params`` maps each network name to its sampled weight. The score is
    negated because ``fmin`` minimises its objective.
    """
    ensemble = WeightedAverageEnsemble(models, params)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return -f1_score(y_valid, ensemble.predict(X_valid),
                     average='macro')

# perform minimization procedure
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=500)
print(best)   # show the weights found by the search

---

## Input-Informed

In [None]:
# Freeze the base networks so that only the newly added scaling layers are
# updated when the meta-model is first trained.
for model in models:
    models[model].trainable = False

inp = L.Input((len(X_train.columns),))
mergeList = []
for model in models:
    modelOut = models[model](inp)
    reshape = L.Reshape((4, 1))(modelOut)
    # A 1x1 convolution with a single filter is one learnable scale and bias
    # applied to each of the four class scores of this network.
    scale = L.Conv1D(1, 1)(reshape)
    flatten = L.Flatten()(scale)
    # Softmax must run over the four classes, i.e. after flattening. Placing
    # it inside the Conv1D would normalise over the size-1 channel axis and
    # output 1.0 for every class, making the whole meta-model constant.
    flatten = L.Softmax()(flatten)
    mergeList.append(flatten)

# Average the per-network class distributions.
concat = L.Add()(mergeList)
scale = L.Lambda(lambda x: x/len(models))(concat)
metaModel = keras.models.Model(inputs=inp, outputs=scale)

# Draw the meta-model architecture.
tensorflow.keras.utils.plot_model(metaModel, show_shapes=True, dpi=400)

# Train the meta-model, monitoring the validation split each epoch.
metaModel.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
metaModel.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

metaModel.summary()

# Validation macro-F1 of the meta-model. It is printed explicitly because a
# bare expression that is not the last statement of a cell displays nothing.
# scikit-learn metrics take (y_true, y_pred) in that order.
print(f1_score(y_valid,
               np.argmax(metaModel.predict(X_valid), axis=1),
               average='macro'))
# Unfreeze the base networks so the whole meta-model is fine-tuned end to end.
for model in models:
    models[model].trainable = True

# Keras fixes which weights are trainable when compile() is called, so the
# model has to be recompiled for the unfreezing above to take effect.
metaModel.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
metaModel.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))