---

# 1.0 Loading Model Libraries...

In [1]:
%%time
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

CPU times: user 812 µs, sys: 113 µs, total: 925 µs
Wall time: 590 µs


In [2]:
%%time
import datetime # ...

CPU times: user 7 µs, sys: 1 µs, total: 8 µs
Wall time: 13.4 µs


---

# 2.0 Setting the Notebook Parameters and Default Configuration...

In [7]:
%%time
# I like to disable my Notebook Warnings.
import warnings
warnings.filterwarnings('ignore')

CPU times: user 28 µs, sys: 4 µs, total: 32 µs
Wall time: 37 µs


In [8]:
%%time
# Notebook Configuration...

# Amount of data we want to load into the Model...
DATA_ROWS = None
# Dataframe, the amount of rows and cols to visualize...
NROWS = 50
NCOLS = 15
# Main data location path...
BASE_PATH = '...'

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 11.4 µs


In [9]:
%%time
# Configure notebook display settings to only use 2 decimal places, tables look nicer.
pd.options.display.float_format = '{:,.5f}'.format
pd.set_option('display.max_columns', NCOLS) 
pd.set_option('display.max_rows', NROWS)

CPU times: user 73 µs, sys: 10 µs, total: 83 µs
Wall time: 89.6 µs


---

# 3.0 Loading the Dataset Information (Using Feather)...

In [10]:
import gc
from sklearn.preprocessing import StandardScaler, QuantileTransformer, OneHotEncoder, OrdinalEncoder
def get_data(fill_values  = [3, 2, 0, 3, 0], get_test=False):
    # Load the CSV information into a Pandas DataFrame...
    trn_data = pd.read_feather('../input/parquet-files-amexdefault-prediction/train_data.ftr')
    trn_lbls = pd.read_csv('/kaggle/input/amex-default-prediction/train_labels.csv').set_index('customer_ID')
    if(get_test):
        tst_data = pd.read_feather('../input/parquet-files-amexdefault-prediction/test_data.ftr')

    #%%time
    if(get_test):
        sub = pd.read_csv('/kaggle/input/amex-default-prediction/sample_submission.csv')
    else:
        sub = "Get test off"
    ## 6.1 Training Dataset...
    print(trn_data[:5])
    # We have 458913 customers. and we have 458913 train labels...

    # Calculates the amount of information by costumer or records available...
    trn_num_statements = trn_data.groupby('customer_ID').size().sort_index()
    print(trn_data)
    # Create a new dataset based on aggregated information
    trn_agg_data = (trn_data
                    .groupby('customer_ID')
                    .tail(1)
                    .set_index('customer_ID', drop=True)
                    .sort_index()
                    .drop(['S_2'], axis='columns'))
    del trn_data
    # Merge the labels from the labels dataframe
    trn_agg_data['target'] = trn_lbls.target
    del trn_lbls
    trn_agg_data['num_statements'] = trn_num_statements
    del trn_num_statements
    
    trn_agg_data.reset_index(inplace = True, drop = True) # forget the customer_IDs

    ## 6.2 Test Dataset...

    # Calculates the amount of information by costumer or records available...
    if(get_test):
        tst_num_statements = tst_data.groupby('customer_ID').size().sort_index()

        # Create a new dataset based on aggregated information
        tst_agg_data = (tst_data
                        .groupby('customer_ID')
                        .tail(1)
                        .set_index('customer_ID', drop=True)
                        .sort_index()
                        .drop(['S_2'], axis='columns'))
        del tst_data
        # Merge the labels from the labels dataframe
        tst_agg_data['num_statements'] = tst_num_statements
        del tst_num_statements
        tst_agg_data.reset_index(inplace = True, drop = True) # forget the customer_IDs

    # 7.0 Label / One-Hot Encoding the Categorical Variables...

    ## 7.1 One Hot Encoding Configuration...

    # One-hot Encoding Configuration
    cat_features = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']

    #trn_agg_data[cat_features] = trn_agg_data[cat_features].astype(object)
    trn_not_cat_features = [f for f in trn_agg_data.columns if f not in cat_features]
    if(get_test):
        tst_not_cat_features = [f for f in tst_agg_data.columns if f not in cat_features]

    #encoder = OneHotEncoder(drop = 'first', sparse = False, dtype = np.float32, handle_unknown = 'ignore')
    encoder = OrdinalEncoder()
    trn_encoded_features = encoder.fit_transform(trn_agg_data[cat_features])
    #feat_names = list(encoder.get_feature_names())

    ## 7.2 Train Dataset One Hot Encoding...

    # One-hot Encoding
    trn_encoded_features = pd.DataFrame(trn_encoded_features)
    #trn_encoded_features.columns = feat_names

    trn_agg_data = pd.concat([trn_agg_data[trn_not_cat_features], trn_encoded_features], axis = 1)

    ## 7.3 Test Dataset One-Hot Encoding...
    if(get_test):
        # One-hot Encoding
        tst_encoded_features = encoder.transform(tst_agg_data[cat_features])
        tst_encoded_features = pd.DataFrame(tst_encoded_features)
        #tst_encoded_features.columns = feat_names

        tst_agg_data = pd.concat([tst_agg_data[tst_not_cat_features], tst_encoded_features], axis = 1)
        tst_agg_data.head()

    features = [f for f in trn_agg_data.columns if f != 'target' and f != 'customer_ID']
    
    c = trn_agg_data[features].columns.str
    cs = [c.startswith('S_', False), c.startswith('P_', False), c.startswith('B_', False), c.startswith('R_', False), c.startswith('D_', False)]
    cs = [trn_agg_data[features].columns[c_i] for c_i in cs]
    print(cs)
    #
    # Impute missing values
    # Old fill type values
    for i_fill in range(len(fill_values)):
        if(fill_values[i_fill]==0):
            trn_agg_data[cs[i_fill]].fillna(value = 0, inplace = True)
            if(get_test):
                tst_agg_data[cs[i_fill]].fillna(value = 0, inplace = True)
        elif(fill_values[i_fill]==1):
            trn_agg_data[cs[i_fill]].fillna(value = np.nanmean(trn_agg_data[cs[i_fill]]), inplace = True)
            if(get_test):
                tst_agg_data[cs[i_fill]].fillna(value = np.nanmean(trn_agg_data[cs[i_fill]]), inplace = True)         
        elif(fill_values[i_fill]==2):
            trn_agg_data[cs[i_fill]].fillna(value = np.nanquantile(trn_agg_data[cs[i_fill]], .25), inplace = True)
            if(get_test):
                tst_agg_data[cs[i_fill]].fillna(value = np.nanquantile(trn_agg_data[cs[i_fill]], .25), inplace = True)       
        elif(fill_values[i_fill]==3):
            trn_agg_data[cs[i_fill]].fillna(value = np.nanquantile(trn_agg_data[cs[i_fill]], .75), inplace = True)
            if(get_test):
                tst_agg_data[cs[i_fill]].fillna(value = np.nanquantile(trn_agg_data[cs[i_fill]], .75), inplace = True)            
    #Fill all others
    trn_agg_data.fillna(value = 0, inplace = True)
    if(get_test):
        tst_agg_data.fillna(value = 0, inplace = True)

    # 10.0 NN Development

    # Release some memory by deleting the original DataFrames...
    gc.collect()
    if(get_test==False):
        tst_agg_data = 0
    return trn_agg_data, tst_agg_data, features, sub

In [11]:
get_data()

FileNotFoundError: [Errno 2] No such file or directory: '../input/parquet-files-amexdefault-prediction/train_data.ftr'

## 10.1 Loading Specific Model Libraries...

In [7]:
%%time
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Dense, Input, InputLayer, Add, BatchNormalization, Dropout, Concatenate, Reshape, Conv1D, Flatten
from tensorflow.keras.utils import plot_model
from sklearn.metrics import log_loss

from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
import random

CPU times: user 1.68 s, sys: 398 ms, total: 2.07 s
Wall time: 8.12 s


---

## 10.2 Amex Metric, Function...

In [8]:
%%time
# From https://www.kaggle.com/code/inversion/amex-competition-metric-python
import torch

def amex_metric(y_true, y_pred, return_components=False) -> float:
    """Amex metric for ndarrays"""
    def top_four_percent_captured(df) -> float:
        """Corresponds to the recall for a threshold of 4 %"""
        
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        four_pct_cutoff = int(0.04 * df['weight'].sum())
        df['weight_cumsum'] = df['weight'].cumsum()
        df_cutoff = df.loc[df['weight_cumsum'] <= four_pct_cutoff]
        return (df_cutoff['target'] == 1).sum() / (df['target'] == 1).sum()
    
    
    def weighted_gini(df) -> float:
        df['weight'] = df['target'].apply(lambda x: 20 if x==0 else 1)
        df['random'] = (df['weight'] / df['weight'].sum()).cumsum()
        total_pos = (df['target'] * df['weight']).sum()
        df['cum_pos_found'] = (df['target'] * df['weight']).cumsum()
        df['lorentz'] = df['cum_pos_found'] / total_pos
        df['gini'] = (df['lorentz'] - df['random']) * df['weight']
        return df['gini'].sum()

    
    def normalized_weighted_gini(df) -> float:
        """Corresponds to 2 * AUC - 1"""
        
        df2 = pd.DataFrame({'target': df.target, 'prediction': df.target})
        df2.sort_values('prediction', ascending=False, inplace=True)
        return weighted_gini(df) / weighted_gini(df2)

    
    df = pd.DataFrame({'target': tf.experimental.numpy.ravel(y_true), 'prediction': tf.experimental.numpy.ravel(y_pred)})
    df.sort_values('prediction', ascending=False, inplace=True)
    g = normalized_weighted_gini(df)
    d = top_four_percent_captured(df)

    if return_components: return g, d, 0.5 * (g + d)
    return 0.5 * (g + d)

CPU times: user 386 ms, sys: 106 ms, total: 492 ms
Wall time: 2 s


---

## 10.3 Defining the NN Model Architecture...

## 10.3.1 Architecture 01, Simple NN

In [9]:
%%time
def nn_model():
    '''
    '''
    regularization = 4e-4
    activation_func = 'swish'
    inputs = Input(shape = (len(features)))
    
    x = Dense(256, 
              #use_bias  = True, 
              kernel_regularizer = tf.keras.regularizers.l2(regularization), 
              activation = activation_func)(inputs)
    
    x = BatchNormalization()(x)
    
    x = Dense(64, 
              #use_bias  = True, 
              kernel_regularizer = tf.keras.regularizers.l2(regularization), 
              activation = activation_func)(x)
    
    x = BatchNormalization()(x)
    
    x = Dense(64, 
          #use_bias  = True, 
          kernel_regularizer = tf.keras.regularizers.l2(regularization), 
          activation = activation_func)(x)
    
    x = BatchNormalization()(x)

    x = Dense(32, 
              #use_bias  = True, 
              kernel_regularizer = tf.keras.regularizers.l2(regularization), 
              activation = activation_func)(x)
    
    x = BatchNormalization()(x)

    x = Dense(1, 
              #use_bias  = True, 
              #kernel_regularizer = tf.keras.regularizers.l2(regularization),
              activation = 'sigmoid')(x)
    
    model = Model(inputs, x)
    
    return model

CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 11.9 µs


---

## 10.3.2 Architecture 02, Concatenated NN

---

## 10.5 Defining Model Training Parameters...

In [10]:
%%time
# Defining model parameters...
BATCH_SIZE         = 256
EPOCHS             = 1 
EPOCHS_COSINEDECAY = 1
DIAGRAMS           = True
USE_PLATEAU        = False
INFERENCE          = False
VERBOSE            = 0 
TARGET             = 'target'

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 11 µs


In [11]:
%%time
def nn_model(features, regularization = 4e-4):
    activation_func = 'swish'
    inputs = Input(shape = (len(features)))

    x0 = Dense(256,
               kernel_regularizer = tf.keras.regularizers.l2(regularization), 
               activation = activation_func)(inputs)
    x1 = Dense(128,
               kernel_regularizer = tf.keras.regularizers.l2(regularization),
               activation = activation_func)(x0)
    x1 = Dense(64,
               kernel_regularizer = tf.keras.regularizers.l2(regularization),
               activation = activation_func)(x1)
    x1 = Dense(32,
           kernel_regularizer = tf.keras.regularizers.l2(regularization),
           activation = activation_func)(x1)
    
    x1 = Concatenate()([x1, x0])
    x1 = Dropout(0.1)(x1)
    
    x1 = Dense(16, kernel_regularizer=tf.keras.regularizers.l2(regularization),activation=activation_func,)(x1)
    
    x1 = Dense(1, 
              #kernel_regularizer=tf.keras.regularizers.l2(regularization),
              activation='sigmoid')(x1)
    
    model = Model(inputs, x1)
    
    return model
    

CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 13.1 µs


---

## 10.6 Defining the Model Training Configuration...

In [12]:
 %%time
# Defining model training function...
history_list = []
def fit_model(X_train, y_train, X_val, y_val, model, run = 0):
    '''
    '''
    lr_start = 0.01
    start_time = datetime.datetime.now()
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    epochs = EPOCHS    
    lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.7, patience = 4, verbose = VERBOSE)
    es = EarlyStopping(monitor = 'val_loss',patience = 12, verbose = 1, mode = 'min', restore_best_weights = True)
    tm = tf.keras.callbacks.TerminateOnNaN()
    callbacks = [lr, es, tm]
    
    # Cosine Learning Rate Decay
    if USE_PLATEAU == False:
        epochs = EPOCHS_COSINEDECAY
        lr_end = 0.0002

        def cosine_decay(epoch):
            if epochs > 1:
                w = (1 + math.cos(epoch / (epochs - 1) * math.pi)) / 2
            else:
                w = 1
            return w * lr_start + (1 - w) * lr_end
        
        lr = LearningRateScheduler(cosine_decay, verbose = 0)
        callbacks = [lr, tm]
    
    # Model Initialization...
    #model = nn_model(features)
    optimizer_func = tf.keras.optimizers.Adam(learning_rate = lr_start)
    loss_func = tf.keras.losses.BinaryCrossentropy()
    model.compile(optimizer = optimizer_func, loss = loss_func, metrics = [amex_metric])
    
    
    X_val = scaler.transform(X_val)
    validation_data = (X_val, y_val)
    
    history = model.fit(X_train, 
                        y_train, 
                        validation_data = validation_data, 
                        epochs          = epochs,
                        verbose         = VERBOSE,
                        batch_size      = BATCH_SIZE,
                        shuffle         = True,
                        callbacks       = callbacks
                       )
    print("Model fitted")
    history_list.append(history.history)
        
    print(f'Training Loss: {history_list[-1]["loss"][-1]:.5f}, Validation Loss: {history_list[-1]["val_loss"][-1]:.5f}')
    callbacks, es, lr, tm, history = None, None, None, None, None
    
    
    y_val_pred = model.predict(X_val, batch_size = BATCH_SIZE, verbose = VERBOSE).ravel()
    amex_score = amex_metric(y_val.values, y_val_pred, return_components = False)
    
    print(f'Fold {run} | {str(datetime.datetime.now() - start_time)[-12:-7]}'
          f'| Amex Score: {amex_score:.5f}')
    
    print('')
    
    #score_list.append(amex_score)
    
    tst_data_scaled = scaler.transform(tst_agg_data[features])
    tst_pred = model.predict(tst_data_scaled)
    predictions.append(tst_pred)
    print(amex_score)
    print(history)
    return amex_score, history.history

CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 11.9 µs


---

## 10.7 Creating a Model Training Loop and Cross Validating in 5 Folds... 

In [13]:
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, roc_curve
import math
def train(trn_agg_data, features, model):
    score_list = []
    kf = KFold(n_splits = 5)
    for fold, (trn_idx, val_idx) in enumerate(kf.split(trn_agg_data)):
        X_train, X_val = trn_agg_data.iloc[trn_idx][features], trn_agg_data.iloc[val_idx][features]
        y_train, y_val = trn_agg_data.iloc[trn_idx][TARGET], trn_agg_data.iloc[val_idx][TARGET]
        print("Fold",fold)
        score, history = fit_model(X_train, y_train, X_val, y_val, model)
        print("In loop (train):", history)
        score_list.append(score)
    current_score = np.mean(score_list)
    print(f'OOF AUC: {current_score:.5f}')
    print(history)
    return history

In [14]:
import sys
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

for name, size in sorted(((name, sys.getsizeof(value)) for name, value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

                           _i6:  5.7 KiB
                           _ii:  2.7 KiB
                          _i12:  2.7 KiB
                           _i8:  1.8 KiB
                           _i9:  1.3 KiB
                          _iii:  1.1 KiB
                          _i11:  1.1 KiB
                StandardScaler:  1.0 KiB
           QuantileTransformer:  1.0 KiB
                 OneHotEncoder:  1.0 KiB


In [15]:
%%time
gc.collect()
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, roc_curve
import math

# Create empty lists to store NN information...
best_fill_type = [3, 2, 0, 3, 0]
trn_agg_data, tst_agg_data, features, sub = get_data(best_fill_type, get_test=True)

                                         customer_ID         S_2     P_2  \
0  0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...  2017-03-09 0.93848   
1  0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...  2017-04-07 0.93652   
2  0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...  2017-05-28 0.95410   
3  0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...  2017-06-13 0.96045   
4  0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...  2017-07-16 0.94727   

     D_39     B_1     B_2     R_1  ...   D_139   D_140   D_141  D_142   D_143  \
0 0.00173 0.00873 1.00684 0.00922  ... 0.00243 0.00371 0.00382    NaN 0.00057   
1 0.00578 0.00492 1.00098 0.00615  ... 0.00396 0.00317 0.00503    NaN 0.00957   
2 0.09149 0.02165 1.00977 0.00682  ... 0.00327 0.00733 0.00043    NaN 0.00343   
3 0.00245 0.01369 1.00293 0.00137  ... 0.00612 0.00452 0.00320    NaN 0.00842   
4 0.00248 0.01519 1.00098 0.00761  ... 0.00367 0.00494 0.00889    NaN 0.00167   

    D_144   D_145  
0 0.00061 0.00267  
1 0.00549 0.0092

In [16]:
score_list = []
predictions = []
history = train(trn_agg_data, features, nn_model(features))

2022-08-14 21:16:28.250901: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Fold 0


2022-08-14 21:16:35.147329: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Unable to locate the source code of <function amex_metric at 0x7f3038053b90>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code


TypeError: in user code:

    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /opt/conda/lib/python3.7/site-packages/keras/engine/training.py:792 train_step
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    /opt/conda/lib/python3.7/site-packages/keras/engine/compile_utils.py:457 update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    /opt/conda/lib/python3.7/site-packages/keras/utils/metrics_utils.py:73 decorated
        update_op = update_state_fn(*args, **kwargs)
    /opt/conda/lib/python3.7/site-packages/keras/metrics.py:177 update_state_fn
        return ag_update_state(*args, **kwargs)
    /opt/conda/lib/python3.7/site-packages/keras/metrics.py:681 update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    <timed exec>:34 amex_metric  **
        
    /opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:614 __init__
        mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
    /opt/conda/lib/python3.7/site-packages/pandas/core/internals/construction.py:465 dict_to_mgr
        arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy
    /opt/conda/lib/python3.7/site-packages/pandas/core/internals/construction.py:119 arrays_to_mgr
        index = _extract_index(arrays)
    /opt/conda/lib/python3.7/site-packages/pandas/core/internals/construction.py:622 _extract_index
        raw_lengths.append(len(val))
    /opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:875 __len__
        "shape information.".format(self.name))

    TypeError: len is not well defined for symbolic Tensors. (Reshape:0) Please call `x.shape` rather than `len(x)` for shape information.


In [None]:
plot_model(nn_model(features), show_layer_names = False, show_shapes = True, dpi = 60)

In [None]:
# Plot training history
import matplotlib as plt
def plot_history(history, *, n_epochs=None, plot_lr=False, title=None, bottom=None, top=None):
    """Plot (the last n_epochs epochs of) the training history
    
    Plots loss and optionally val_loss and lr."""
    plt.figure(figsize=(15, 6))
    from_epoch = 0 if n_epochs is None else max(len(history['loss']) - n_epochs, 0)
    
    # Plot training and validation losses
    plt.plot(np.arange(from_epoch, len(history['loss'])), history['loss'][from_epoch:], label='Training loss')
    try:
        plt.plot(np.arange(from_epoch, len(history['loss'])), history['val_loss'][from_epoch:], label='Validation loss')
        best_epoch = np.argmin(np.array(history['val_loss']))
        best_val_loss = history['val_loss'][best_epoch]
        if best_epoch >= from_epoch:
            plt.scatter([best_epoch], [best_val_loss], c='r', label=f'Best val_loss = {best_val_loss:.5f}')
        if best_epoch > 0:
            almost_epoch = np.argmin(np.array(history['val_loss'])[:best_epoch])
            almost_val_loss = history['val_loss'][almost_epoch]
            if almost_epoch >= from_epoch:
                plt.scatter([almost_epoch], [almost_val_loss], c='orange', label='Second best val_loss')
    except KeyError:
        pass
    if bottom is not None: plt.ylim(bottom=bottom)
    if top is not None: plt.ylim(top=top)
    plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='lower left')
    if title is not None: plt.title(title)
        
    # Plot learning rate
    if plot_lr and 'lr' in history:
        ax2 = plt.gca().twinx()
        ax2.plot(np.arange(from_epoch, len(history['lr'])), np.array(history['lr'][from_epoch:]), color='g', label='Learning rate')
        ax2.set_ylabel('Learning rate')
        ax2.legend(loc='upper right')
        
    plt.show()

In [None]:
history_list

In [None]:
# Plot training history
plot_history(history_list, 
             title=f"Learning curve",
             plot_lr=True)

# # Plot prediction histogram
# plt.figure(figsize=(16, 5))
# plt.hist(y_va_pred[y_va == 0], bins=np.linspace(0, 1, 21),
#          alpha=0.5, density=True)
# plt.hist(y_va_pred[y_va == 1], bins=np.linspace(0, 1, 21),
#          alpha=0.5, density=True)
# plt.xlabel('y_pred')
# plt.ylabel('density')
# plt.title('OOF Prediction Histogram')
plt.show()

---

# 11.0 Model Prediction and Submissions

In [None]:
sdfa(ea)

In [None]:
%%time
sub.head()

In [None]:
%%time
sub['prediction'] = np.array(predictions).mean(axis = 0)

In [None]:
%%time
sub.to_csv('my_submission.csv', index = False)

In [None]:
%%time
sub.head()

---