In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before executing code so edits to project source files are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf

import os
import sys
import datetime

import warnings; warnings.filterwarnings('ignore')

# Make the parent directory importable so `config` and `src` resolve. The guard
# keeps the cell idempotent: re-running it no longer appends duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')
from config import CFG
from src.model.include_concat import NN

# Rebind `CFG` to the object returned by calling the imported name; the rest of
# the notebook reads settings (RAW_DATA, LR, ...) as attributes of this object.
CFG = CFG()

# pandas display: thousands separator with two decimals, bounded table size.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 100)

In [3]:
from colorama import Style, Fore

# Bright ANSI colour prefixes for console output; `res` resets the styling.
red, blu, mgt, grn, gld = (
    Style.BRIGHT + colour
    for colour in (Fore.RED, Fore.BLUE, Fore.MAGENTA, Fore.GREEN, Fore.YELLOW)
)
res = Style.RESET_ALL

# Matplotlib rc overrides handed to seaborn: off-white background, black axes
# and ticks, faint grid lines, serif font.
rc = dict([
    ("axes.facecolor", "#FFFEF8"),
    ("figure.facecolor", "#FFFEF8"),
    ("axes.edgecolor", "#000000"),
    ("grid.color", "#EBEBE7" + "30"),
    ("font.family", "serif"),
    ("axes.labelcolor", "#000000"),
    ("xtick.color", "#000000"),
    ("ytick.color", "#000000"),
    ("grid.alpha", 0.4),
])
sns.set(rc=rc)

# Colour cycle for the notebook's figures.
palette = [
    '#302c36',
    '#037d97',
    '#E4591E',
    '#C09741',
    '#EC5B6D',
    '#90A6B1',
    '#6ca957',
    '#D8E3E2',
]

In [4]:
# Read the three raw CSV files from the directory configured in CFG.RAW_DATA.
orig, train, test = (
    pd.read_csv(os.path.join(CFG.RAW_DATA, csv_name))
    for csv_name in ('Pulsar.csv', 'train.csv', 'test.csv')
)
# The `id` column is dropped from the train and test frames only.
train, test = (frame.drop(columns='id') for frame in (train, test))

In [5]:
# Instantiate the project's NN class (imported from src.model.include_concat),
# passing the training frame to its constructor.
# NOTE(review): how NN uses the frame is not visible in this notebook — confirm.
ensemble = NN(train)

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf

import os
import sys
import datetime

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import log_loss

import warnings; warnings.filterwarnings('ignore')

# Make the parent directory importable so `config` and `src` resolve. The guard
# keeps the cell idempotent: re-running it no longer appends duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')
from config import CFG
from src.model.include_concat import NN

# Rebind `CFG` to the object returned by calling the imported name; the rest of
# the notebook reads settings (RAW_DATA, LR, ...) as attributes of this object.
CFG = CFG()

# pandas display: thousands separator with two decimals, bounded table size.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 100)

In [7]:
# Keras callbacks passed to `fit`; both monitor the validation loss.
callbacks = [
    # Stop once val_loss has not improved for 20 epochs and roll the model back
    # to its best weights.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, mode='min', restore_best_weights=True, verbose=2
    ),
    # Halve the learning rate after 10 epochs without improvement.
    # `restore_best_weights` is an EarlyStopping option, not a ReduceLROnPlateau
    # one, so it is not passed here.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=10, mode='min', min_lr=1e-12, verbose=2
    )
]

In [8]:
def log_(column):
    """Shift a numeric column so its minimum becomes 1, then take the natural log.

    Args:
        column: pandas Series or NumPy array of numbers.

    Returns:
        Same kind of object as ``column`` holding ``log(column - min + 1)``.
        The smallest value maps to 0; NaN entries stay NaN.
    """
    # np.nanmin ignores NaNs. The builtin `min` depends on element order when
    # NaNs are present (a leading NaN would turn the whole result into NaN) and
    # iterates element by element in Python.
    lowest = np.nanmin(np.asarray(column))
    return np.log((1 - lowest) + column)

In [9]:
# Read the raw CSV files; the `id` column is dropped from train and test.
orig = pd.read_csv(os.path.join(CFG.RAW_DATA, 'Pulsar.csv'))
train = pd.read_csv(os.path.join(CFG.RAW_DATA, 'train.csv')).drop(columns='id')
test = pd.read_csv(os.path.join(CFG.RAW_DATA, 'test.csv')).drop(columns='id')

# Log-transform the two skewness columns as log(x - min + 1).
# The offset is fitted on the training frame and reused for the test frame, so a
# given raw value maps to the same feature value in both. Shifting each frame by
# its own minimum would make the two encodings disagree.
skew_cols = ['Skewness', 'Skewness_DMSNR_Curve']
skew_offset = 1 - train[skew_cols].min()
train[skew_cols] = np.log(train[skew_cols] + skew_offset)
# Test values below the training minimum are clipped so the log argument stays
# >= 1 (they map to 0, like the training minimum).
test[skew_cols] = np.log((test[skew_cols] + skew_offset).clip(lower=1))

In [10]:
# Keras callbacks passed to `fit`; both monitor the validation loss.
callbacks = [
    # Stop once val_loss has not improved for 20 epochs and roll the model back
    # to its best weights.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, mode='min', restore_best_weights=True, verbose=2
    ),
    # Halve the learning rate after 10 epochs without improvement.
    # `restore_best_weights` is an EarlyStopping option, not a ReduceLROnPlateau
    # one, so it is not passed here.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=10, mode='min', min_lr=1e-12, verbose=2
    )
]

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf
import wandb

import os
import sys
import datetime

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import log_loss

import warnings; warnings.filterwarnings('ignore')

# Make the parent directory importable so `config` and `src` resolve. The guard
# keeps the cell idempotent: re-running it no longer appends duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')
from config import CFG
from src.model.include_concat import NN

# Rebind `CFG` to the object returned by calling the imported name; the rest of
# the notebook reads settings (RAW_DATA, LR, ...) as attributes of this object.
CFG = CFG()

# pandas display: thousands separator with two decimals, bounded table size.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 100)

In [12]:
# Repeated stratified K-fold cross-validation of the NN model, tracked in W&B.

# Derive features/target here so the cell does not rely on a later cell having
# been executed first.
X = train.drop('Class', axis=1)
y = train['Class']

k_fold = RepeatedStratifiedKFold(n_splits=CFG.NFOLDS, n_repeats=CFG.REPEATS, random_state=CFG.SEED)

# Hyper-parameters recorded with the W&B run.
params = {
    'folds': CFG.NFOLDS,
    'repeats': CFG.REPEATS,
    'batch_size': CFG.BATCH_SIZE,
    'learning_rate': CFG.LR,
}
wandb.init(project='S3E10', group='NNtest', name='NNtest', config=params)

train_log_loss = []  # per-fold log loss on the training split
oof_log_loss = []    # per-fold log loss on the held-out split
models = []          # one fitted model per fold
try:
    for train_idx, val_idx in k_fold.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Build and compile a fresh model for every fold. Reusing one instance
        # would carry weights (already fitted on this fold's validation rows)
        # into later folds and would fill `models` with the same object.
        ensemble = NN(train)
        ensemble.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=CFG.LR),
            loss='binary_crossentropy',
            metrics=['binary_accuracy']
        )

        history = ensemble.fit(
            X_train, y_train,
            batch_size=CFG.BATCH_SIZE,
            epochs=5, callbacks=callbacks,
            validation_data=(X_val, y_val)
        )

        train_preds = ensemble.predict(X_train)
        train_loss = log_loss(y_train, train_preds)
        train_log_loss.append(train_loss)

        oof_preds = ensemble.predict(X_val)
        oof_loss = log_loss(y_val, oof_preds)
        oof_log_loss.append(oof_loss)

        models.append(ensemble)
finally:
    # Close the W&B run even if a fold raises.
    wandb.finish()

In [13]:
# Split the training frame into features and target.
# Both stay pandas objects (no `.values`): the cross-validation loops in this
# notebook slice them with `.iloc`, which NumPy arrays do not provide.
X = train.drop('Class', axis=1)
y = train['Class']

In [14]:
# Imported here because no earlier cell brings this name into scope.
from wandb.keras import WandbMetricsLogger

# Keras callbacks passed to `fit`; the first two monitor the validation loss.
callbacks = [
    # Stop once val_loss has not improved for 20 epochs and roll the model back
    # to its best weights.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, mode='min', restore_best_weights=True, verbose=2
    ),
    # Halve the learning rate after 10 epochs without improvement.
    # `restore_best_weights` is an EarlyStopping option, not a ReduceLROnPlateau
    # one, so it is not passed here.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=10, mode='min', min_lr=1e-12, verbose=2
    ),
    # Send training metrics to the active Weights & Biases run.
    WandbMetricsLogger()
]

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf
import wandb
from wandb.keras import WandbMetricsLogger

import os
import sys
import datetime

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import log_loss

import warnings; warnings.filterwarnings('ignore')

# Make the parent directory importable so `config` and `src` resolve. The guard
# keeps the cell idempotent: re-running it no longer appends duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')
from config import CFG
from src.model.include_concat import NN

# Rebind `CFG` to the object returned by calling the imported name; the rest of
# the notebook reads settings (RAW_DATA, LR, ...) as attributes of this object.
CFG = CFG()

# pandas display: thousands separator with two decimals, bounded table size.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 100)

In [16]:
from colorama import Style, Fore

# Bright ANSI colour prefixes for console output; `res` resets the styling.
red, blu, mgt, grn, gld = (
    Style.BRIGHT + colour
    for colour in (Fore.RED, Fore.BLUE, Fore.MAGENTA, Fore.GREEN, Fore.YELLOW)
)
res = Style.RESET_ALL

# Matplotlib rc overrides handed to seaborn: off-white background, black axes
# and ticks, faint grid lines, serif font.
rc = dict([
    ("axes.facecolor", "#FFFEF8"),
    ("figure.facecolor", "#FFFEF8"),
    ("axes.edgecolor", "#000000"),
    ("grid.color", "#EBEBE7" + "30"),
    ("font.family", "serif"),
    ("axes.labelcolor", "#000000"),
    ("xtick.color", "#000000"),
    ("ytick.color", "#000000"),
    ("grid.alpha", 0.4),
])
sns.set(rc=rc)

# Colour cycle for the notebook's figures.
palette = [
    '#302c36',
    '#037d97',
    '#E4591E',
    '#C09741',
    '#EC5B6D',
    '#90A6B1',
    '#6ca957',
    '#D8E3E2',
]

In [17]:
def log_(column):
    """Return ``log(column - min(column) + 1)`` element-wise.

    The column is shifted so that its smallest value becomes 1 before the
    natural logarithm is applied, which keeps the argument positive.

    Args:
        column: pandas Series or NumPy array of numbers.

    Returns:
        Object of the same kind as ``column``; NaN entries remain NaN.
    """
    # NaN-aware, vectorised minimum. The builtin `min` walks the values in
    # Python and returns NaN whenever the first element is NaN, which would
    # turn the entire output into NaN.
    floor = np.nanmin(np.asarray(column))
    return np.log((1 - floor) + column)

In [18]:
# Read the raw CSV files; the `id` column is dropped from train and test.
orig = pd.read_csv(os.path.join(CFG.RAW_DATA, 'Pulsar.csv'))
train = pd.read_csv(os.path.join(CFG.RAW_DATA, 'train.csv')).drop(columns='id')
test = pd.read_csv(os.path.join(CFG.RAW_DATA, 'test.csv')).drop(columns='id')

# Log-transform the two skewness columns as log(x - min + 1).
# The offset is fitted on the training frame and reused for the test frame, so a
# given raw value maps to the same feature value in both. Shifting each frame by
# its own minimum would make the two encodings disagree.
skew_cols = ['Skewness', 'Skewness_DMSNR_Curve']
skew_offset = 1 - train[skew_cols].min()
train[skew_cols] = np.log(train[skew_cols] + skew_offset)
# Test values below the training minimum are clipped so the log argument stays
# >= 1 (they map to 0, like the training minimum).
test[skew_cols] = np.log((test[skew_cols] + skew_offset).clip(lower=1))

In [19]:
# Split the training frame into features and target.
# Both stay pandas objects (no `.values`): the cross-validation loops in this
# notebook slice them with `.iloc`, which NumPy arrays do not provide.
X = train.drop('Class', axis=1)
y = train['Class']

In [20]:
# Keras callbacks passed to `fit`; the first two monitor the validation loss.
callbacks = [
    # Stop once val_loss has not improved for 20 epochs and roll the model back
    # to its best weights.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, mode='min', restore_best_weights=True, verbose=2
    ),
    # Halve the learning rate after 10 epochs without improvement.
    # `restore_best_weights` is an EarlyStopping option, not a ReduceLROnPlateau
    # one, so it is not passed here.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=10, mode='min', min_lr=1e-12, verbose=2
    ),
    # Send training metrics to the active Weights & Biases run.
    WandbMetricsLogger()
]

In [21]:
# Repeated stratified K-fold cross-validation of the NN model, tracked in W&B.
k_fold = RepeatedStratifiedKFold(n_splits=CFG.NFOLDS, n_repeats=CFG.REPEATS, random_state=CFG.SEED)

# Hyper-parameters recorded with the W&B run.
params = {
    'folds': CFG.NFOLDS,
    'repeats': CFG.REPEATS,
    'batch_size': CFG.BATCH_SIZE,
    'learning_rate': CFG.LR,
}
wandb.init(project='S3E10', group='NNtest', name='NNtest', config=params)

# X / y may be plain NumPy arrays (e.g. built with `.values`), which have no
# `.iloc`. np.asarray gives positional indexing for arrays and pandas objects alike.
X_arr, y_arr = np.asarray(X), np.asarray(y)

train_log_loss = []  # per-fold log loss on the training split
oof_log_loss = []    # per-fold log loss on the held-out split
models = []          # one fitted model per fold
try:
    for train_idx, val_idx in k_fold.split(X_arr, y_arr):
        X_train, X_val = X_arr[train_idx], X_arr[val_idx]
        y_train, y_val = y_arr[train_idx], y_arr[val_idx]

        # Build and compile a fresh model for every fold. Reusing one instance
        # would carry weights (already fitted on this fold's validation rows)
        # into later folds and would fill `models` with the same object.
        ensemble = NN(train)
        ensemble.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=CFG.LR),
            loss='binary_crossentropy',
            metrics=['binary_accuracy']
        )

        history = ensemble.fit(
            X_train, y_train,
            batch_size=CFG.BATCH_SIZE,
            epochs=5, callbacks=callbacks,
            validation_data=(X_val, y_val)
        )

        train_preds = ensemble.predict(X_train)
        train_loss = log_loss(y_train, train_preds)
        train_log_loss.append(train_loss)

        oof_preds = ensemble.predict(X_val)
        oof_loss = log_loss(y_val, oof_preds)
        oof_log_loss.append(oof_loss)

        models.append(ensemble)
finally:
    # Close the W&B run even if a fold raises.
    wandb.finish()

In [22]:
# Separate the target column from the remaining feature columns of the
# training frame; both results remain pandas objects.
y = train['Class']
X = train.drop(columns='Class')

In [23]:
# Repeated stratified K-fold cross-validation of the NN model, tracked in W&B.
k_fold = RepeatedStratifiedKFold(n_splits=CFG.NFOLDS, n_repeats=CFG.REPEATS, random_state=CFG.SEED)

# Hyper-parameters recorded with the W&B run.
params = {
    'folds': CFG.NFOLDS,
    'repeats': CFG.REPEATS,
    'batch_size': CFG.BATCH_SIZE,
    'learning_rate': CFG.LR,
}
wandb.init(project='S3E10', group='NNtest', name='NNtest', config=params)

train_log_loss = []  # per-fold log loss on the training split
oof_log_loss = []    # per-fold log loss on the held-out split
models = []          # one fitted model per fold
try:
    for train_idx, val_idx in k_fold.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Build and compile a fresh model for every fold. Reusing one instance
        # would carry weights (already fitted on this fold's validation rows)
        # into later folds and would fill `models` with the same object.
        ensemble = NN(train)
        ensemble.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=CFG.LR),
            loss='binary_crossentropy',
            metrics=['binary_accuracy']
        )

        history = ensemble.fit(
            X_train, y_train,
            batch_size=CFG.BATCH_SIZE,
            epochs=5, callbacks=callbacks,
            validation_data=(X_val, y_val)
        )

        train_preds = ensemble.predict(X_train)
        train_loss = log_loss(y_train, train_preds)
        train_log_loss.append(train_loss)

        oof_preds = ensemble.predict(X_val)
        oof_loss = log_loss(y_val, oof_preds)
        oof_log_loss.append(oof_loss)

        models.append(ensemble)
finally:
    # Close the W&B run even if a fold raises.
    wandb.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666878278325991, max=1.0)…

In [24]:
# Repeated stratified K-fold cross-validation of the NN model, tracked in W&B.
k_fold = RepeatedStratifiedKFold(n_splits=CFG.NFOLDS, n_repeats=CFG.REPEATS, random_state=CFG.SEED)

# Hyper-parameters recorded with the W&B run.
params = {
    'folds': CFG.NFOLDS,
    'repeats': CFG.REPEATS,
    'batch_size': CFG.BATCH_SIZE,
    'learning_rate': CFG.LR,
}
wandb.init(project='S3E10', group='NNtest', name='NNtest', config=params)

train_log_loss = []  # per-fold log loss on the training split
oof_log_loss = []    # per-fold log loss on the held-out split
models = []          # one fitted model per fold
try:
    for train_idx, val_idx in k_fold.split(X, y):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Build and compile a fresh model for every fold. Reusing one instance
        # would carry weights (already fitted on this fold's validation rows)
        # into later folds and would fill `models` with the same object.
        # NOTE(review): the model is constructed from `test` here — presumably
        # because that frame has no `Class` column; confirm against NN.
        ensemble = NN(test)
        ensemble.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=CFG.LR),
            loss='binary_crossentropy',
            metrics=['binary_accuracy']
        )

        history = ensemble.fit(
            X_train, y_train,
            batch_size=CFG.BATCH_SIZE,
            epochs=5, callbacks=callbacks,
            validation_data=(X_val, y_val)
        )

        train_preds = ensemble.predict(X_train)
        train_loss = log_loss(y_train, train_preds)
        train_log_loss.append(train_loss)

        oof_preds = ensemble.predict(X_val)
        oof_loss = log_loss(y_val, oof_preds)
        oof_log_loss.append(oof_loss)

        models.append(ensemble)
finally:
    # Close the W&B run even if a fold raises.
    wandb.finish()