# Table of Contents

1. [Packages 📦 and Basic Setup](#basic)
2. [Pre-Processing 👎🏻 -> 👍](#preprocess)
3. [The Model 👷‍♀️](#model)
4. [Training 💪🏻](#train)

## Disclaimer

<a id = "basic"> </a>

# Packages 📦 and Basic Setup

In [None]:
%%capture
!pip install wandb

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Weights and Biases
import wandb
from wandb.keras import WandbCallback
from kaggle_secrets import UserSecretsClient
# Authenticate with Weights & Biases using the key stored in Kaggle Secrets,
# then open the run that the rest of the notebook logs to.
user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("WANDB_API_KEY")
wandb.login(key=api_key)
wandb.init(
    project='TPS May 2021',
    entity='sauravmaheshkar',
)

# Load the competition CSVs (read in the order train, test, sample submission)
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-may-2021/train.csv',
        '../input/tabular-playground-series-may-2021/test.csv',
        '../input/tabular-playground-series-may-2021/sample_submission.csv',
    )
)

In [14]:
%%capture

# Log each raw competition file to W&B as its own "dataset" artifact.
for artifact_name, csv_path in (
    ("train", "../input/tabular-playground-series-may-2021/train.csv"),
    ("test", "../input/tabular-playground-series-may-2021/test.csv"),
    ("submission", "../input/tabular-playground-series-may-2021/sample_submission.csv"),
):
    artifact = wandb.Artifact(name=artifact_name, type="dataset")
    artifact.add_file(csv_path)
    wandb.log_artifact(artifact)

In [15]:
# Drop the row identifier so only the feature columns (and, for train, the
# target) remain. `errors='ignore'` keeps the cell re-runnable: on a second
# execution the column is already gone and the drop is simply a no-op
# instead of raising KeyError.
train = train.drop(columns='id', errors='ignore')
test = test.drop(columns='id', errors='ignore')
train.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,target
0,0,0,1,0,1,0,0,0,0,0,...,0,0,21,0,0,0,0,0,0,Class_2
1,0,0,0,0,2,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Class_1
2,0,0,0,0,0,0,0,0,0,2,...,0,1,0,0,0,0,13,2,0,Class_1
3,0,0,0,0,0,0,0,3,0,0,...,0,0,0,0,0,0,0,1,0,Class_4
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,Class_2


<a id = 'preprocess'> </a>
# Pre-Processing 👎🏻 -> 👍

In [16]:
# Standardize every feature column to zero mean / unit variance.
# The statistics come from `train` only and are reused for `test`, so both
# frames are scaled identically.
# NOTE(review): the statistics are computed before the train/validation split
# made later in the notebook, so validation rows contribute to them.
feature_cols = [f'feature_{i}' for i in range(50)]
feature_mean = train[feature_cols].mean()
# A constant column has std == 0; divide by 1 instead so it becomes all zeros
# rather than inf/NaN.
feature_std = train[feature_cols].std().replace(0, 1)

# Column-wise vectorized arithmetic (Series aligned on column labels) replaces
# the per-element Python lambda.
train[feature_cols] = (train[feature_cols] - feature_mean) / feature_std
test[feature_cols] = (test[feature_cols] - feature_mean) / feature_std

train.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,target
0,-0.277525,-0.218276,1.704962,-0.320158,0.143682,-0.267521,-0.31214,-0.456384,-0.264504,-0.493716,...,-0.290583,-0.230017,8.633664,-0.215267,-0.244955,-0.238006,-0.262238,-0.376793,-0.331436,Class_2
1,-0.277525,-0.218276,-0.219657,-0.320158,0.50268,1.395959,-0.31214,-0.456384,-0.264504,-0.493716,...,-0.290583,-0.230017,-0.261043,-0.215267,-0.244955,-0.238006,-0.262238,-0.376793,-0.331436,Class_1
2,-0.277525,-0.218276,-0.219657,-0.320158,-0.215316,-0.267521,-0.31214,-0.456384,-0.264504,0.556609,...,-0.290583,0.204609,-0.261043,-0.215267,-0.244955,-0.238006,8.478137,0.399419,-0.331436,Class_1
3,-0.277525,-0.218276,-0.219657,-0.320158,-0.215316,-0.267521,-0.31214,0.657726,-0.264504,-0.493716,...,-0.290583,-0.230017,-0.261043,-0.215267,-0.244955,-0.238006,-0.262238,0.011313,-0.331436,Class_4
4,-0.277525,-0.218276,-0.219657,-0.320158,-0.215316,-0.267521,-0.31214,-0.456384,-0.264504,-0.493716,...,-0.290583,-0.230017,-0.261043,-0.215267,-0.244955,-0.238006,-0.262238,0.011313,-0.331436,Class_2


In [17]:
# Encode the class labels as consecutive integers, assigned in sorted label order
class_names = sorted(train['target'].unique())
label_dict = dict(zip(class_names, range(len(class_names))))

# Keep the encoded labels separately and remove them from the feature frame
target = train['target'].map(label_dict)
train.drop(columns=['target'], inplace=True)

In [18]:
# Hand Keras plain arrays: the feature matrix as-is, the labels one-hot encoded
train = train.values
target = tf.keras.utils.to_categorical(target.values)

In [19]:
# Hold out 25% of the rows for validation, stratified on the labels, with a
# fixed seed so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    train,
    target,
    test_size=0.25,
    random_state=2021,
    stratify=target,
)

<a id='model'></a>
# The Model 👷‍♀️

In [20]:
# Network input width (feature_0 ... feature_49) and output width (Class_1 ... Class_4)
num_features, num_classes = 50, 4

In [21]:
# Fully connected classifier: five ReLU hidden layers of decreasing width, each
# followed by Dropout and BatchNormalization, ending in a softmax layer with
# one unit per class.
model = tf.keras.models.Sequential([
    # Only the first layer declares the input size and uses He-uniform init.
    tf.keras.layers.Dense(2048, input_dim=num_features, activation='relu', kernel_initializer='he_uniform'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    # NOTE(review): 1028 is presumably a typo for 1024 -- left unchanged so the
    # architecture matches the recorded results; confirm before changing.
    tf.keras.layers.Dense(1028, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

# Targets are one-hot encoded, hence categorical cross-entropy.
# `metrics` is passed as a list (the documented form) rather than a bare string.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

<a id='train'></a>
# Training 💪🏻

In [22]:
# Train with per-epoch validation; WandbCallback streams the metrics to the
# active W&B run.
# NOTE(review): the saved output of this cell reports "Epoch n/20", so it was
# produced with a different `epochs` value than the one below -- re-run to refresh.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=10,
    verbose=2,
    validation_data=(X_val, y_val),
    callbacks=[WandbCallback()],
)

Epoch 1/20
293/293 - 26s - loss: 1.2754 - accuracy: 0.4996 - val_loss: 1.1239 - val_accuracy: 0.5690
Epoch 2/20
293/293 - 24s - loss: 1.1439 - accuracy: 0.5665 - val_loss: 1.1132 - val_accuracy: 0.5738
Epoch 3/20
293/293 - 23s - loss: 1.1277 - accuracy: 0.5705 - val_loss: 1.1087 - val_accuracy: 0.5754
Epoch 4/20
293/293 - 23s - loss: 1.1200 - accuracy: 0.5721 - val_loss: 1.1107 - val_accuracy: 0.5759
Epoch 5/20
293/293 - 23s - loss: 1.1133 - accuracy: 0.5733 - val_loss: 1.1064 - val_accuracy: 0.5757
Epoch 6/20
293/293 - 23s - loss: 1.1104 - accuracy: 0.5739 - val_loss: 1.1127 - val_accuracy: 0.5717
Epoch 7/20
293/293 - 23s - loss: 1.1078 - accuracy: 0.5747 - val_loss: 1.1063 - val_accuracy: 0.5769
Epoch 8/20
293/293 - 23s - loss: 1.1047 - accuracy: 0.5745 - val_loss: 1.1053 - val_accuracy: 0.5764
Epoch 9/20
293/293 - 23s - loss: 1.1042 - accuracy: 0.5752 - val_loss: 1.1056 - val_accuracy: 0.5762
Epoch 10/20
293/293 - 23s - loss: 1.1017 - accuracy: 0.5750 - val_loss: 1.1065 - val_accura

<tensorflow.python.keras.callbacks.History at 0x7f6682544650>

In [23]:
# Evaluate on the held-out validation split (X_val / y_val). With a single
# compiled metric, `score` is [loss, accuracy].
score = model.evaluate(X_val, y_val, verbose=0)
val_loss, val_accuracy = score

# Cross-entropy loss is not a percentage, so report it as a plain number;
# only accuracy and error rate are shown in percent.
print('Validation loss: {:.4f}'.format(val_loss))
print('Validation accuracy: {:.2f}%'.format(val_accuracy * 100))
print("MLP Error: %.2f%%" % (100 - val_accuracy * 100))

Test loss: 111.50500774383545%
Test score: 57.555997371673584%
MLP Error: 42.44%


In [24]:
# Fill the submission template with the predicted class probabilities and
# write it to disk.
class_columns = ['Class_1', 'Class_2', 'Class_3', 'Class_4']
sample_submission[class_columns] = model.predict(test)

sample_submission.to_csv('my_submission.csv', index=False)
sample_submission.head()

Unnamed: 0,id,Class_1,Class_2,Class_3,Class_4
0,100000,0.105076,0.628442,0.133946,0.132536
1,100001,0.111102,0.661734,0.17458,0.052583
2,100002,0.094141,0.60411,0.151101,0.150648
3,100003,0.081183,0.589617,0.203186,0.126013
4,100004,0.061083,0.66319,0.212295,0.063432
