In [1]:
!pip install -q kaggle

Moving JSON file into /.kaggle

In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

To Make the JSON file secure

In [3]:
!chmod 600 /root/.kaggle/kaggle.json

Copying the dataset locally

In [None]:
!kaggle competitions download -c santander-customer-satisfaction

In [7]:
! unzip "/content/santander-customer-satisfaction.zip" -d "/content/drive/MyDrive/Kaggle Competitions/Santander Customer Satisfaction"

Archive:  /content/santander-customer-satisfaction.zip
  inflating: /content/drive/MyDrive/Kaggle Competitions/Santander Customer Satisfaction/sample_submission.csv  
  inflating: /content/drive/MyDrive/Kaggle Competitions/Santander Customer Satisfaction/test.csv  
  inflating: /content/drive/MyDrive/Kaggle Competitions/Santander Customer Satisfaction/train.csv  


In [4]:
import os
os.chdir("/content/drive/MyDrive/Kaggle Competitions/Santander Customer Satisfaction")
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [5]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [6]:
X = train.drop(['ID','TARGET'], axis=1).values
y = train['TARGET'].values

In [7]:
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,
                                                    random_state=23,stratify=y)
X_scl_trn = scaler.fit_transform(X_train)
X_scl_tst = scaler.transform(X_test)


In [8]:
pca = PCA(n_components=0.9)
X_PC_trn = pca.fit_transform(X_scl_trn)
X_PC_tst = pca.transform(X_scl_tst)
print(X_PC_trn.shape)
print(X_PC_tst.shape)

(53214, 74)
(22806, 74)


In [9]:
X_torch = torch.from_numpy(X_PC_trn)
y_torch = torch.from_numpy(y_train).reshape(-1,1)
X_torch_test = torch.from_numpy(X_PC_tst)
y_torch_test = torch.from_numpy(y_test).reshape(-1,1)
print(X_torch.size())
print(y_torch.size())
print(X_torch_test.size())
print(y_torch_test.size())

torch.Size([53214, 74])
torch.Size([53214, 1])
torch.Size([22806, 74])
torch.Size([22806, 1])


Defining the Model

In [10]:
class MLPClassifier(torch.nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.apply(self._init_weights) # Weight Initialization
        self.linear1 = nn.Linear(in_features=num_features, out_features=50)
        self.linear2 = nn.Linear(50, 20)
        self.linear3 = nn.Linear(20,10)
        self.linear4 = nn.Linear(10,1)
        self.relu = nn.ReLU()
        self.selu = nn.SELU()
        self.dropout = nn.Dropout(0.2)
        #self.bn = nn.BatchNorm1d(100)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.xavier_uniform_()
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, x):
        #x = self.dropout(x)
        x = self.linear1(x)
        x = self.linear2(x)
        #x = self.bn(x)
        x = self.linear3(x)
        output = self.linear4(x)
        return output

In [11]:
torch.manual_seed(23)
model = MLPClassifier(num_features=X_PC_trn.shape[1])

In [12]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adamax(model.parameters())
optimizer

Adamax (
Parameter Group 0
    betas: (0.9, 0.999)
    differentiable: False
    eps: 1e-08
    foreach: None
    lr: 0.002
    maximize: False
    weight_decay: 0
)

### Training Loop

In [13]:
train_losses = []
test_losses = []
for epoch in np.arange(0,500):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred_prob = model(X_torch.float())
    y_pred_prob_test = model(X_torch_test.float())

    # Compute and print loss
    loss = criterion(y_pred_prob, y_torch.float())
    train_losses.append(loss.detach().numpy())
    tst_loss = criterion(y_pred_prob_test, y_torch_test.float() )
    test_losses.append(tst_loss.detach().numpy())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()

    # perform a backward pass (backpropagation)
    loss.backward()

    # Update the parameters
    optimizer.step()
    if epoch%10 == 0:
      print('epoch: ', epoch+1,' train loss: ', loss.item(), " test loss:", tst_loss.item())

epoch:  1  train loss:  0.5776174664497375  test loss: 0.5769173502922058
epoch:  11  train loss:  0.4893755614757538  test loss: 0.488797664642334
epoch:  21  train loss:  0.4064651131629944  test loss: 0.40595483779907227
epoch:  31  train loss:  0.3123723864555359  test loss: 0.3123684525489807
epoch:  41  train loss:  0.21887360513210297  test loss: 0.21890360116958618
epoch:  51  train loss:  0.16333651542663574  test loss: 0.16367512941360474
epoch:  61  train loss:  0.14932623505592346  test loss: 0.14932918548583984
epoch:  71  train loss:  0.14753344655036926  test loss: 0.1479054093360901
epoch:  81  train loss:  0.14712348580360413  test loss: 0.1481776088476181
epoch:  91  train loss:  0.14691299200057983  test loss: 0.1481482833623886
epoch:  101  train loss:  0.14678983390331268  test loss: 0.14811602234840393
epoch:  111  train loss:  0.14671793580055237  test loss: 0.14831072092056274
epoch:  121  train loss:  0.1466740369796753  test loss: 0.1484694927930832
epoch:  13

In [15]:
from sklearn.pipeline import Pipeline
scaler = StandardScaler()
pca = PCA(n_components=0.9)
pipe = Pipeline([('SCL', scaler), ('PCA', pca)])
X_PC_trn = pipe.fit_transform(X_train)
test_PC = pipe.transform(test.drop('ID', axis=1).values)
print(test_PC.shape)

(75818, 74)


In [28]:
tst_torch_pc = torch.from_numpy(test_PC).float()
tst_torch_pc.size()

torch.Size([75818, 74])

In [32]:
model = model.eval()
y_pred = model(tst_torch_pc)
y_pred.shape

torch.Size([75818, 1])

In [39]:
act = nn.Sigmoid()
y_pred_prob = act(y_pred)
y_pred_prob[:,0].shape

torch.Size([75818])

In [41]:
submit = pd.DataFrame({'ID':test.ID,'TARGET': y_pred_prob[:,0].detach().numpy()})
submit.to_csv("sbt_pytrch_1.csv", index=False)

In [42]:
!kaggle competitions submit -c santander-customer-satisfaction -f "sbt_pytrch_1.csv" -m "Pytorch 1st"

100% 1.30M/1.30M [00:01<00:00, 939kB/s] 
Successfully submitted to Santander Customer Satisfaction