In [1]:
import pandas as pd
from pandas import DataFrame as df

In [2]:
train_df = pd.read_csv('/kaggle/input/titanic/train.csv', index_col=0)
train_df.head(5)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
train_df.dtypes

Survived      int64
Pclass        int64
Name         object
Sex          object
Age         float64
SibSp         int64
Parch         int64
Ticket       object
Fare        float64
Cabin        object
Embarked     object
dtype: object

In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

mean, std = dict(), dict()


def normalize(df, train):
    for col in df:
        if col == 'Survived':
            continue
        if df[col].dtype == object:
            le.fit(df[col])
            df[col] = le.transform(df[col])
            
        df[col] = df[col].astype(float)
        if train:
            mean[col] = df[col].mean()
            std[col] = df[col].std()
        
        df[col].fillna(mean[col], inplace=True)        
        df[col] = (df[col] - mean[col]) / std[col]    
    
    df.drop('Name', axis=1, inplace=True)    

In [5]:
normalize(train_df, True)
train_df.describe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,0.383838,-7.575933000000001e-17,-1.31582e-16,1.874046e-16,3.5886e-17,4.5854330000000005e-17,6.778466e-17,-1.1962000000000002e-17,2.432273e-16,6.579099e-17
std,0.486592,1.0,1.0,0.8950551,1.0,1.0,1.0,1.0,1.0,1.0
min,0.0,-1.565228,-1.354813,-2.015566,-0.4742788,-0.4734077,-1.685474,-0.6480577,-3.629337,-1.937372
25%,0.0,-0.3691575,-1.354813,-0.5300051,-0.4742788,-0.4734077,-0.8963307,-0.4888737,0.4512488,-0.6782923
50%,0.0,0.8269128,0.737281,0.0,-0.4742788,-0.4734077,-0.007610727,-0.3571902,0.4512488,0.5807877
75%,1.0,0.8269128,0.737281,0.3649113,0.4325504,-0.4734077,0.9010246,-0.02423274,0.4512488,0.5807877
max,1.0,0.8269128,0.737281,3.462699,6.780355,6.970233,1.700126,9.66174,0.4512488,1.839868


In [6]:
from torch.utils.data import Dataset, DataLoader
from torch import tensor
import torch

class CustomDataset(Dataset):
    def __init__(self, df):        
        self.X = tensor(df.drop('Survived', axis=1).values)
        survived, nsurvived = tensor(df['Survived'].values), tensor((1 - df['Survived']).values)
        survived, nsurvived = survived.view(survived.shape[0], 1), nsurvived.view(nsurvived.shape[0], 1)
        self.Y = torch.cat((nsurvived, survived), axis=1).float()
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, index):
        return self.X[index], self.Y[index]
    
dataset = CustomDataset(train_df)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
    

In [7]:
from torch import nn
class Net(nn.Module):
    def __init__(self, input_size, hidden_size1, output_size):
        super(Net, self).__init__()
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(hidden_size1, dtype=float),
            nn.ReLU(),
            nn.LazyLinear(output_size, dtype=float),
        )
                
        
    def forward(self, x):                
        return self.fc(x)


In [8]:
hparams = {
    'input_size': train_df.shape[1],
    'hidden_size1': 100,
    'output_size': 2,    
}

lr = 1e-1
wd = 0
ec = 1000


In [9]:
model = Net(**hparams)
model.train()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)



In [10]:
for epoch in range(ec):
    correct = 0
    total = 0
    
    for batch in dataloader:
        x_tensor, y_tensor = batch        
        y_pred = model(x_tensor)
        loss = criterion(y_pred, y_tensor)        
        
        predictions = torch.argmax(y_pred, dim=1)
        targets = torch.argmax(y_tensor, dim=1)
        total += predictions.shape[0]
        correct += (predictions==targets).sum().item()
        
        assert not torch.isnan(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'{(100*correct / total):.2f}')

78.23
80.70
80.81
81.26
81.03
81.93
82.04
82.27
82.27
82.04
83.05
83.05
82.72
83.61
84.06
83.61
83.50
83.95
82.94
84.18
83.95
84.29
84.40
84.18
84.40
84.18
84.18
83.95
84.62
83.84
84.74
84.51
84.06
83.73
85.07
84.29
84.96
84.85
85.19
84.29
84.40
84.18
84.29
84.40
85.07
85.19
84.85
84.74
84.85
84.96
85.07
84.96
84.96
84.62
85.30
85.07
85.07
84.74
85.86
85.19
85.41
85.19
85.07
85.52
84.74
85.63
85.30
85.41
84.51
85.52
85.63
85.19
85.63
85.41
85.52
85.97
85.75
86.42
85.30
86.20
85.52
85.07
85.63
85.30
86.08
85.63
85.75
85.30
85.63
86.20
85.75
86.87
85.75
85.41
86.31
86.42
86.42
85.63
86.53
86.08
86.42
85.86
84.96
86.08
86.20
85.75
86.53
85.52
86.08
85.86
86.87
86.20
86.53
86.53
86.20
86.98
86.87
86.31
85.41
86.53
86.31
87.09
86.42
87.21
86.64
86.87
85.86
86.53
86.31
86.42
87.54
87.09
85.97
86.76
86.87
87.54
86.20
86.98
86.87
86.42
87.09
86.42
86.98
86.87
87.21
86.98
86.76
86.64
86.98
86.98
87.21
86.42
87.09
86.53
87.43
87.09
86.64
87.21
87.21
87.21
85.86
87.21
87.65
87.21
87.21
88.10
87.3

In [11]:
test_df = pd.read_csv('/kaggle/input/titanic/test.csv', index_col=0)

In [12]:
normalize(test_df, True)
test_df.describe()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
count,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0,418.0
mean,-3.0810020000000004e-17,-3.8246920000000005e-17,3.505967e-17,-1.381139e-17,-5.5245550000000004e-17,-1.184592e-16,-1.9123460000000003e-17,1.019918e-16,-9.774212000000001e-17
std,1.0,1.0,0.8909349,1.0,1.0,1.0,0.9988002,1.0,1.0
min,-1.503319,-1.321292,-2.12271,-0.4988722,-0.3997686,-1.682681,-0.6372515,-3.532364,-1.640632
25%,-1.503319,-1.321292,-0.5128329,-0.4988722,-0.3997686,-0.8899063,-0.496022,0.4484846,-0.4703517
50%,0.8724364,0.7550242,0.0,-0.4988722,-0.3997686,0.0005116897,-0.3787141,0.4484846,0.6999282
75%,0.8724364,0.7550242,0.3862442,0.6162539,-0.3997686,0.9188279,-0.07382163,0.4484846,0.6999282
max,0.8724364,0.7550242,3.224507,8.422137,8.770534,1.683704,8.526608,0.4484846,0.6999282


In [13]:
x_tensor = torch.tensor(test_df.iloc[:, :].values, dtype=float)
x_tensor.shape

torch.Size([418, 9])

In [14]:
with torch.no_grad():
    outputs = model(x_tensor)
    predicted_classes = torch.argmax(outputs, dim=1)
    result = pd.DataFrame({'PassengerId': test_df.index, 'Survived': predicted_classes})
    result.to_csv('sub.csv', index=False)