In [110]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import preprocessing
import copy

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the bare file name.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [111]:
# Load the Titanic training CSV and preview its first row.
train_data_frame = pd.read_csv('/kaggle/input/titanic/train.csv')
train_data_frame.head(1)


In [112]:
# Dimensions of the training frame before any cleaning is applied.
train_data_frame.shape

In [113]:
def clean_data(data):
    """Drop identifier columns, fill missing values and label-encode categoricals.

    The frame passed in is never modified; all work happens on the new frame
    produced by ``drop``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the columns ``PassengerId``, ``Name``,
        ``Ticket``, ``Cabin``, ``Sex`` and ``Embarked``.

    Returns
    -------
    pandas.DataFrame
        New frame without the four dropped columns, in which missing values of
        numeric columns are replaced by ``0``, missing values of all other
        columns are replaced by ``"U"``, and ``Sex`` / ``Embarked`` hold the
        integer codes produced by ``LabelEncoder``.

    Raises
    ------
    KeyError
        If one of the columns to drop or to encode is not present.

    Notes
    -----
    The encoder is refitted on every call and for every column, so the integer
    codes of two different frames only agree when both contain the same set of
    categories. The fitted classes are printed for each encoded column.
    """
    # drop() returns a new frame, so the assignments below cannot leak back
    # into the caller's DataFrame.
    data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

    for col in data:
        # is_numeric_dtype recognises every integer/float width, not only the
        # platform-default int and float dtypes.
        if pd.api.types.is_numeric_dtype(data[col]):
            data[col] = data[col].fillna(0)
        else:
            data[col] = data[col].fillna("U")

    le = preprocessing.LabelEncoder()
    for col in ['Sex', 'Embarked']:
        data[col] = le.fit_transform(data[col])
        print(le.classes_)
    return data

In [114]:

# Replace the raw training frame with its cleaned version.
# NOTE: because the name is rebound, re-running this cell passes the already
# cleaned frame back into clean_data, which no longer contains the columns
# that clean_data drops.
train_data_frame = clean_data(train_data_frame)
# clean_data fills every missing value, so this count is expected to be 0.
print(train_data_frame['Age'].isna().sum())
train_data_frame.head(5)

In [115]:
# Show the per-column count of missing values, then build the feature matrix
# from every column except the target.
print(train_data_frame.isna().sum())
X = train_data_frame.drop(columns=['Survived']).to_numpy()
print(X.shape)

In [116]:
# Target values as a single-column 2-D array of shape (n_samples, 1).
Y_ground_truth = train_data_frame['Survived'].to_numpy().reshape(-1, 1)
print(Y_ground_truth.shape)

In [117]:
import torch as T
# Device handle used when the network is created; fixed to the CPU.
device = T.device("cpu")

class Net(T.nn.Module):
    """Three-layer fully connected network with a sigmoid output.

    Layer sizes are ``input_size -> input_size -> 2 -> output_size``. All
    weights are initialised with Xavier-uniform and all biases with zeros.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int
        Number of output units.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.inpt = T.nn.Linear(input_size, input_size)
        self.middle = T.nn.Linear(input_size, 2)
        self.oupt = T.nn.Linear(2, output_size)

        # Same initialisation for every layer, applied in definition order so
        # a given seed yields the same starting weights.
        for layer in (self.inpt, self.middle, self.oupt):
            T.nn.init.xavier_uniform_(layer.weight)
            T.nn.init.zeros_(layer.bias)

    def forward(self, X):
        """Return the sigmoid activations of the output layer for ``X``.

        ``X`` is a float tensor whose last dimension equals ``input_size``.
        """
        Z = T.relu(self.inpt(X))
        Z = T.relu(self.middle(Z))
        return T.sigmoid(self.oupt(Z))

    def predict(self, X, threshold=0.5):
        """Return hard class labels for ``X``.

        Parameters
        ----------
        X : torch.Tensor
            Float tensor whose last dimension equals ``input_size``.
        threshold : float, optional
            An output strictly greater than this value is labelled 1,
            otherwise 0. Defaults to 0.5.

        Returns
        -------
        torch.Tensor
            ``int32`` tensor with the same shape as ``forward(X)``.
        """
        # Inference only: skip autograd bookkeeping. Comparing against a plain
        # scalar keeps the result on whatever device the activations live on.
        with T.no_grad():
            return (self.forward(X) > threshold).int()

In [118]:
# Check the array shapes and derive the layer sizes from them.
print(X.shape)
print(Y_ground_truth.shape)
inputDim = X.shape[1]
print(inputDim)
outputDim = Y_ground_truth.shape[1]
print(outputDim)

# Training hyper-parameters.
learningRate = 0.0001
epochs = 100000

# Seed before building the network so its initial weights are reproducible.
T.manual_seed(1)
net = Net(inputDim, outputDim).to(device)

# Smoke test on the first training row (row 0) with the untrained network.
first_input = T.from_numpy(X[0, :].astype(np.float32))
# Call the module itself rather than .forward() so registered hooks also run.
y = net(first_input)
print(y)
y = net.predict(first_input)
print(y)


In [119]:
# Loss: mean squared error between the network output and the labels.
criterion = T.nn.MSELoss()

# Optimiser: Adam over all network parameters, every hyper-parameter spelled out.
optimizer = T.optim.Adam(
    net.parameters(),
    lr=learningRate,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

In [120]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Training tensors; float32 matches the dtype of the network parameters.
inputs = T.from_numpy(X.astype(np.float32))
labels = T.from_numpy(Y_ground_truth.astype(np.float32))
print(inputs.shape)
print(labels.shape)
print(pd.DataFrame(inputs.numpy()).head(5))
print(pd.DataFrame(labels.numpy()).head(5))

# Full-batch training: each epoch is one optimiser step over all of `inputs`.
losses = []
for epoch in range(epochs):
    optimizer.zero_grad()

    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()

    optimizer.step()
    losses.append(loss.item())

    # Periodic progress report: current loss and training-set accuracy.
    if epoch % 10000 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
        predict_outputs = net.predict(inputs)
        # Hand scikit-learn flat NumPy arrays instead of relying on implicit
        # tensor-to-array conversion.
        accuracy = accuracy_score(
            labels.numpy().ravel(),
            predict_outputs.detach().numpy().ravel(),
        ) * 100
        print("Accuracy = {}".format(accuracy))

# Loss curve over all epochs.
plt.plot(range(epochs), losses)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

In [121]:
# Inspect `outputs` as left by the last training iteration. It was computed
# before that iteration's optimizer step, so it reflects the weights from just
# before the final update.
print(outputs.shape)
# detach() drops the autograd graph so the tensor can be converted to NumPy.
print(pd.DataFrame(outputs.detach().numpy()).head(5))

In [122]:
# Load the Titanic test CSV.
test_data_frame = pd.read_csv('/kaggle/input/titanic/test.csv')
# Only a cell's last expression is rendered, so the one-row preview has to be
# printed explicitly to show up in the output.
print(test_data_frame.head(1))
test_data_frame.shape

In [123]:
# Apply the same cleaning to the test set.
# NOTE: clean_data fits a new LabelEncoder on each call, so the integer codes
# produced here come from the test data's own categories, not from the fit on
# the training data.
test_data_frame = clean_data(test_data_frame)
print(test_data_frame['Age'].isna().sum())
print(test_data_frame.head(5))
print(test_data_frame.shape)

In [124]:
# Every column left after cleaning is used as a feature; nothing is dropped
# here, unlike for the training frame.
X_test = test_data_frame.to_numpy()
print(X_test.shape)


In [125]:
# Cast to float32 before wrapping in a tensor, as is done for the training inputs.
test_inputs = T.from_numpy(X_test.astype(np.float32))
print(test_inputs.shape)

In [126]:
# Hard 0/1 predictions for every test row from the trained network.
test_outputs = net.predict(test_inputs)

In [127]:
# Re-read the raw test file to recover PassengerId, which clean_data removed
# from test_data_frame.
test_data_frame_with_PSID = pd.read_csv('/kaggle/input/titanic/test.csv')

# One row per passenger: the id next to the predicted label taken from the
# first output column.
df = pd.DataFrame(
    {
        "PassengerId": test_data_frame_with_PSID["PassengerId"].to_numpy(),
        "Survived": test_outputs.detach().numpy().astype(int)[:, 0],
    }
)

In [128]:
# Write the predictions to submission.csv in the working directory, without the index column.
df.to_csv("submission.csv", index=False)