In [52]:
#import modules
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [53]:
#combine datasets into 1 dataframe
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
df = pd.concat([train_df, test_df], sort=False)

In [54]:
#condense to only neccessary columns & get rid of empty rows
df = df.dropna()
df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Fare', 'Embarked']]
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Fare,Embarked
1,1.0,1,female,38.0,1,71.2833,C
3,1.0,1,female,35.0,1,53.1,S
6,0.0,1,male,54.0,0,51.8625,S
10,1.0,3,female,4.0,1,16.7,S
11,1.0,1,female,58.0,0,26.55,S


In [55]:
le = LabelEncoder()

df[['Sex', 'Embarked']] = df[['Sex', 'Embarked']].apply(le.fit_transform)
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Fare', 'Embarked']].values.astype(np.float32)
y = df['Survived'].values.astype(np.float32).reshape(-1, 1)

In [56]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = scaler.fit_transform(y)

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [58]:
X_train = torch.from_numpy(X_train)
X_test = torch.from_numpy(X_test)
y_train = torch.from_numpy(y_train)
y_test = torch.from_numpy(y_test)

In [59]:
class RegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(6, 1)
        
    def forward(self, x):
        pred = torch.sigmoid(self.linear(x))
        return pred

In [60]:
model = RegressionModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [61]:
epochs = 1000

In [62]:
for i in range(epochs):
    model.train()
    y_pred = model.forward(X_train)
    loss = criterion(y_pred, y_train)

    #print('epoch: ', i, 'loss: ', loss.item())

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

with torch.no_grad():
    y_eval = model.forward(X_test)
    loss = criterion(y_eval, y_test)
    print('loss: ', np.round((loss.item() * 100)), '%')

loss:  98.0 %


In [63]:
#test the model
test_df = pd.read_csv('test.csv')
test_df = test_df[['PassengerId', 'Pclass', 'Sex', 'Age', 'SibSp', 'Fare', 'Embarked']]

test_df[['Sex', 'Embarked']] = test_df[['Sex', 'Embarked']].apply(le.fit_transform)
test_df['Survived'] = 0

X = test_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Fare', 'Embarked']].values.astype(np.float32)
X = scaler.fit_transform(X)
X = torch.from_numpy(X)

y = test_df[['Survived']].values.astype(np.float32)
y = scaler.fit_transform(y)
y = torch.from_numpy(y)
test_df.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Fare,Embarked,Survived
0,892,3,1,34.5,0,7.8292,1,0
1,893,3,0,47.0,1,7.0,2,0
2,894,2,1,62.0,0,9.6875,1,0
3,895,3,1,27.0,0,8.6625,2,0
4,896,3,0,22.0,1,12.2875,2,0


In [64]:
model.eval()
with torch.no_grad():
    y_eval = model.forward(X)
    loss = criterion(y_eval, y)
    print('loss: ', np.round((loss.item() * 100)), '%')


loss:  nan %


In [65]:
test_df['Survived'] = y_eval

In [66]:
test_df.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Fare,Embarked,Survived
0,892,3,1,34.5,0,7.8292,1,0.013248
1,893,3,0,47.0,1,7.0,2,0.668439
2,894,2,1,62.0,0,9.6875,1,0.028277
3,895,3,1,27.0,0,8.6625,2,0.008262
4,896,3,0,22.0,1,12.2875,2,0.618862


In [67]:
test_df['Survived'] = test_df['Survived'].apply(lambda x: 1 if x > 0.5 else 0)

In [68]:
test_df.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Fare,Embarked,Survived
0,892,3,1,34.5,0,7.8292,1,0
1,893,3,0,47.0,1,7.0,2,1
2,894,2,1,62.0,0,9.6875,1,0
3,895,3,1,27.0,0,8.6625,2,0
4,896,3,0,22.0,1,12.2875,2,1


In [71]:
#export to csv
test_df = test_df[['PassengerId', 'Survived']]
test_df = test_df.dropna()
test_df.to_csv('submission.csv', index=False)