In [2]:
# Load the Drive helper and mount
from google.colab import drive

# Mount Google Drive inside the Colab filesystem.
# The first call in a session prompts for authorization.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
# Read the Titanic CSV files from the mounted Drive.
# The directory is given as an absolute path under the '/content/drive' mount
# point so the reads do not depend on the notebook's current working directory.
DATA_DIR = "/content/drive/My Drive/titanic"
train = pd.read_csv(DATA_DIR + "/train.csv")
test = pd.read_csv(DATA_DIR + "/test.csv")
# gender_submission.csv is kept under the name Y_test; a later cell reads its
# PassengerId column when building the submission file.
Y_test = pd.read_csv(DATA_DIR + "/gender_submission.csv")
# Quick sanity check of (rows, columns) for both splits.
print(train.shape)
print(test.shape)

(891, 12)
(418, 11)


In [0]:
import torch

In [0]:
# Pick the first CUDA GPU when one is available, otherwise use the CPU.
# NOTE(review): none of the visible cells moves the model or the tensors to
# `device`; confirm whether it is meant to be used.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [6]:
# Training labels from the 'Survived' column as a 1-D int64 tensor, the target
# format used with nn.CrossEntropyLoss further down.
# `.values` hands torch the underlying NumPy array, so the conversion is
# positional and does not rely on the DataFrame index labels being 0..n-1.
Y_train = torch.tensor(train['Survived'].values, dtype=torch.long)
print(Y_train.size())

torch.Size([891])


In [0]:
D = 7  # number of features: Pclass, Sex, Age, SibSp, Parch, Fare, Embarked
# Preallocate the training feature matrix; the row count comes from the data
# instead of a hard-coded literal.
X_train = torch.zeros(len(train), D)
# Missing ages are replaced by the sentinel -0.5 so the column contains no NaN.
train["Age"] = train["Age"].fillna(-0.5)

In [0]:
# Fill the preallocated X_train one whole column at a time.
# Converting each column through `.values` is positional (no per-cell label
# lookups) and takes its length from `train`, so no row count is hard-coded.
# Encoding, by column index:
#   0 Pclass, 2 Age, 3 SibSp, 4 Parch, 5 Fare -> copied as floats
#   1 Sex      -> 1.0 for 'male', 2.0 for anything else
#   6 Embarked -> 1.0 for 'C', 2.0 for 'Q', 3.0 for anything else (incl. missing)
for col, column in [
    (0, train['Pclass']),
    (1, train['Sex'].eq('male').map({True: 1.0, False: 2.0})),
    (2, train['Age']),
    (3, train['SibSp']),
    (4, train['Parch']),
    (5, train['Fare']),
    (6, train['Embarked'].map({'C': 1.0, 'Q': 2.0}).fillna(3.0)),
]:
    X_train[:, col] = torch.tensor(column.values, dtype=X_train.dtype)

# A NaN feature would turn the loss into NaN during training, so fail early.
assert not torch.isnan(X_train).any(), "X_train contains missing values"

In [9]:
# Show the encoded features of the first training row as a sanity check.
print(X_train[0])

tensor([ 3.0000,  1.0000, 22.0000,  1.0000,  0.0000,  7.2500,  3.0000])


In [0]:
# Preallocate the test feature matrix; the row count comes from the data
# instead of a hard-coded literal.
X_test = torch.zeros(len(test), D)
# Same missing-age sentinel as used for the training split.
test["Age"] = test["Age"].fillna(-0.5)

In [0]:
# Fill the preallocated X_test one whole column at a time, using the same
# encoding as the training features:
#   0 Pclass, 2 Age, 3 SibSp, 4 Parch, 5 Fare -> copied as floats
#   1 Sex      -> 1.0 for 'male', 2.0 for anything else
#   6 Embarked -> 1.0 for 'C', 2.0 for 'Q', 3.0 for anything else (incl. missing)
# Converting each column through `.values` is positional and takes its length
# from `test`, so no row count is hard-coded.

# A missing Fare would become NaN in X_test and make that row's logits NaN,
# leaving its prediction meaningless. Fill any gap with the training median so
# nothing is estimated from the test split itself.
fare_fill = train['Fare'].median()

for col, column in [
    (0, test['Pclass']),
    (1, test['Sex'].eq('male').map({True: 1.0, False: 2.0})),
    (2, test['Age']),
    (3, test['SibSp']),
    (4, test['Parch']),
    (5, test['Fare'].fillna(fare_fill)),
    (6, test['Embarked'].map({'C': 1.0, 'Q': 2.0}).fillna(3.0)),
]:
    X_test[:, col] = torch.tensor(column.values, dtype=X_test.dtype)

# Every feature must be a real number before it reaches the model.
assert not torch.isnan(X_test).any(), "X_test contains missing values"

In [0]:
from IPython import display
from torch import nn, optim
import random
# Optimizer hyperparameters: Adam step size and L2 weight-decay strength.
learning_rate = 1e-3
lambda_l2 = 1e-5

# Seed both Python's and PyTorch's random number generators so that weight
# initialisation is repeatable from run to run.
seed = 12345
torch.manual_seed(seed)
random.seed(seed)

In [38]:
# A small multilayer perceptron built from the nn package:
# D input features -> 100 hidden units with ReLU -> 2 class logits.
# Every Linear layer owns a weight matrix and a bias vector.
model = nn.Sequential(
    #nn.BatchNorm1d(D),
    nn.Linear(D, 100),
    #nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 2),
)

# Cross-entropy loss for the two-class problem; it takes the raw logits
# together with the integer class labels.
criterion = nn.CrossEntropyLoss()

# Adam performs the parameter updates; weight_decay adds the L2 penalty.
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=lambda_l2,
)

# Full-batch training: each of the 1000 steps uses the whole training set.
model.train()  # training mode stays on for the entire loop
for t in range(1000):
    # Forward pass: one row of logits per training example, then the loss.
    y_pred = model(X_train)
    loss = criterion(y_pred, Y_train)

    # Training accuracy: share of rows whose largest logit matches the label.
    score, predicted = y_pred.max(dim=1)
    acc = predicted.eq(Y_train).sum().float() / Y_train.size(0)

    # Print the progress line; clear_output(wait=True) defers the clearing
    # until the next output arrives, so only the latest line stays visible.
    print("[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f" % (t, loss.item(), acc))
    display.clear_output(wait=True)

    # Reset stale gradients, backpropagate the loss, then apply one update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

[EPOCH]: 999, [LOSS]: 0.362999, [ACCURACY]: 0.845


In [39]:
# Inference on the test features.
# No gradients are needed here, so the forward pass runs under
# torch.no_grad() and no autograd graph is recorded for the logits.
model.eval()
with torch.no_grad():
    y = model(X_test)
# The predicted class is the index of the largest logit in each row.
score, ypredicted = torch.max(y, 1)
print(ypredicted)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
        1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
        1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,

In [34]:
# Disabled: accuracy of the predictions against the 'Survived' column of
# gender_submission.csv (loaded as Y_test).
#acc = (torch.tensor(Y_test['Survived'], dtype=torch.long) == ypredicted).sum().float() / len(ypredicted)
#print(acc)

# Passenger ids for the submission file, read from gender_submission.csv.
ids = Y_test.loc[:, 'PassengerId']
print(ids)

0       892
1       893
2       894
3       895
4       896
5       897
6       898
7       899
8       900
9       901
10      902
11      903
12      904
13      905
14      906
15      907
16      908
17      909
18      910
19      911
20      912
21      913
22      914
23      915
24      916
25      917
26      918
27      919
28      920
29      921
       ... 
388    1280
389    1281
390    1282
391    1283
392    1284
393    1285
394    1286
395    1287
396    1288
397    1289
398    1290
399    1291
400    1292
401    1293
402    1294
403    1295
404    1296
405    1297
406    1298
407    1299
408    1300
409    1301
410    1302
411    1303
412    1304
413    1305
414    1306
415    1307
416    1308
417    1309
Name: PassengerId, Length: 418, dtype: int64


In [0]:
# Assemble the submission table: one row per passenger id with its predicted
# class. The predictions are converted to a plain list of Python ints first,
# instead of relying on pandas to coerce a torch tensor implicitly.
submission_df = {"PassengerId": ids,
                 "Survived": ypredicted.tolist()}
submission = pd.DataFrame(submission_df)

In [0]:
# Write the submission as CSV without the DataFrame index column.
submission.to_csv(path_or_buf="submission2.csv", index=False)