In [1]:
import pandas as pd
import math
import numpy as np
import torch

In [2]:
# Load the passenger table; this file also carries the 'Survived' labels
# that the final cell uses for scoring.
d = pd.read_csv(filepath_or_buffer='./test.csv')
d

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S,1
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,1
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S,0
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C,1
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S,0
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S,0


In [3]:
# Discard the identifier/free-text columns that are not used as features,
# then remove every row that still contains a missing value.
d = d.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin']).dropna()

### Type Casting

In [4]:
# Make sure Pclass is numeric (pd.to_numeric infers the numeric dtype from the values)
d = d.assign(Pclass=pd.to_numeric(d['Pclass']))

### Feature Scaling and Encoding

In [5]:
# Feature engineering: rescale the numeric columns and turn the categorical
# columns into 0/1 indicator columns. Every right-hand side below is computed
# from the frame as it was before this cell ran.

# Largest age in the cleaned data; ages are expressed as a fraction of it.
maxAge = d['Age'].max()

d = d.assign(
    # Age -> share of the maximum age, 2 decimals
    Age=(d['Age'] / maxAge).round(2),
    # Fare -> log10(fare + 1), 2 decimals (the +1 keeps a zero fare finite)
    Fare=np.log10(d['Fare'] + 1).round(2),
    # Pclass -> indicators for class 1 and class 2 (class 3 is the all-zero case)
    Pclass_1=(d['Pclass'] == 1).astype(int),
    Pclass_2=(d['Pclass'] == 2).astype(int),
    # Sex -> 1 for 'male', 0 otherwise; renamed to 'Male' right after
    Sex=(d['Sex'] == 'male').astype(int),
    # Embarked -> indicators for ports 'S' and 'C' (any other value is the all-zero case)
    Embark_S=(d['Embarked'] == 'S').astype(int),
    Embark_C=(d['Embarked'] == 'C').astype(int),
    # Constant column of ones, so one weight can act as the intercept
    Ones=1,
).rename(columns={'Sex': 'Male'})

d

Unnamed: 0,Pclass,Male,Age,SibSp,Parch,Fare,Embarked,Survived,Pclass_1,Pclass_2,Embark_S,Embark_C,Ones
0,3,1,0.45,0,0,0.95,Q,0,0,0,0,0,1
1,3,0,0.62,1,0,0.90,S,1,0,0,1,0,1
2,2,1,0.82,0,0,1.03,Q,0,0,1,0,0,1
3,3,1,0.36,0,0,0.99,S,0,0,0,1,0,1
4,3,0,0.29,1,1,1.12,S,1,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,3,0,0.04,1,1,1.17,S,1,0,0,1,0,1
411,1,0,0.49,1,0,1.96,Q,1,1,0,0,0,1
412,3,0,0.37,0,0,0.94,S,1,0,0,1,0,1
414,1,0,0.51,0,0,2.04,C,1,1,0,0,1,1


In [6]:
# Columns fed to the linear model. The order matters: the weight vector is
# multiplied element-wise against these columns, and 'Ones' is the constant column.
feature_columns = [
    'SibSp', 'Parch', 'Age', 'Fare',
    'Pclass_1', 'Pclass_2',
    'Embark_S', 'Embark_C',
    'Male', 'Ones',
]
final_data = d[feature_columns]

final_data

Unnamed: 0,SibSp,Parch,Age,Fare,Pclass_1,Pclass_2,Embark_S,Embark_C,Male,Ones
0,0,0,0.45,0.95,0,0,0,0,1,1
1,1,0,0.62,0.90,0,0,1,0,0,1
2,0,0,0.82,1.03,0,1,0,0,1,1
3,0,0,0.36,0.99,0,0,1,0,1,1
4,1,1,0.29,1.12,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...
409,1,1,0.04,1.17,0,0,1,0,0,1
411,1,0,0.49,1.96,1,0,0,0,0,1
412,0,0,0.37,0.94,0,0,1,0,0,1
414,0,0,0.51,2.04,1,0,0,1,0,1


In [13]:
# Weight vector of the linear model: one value per column of final_data, in
# the same order (SibSp, Parch, Age, Fare, Pclass_1, Pclass_2, Embark_S,
# Embark_C, Male, Ones).
# Earlier trial values, kept for reference:
#   [5, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#   [0.01] * 10
#   [0.0067, 0.0503, 0.0333, 0.1744, 0.0832, 0.0513, 0.0469, 0.0628, -0.0694, 0.1017]
w = np.array([
    -0.0737, -0.0310, -0.5324, 0.2062, 0.3000,
    0.1049, 0.1227, 0.2146, -0.6278, 0.6141,
])
w

array([-0.0737, -0.031 , -0.5324,  0.2062,  0.3   ,  0.1049,  0.1227,
        0.2146, -0.6278,  0.6141])

In [8]:
# finding the loss (pandas version of the squared-error loss; this whole cell is currently commented out)

# pred = (final_data*w).sum(axis=1)
# loss = (pred - d['Survived']) ** 2
# total_loss = loss.sum()
# data = {
#     'Predictions': pred,
#     'Survived': d['Survived'],
#     'Loss': loss
# }
# df = pd.DataFrame(data)

# print("Total Loss : ",total_loss)
# df

### Using Torch for Backprop

In [9]:
# data: the feature table as a float32 tensor (rows = passengers, columns = features).
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor(...)
# constructor; the resulting values and dtype are the same.
final_data_t = torch.tensor(final_data.to_numpy(), dtype=torch.float32)
# ground_truth = torch.tensor(d['Survived'].to_numpy(), dtype=torch.float32)

# weights: float32 copy of the NumPy weight vector `w`.
# Gradient tracking is off; to train, pass requires_grad=True here.
w_t = torch.tensor(w, dtype=torch.float32)

In [10]:
# Single pass over the data. The loop is kept so the (currently disabled)
# training steps below can be re-enabled and iterated.
for i in range(1):
    # Forward pass: per-row weighted sum of the features (w_t broadcasts across rows).
    predictions = torch.sum(final_data_t * w_t, dim=1)

    # Show the predictions rounded to the nearest integer. Use torch.round on a
    # detached tensor: np.round goes through NumPy conversion, which raises for
    # tensors that require grad (i.e. as soon as training is enabled).
    print(torch.round(predictions.detach()))
    # loss
    # loss = torch.sum((predictions - ground_truth) ** 2)
    # print(i, ' Total Loss: ', loss)
    
    # # backward pass
    # loss.backward()
    
    # # Nudge the gradients
    # with torch.no_grad():
    #     w_t -= 0.0001 * w_t.grad
    
    # # Reset Gradients
    # w_t.grad.zero_()

tensor([-0., 1., -0., 0., 1., 0., 1., 0., 1., 0., 0., 1., -0., 1., 1., 0., 0., 1.,
        1., 0., 0., 1., 1., -0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0.,
        1., 1., 0., 0., 1., 1., 1., 0., 1., 1., -0., 0., 0., 1., 0., 0., 0., 1.,
        1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1.,
        1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
        1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., -0., 1., 1., 0., 0., 0., -0., 0., 0., 1., 1., -0., 0., 1., 1.,
        0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 0., -0., 1., 1., 0., 1., 1., 0.,
        1., 1., 1., 0., 1., 0., 0., 0., 0., -0., 0., 0., 1., 1., 0., 0., 1., 1.,
        0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 0., 1., 0.,
        0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1.,
        1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0.,
        0., 0., 0., 1., 0., 1.

In [11]:
# Score the model: round each linear output to the nearest integer and compare
# it with the recorded label.
# detach() keeps this working even if the tensors are tracking gradients.
pred = np.round(predictions.detach().numpy())
survived = d['Survived'].to_numpy()
loss = np.abs(pred - survived)  # 0 where the rounded prediction equals the label
new_data = pd.DataFrame({
    'predictions': pred,
    'survived': survived,
    'loss': loss,
})

accurate_preds = new_data[new_data['loss'] == 0]
# Accuracy in percent. The denominator is the actual number of scored rows
# rather than a hard-coded 331, so it stays correct if the data changes.
len(accurate_preds) / len(new_data) * 100

92.14501510574019