In [1]:
import pandas as pd
import torch

## Downloading data sets

In [2]:
!pip install kaggle
from google.colab import files
files.upload()

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle competitions download -c titanic -p /content/data



Saving kaggle.json to kaggle.json
Downloading titanic.zip to /content/data
  0% 0.00/34.1k [00:00<?, ?B/s]
100% 34.1k/34.1k [00:00<00:00, 32.0MB/s]


## Extracting data from `titanic.zp`

In [3]:
import zipfile
import pandas as pd

# Path to your ZIP file
zip_path = 'data/titanic.zip'

# Open the ZIP file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    # Extract the desired files
    with zip_ref.open('gender_submission.csv') as gender_file, \
         zip_ref.open('test.csv') as test_file, \
         zip_ref.open('train.csv') as train_file:

        # Load the files into Pandas DataFrames
        gender_submission = pd.read_csv(gender_file)
        test = pd.read_csv(test_file)
        train = pd.read_csv(train_file)

In [4]:
gender_submission[:5]

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


In [5]:
train[:5]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
test[:5]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


* `SibSp` - number of siblings (бартів і сестер) / spouses (подружжя) aboard the Titanic
* `Parch` -	number of parents / children aboard the Titanic
* `Fare` - Passenger fare (Тариф)
* `Embarked` - Port of Embarkation (порт відпливу)
  * `C` - Cherbourg;
  * `Q` - Queenstown;
  * `S` - Southampton;

In [7]:
train.groupby(["Survived", "Pclass"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Age,SibSp,Parch,Fare
Survived,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,410.3,43.695312,0.2875,0.3,64.684007
0,2,452.123711,33.544444,0.319588,0.14433,19.412328
0,3,453.580645,26.555556,0.672043,0.384409,13.669364
1,1,491.772059,35.368197,0.492647,0.389706,95.608029
1,2,439.08046,25.901566,0.494253,0.643678,22.0557
1,3,394.058824,20.646118,0.436975,0.420168,13.694887


In [8]:
train[train["Age"] < 18].groupby(["Survived", "Pclass"]).count()
# Most kids that drowned had ticked of 3rd class

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,1,1,1,1,1,1,1,1,1,1
0,2,2,2,2,2,2,2,2,2,0,2
0,3,49,49,49,49,49,49,49,49,1,49
1,1,11,11,11,11,11,11,11,11,11,11
1,2,21,21,21,21,21,21,21,21,4,21
1,3,29,29,29,29,29,29,29,29,2,29


In [9]:
train[train["Age"] > 50].groupby(["Survived", "Pclass", "Sex"]).count()
# Most pensioners that drowned are men, and most pensioners that survived are women of 1st class

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PassengerId,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Pclass,Sex,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,male,21,21,21,21,21,21,21,15,21
0,2,female,1,1,1,1,1,1,1,1,1
0,2,male,11,11,11,11,11,11,11,0,11
0,3,male,9,9,9,9,9,9,9,0,9
1,1,female,13,13,13,13,13,13,13,12,12
1,1,male,5,5,5,5,5,5,5,5,5
1,2,female,2,2,2,2,2,2,2,0,2
1,2,male,1,1,1,1,1,1,1,0,1
1,3,female,1,1,1,1,1,1,1,0,1


In [10]:
train[(train["Age"] > 18) & (train["Age"] < 50)].groupby(["Survived", "Pclass", "Sex"]).count()
# Most middle age people that drowned are men

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PassengerId,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Pclass,Sex,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,female,1,1,1,1,1,1,1,1,1
0,1,male,37,37,37,37,37,37,37,31,37
0,2,female,5,5,5,5,5,5,5,0,5
0,2,male,66,66,66,66,66,66,66,2,66
0,3,female,34,34,34,34,34,34,34,1,34
0,3,male,165,165,165,165,165,165,165,3,165
1,1,female,58,58,58,58,58,58,58,50,57
1,1,male,30,30,30,30,30,30,30,26,30
1,2,female,49,49,49,49,49,49,49,7,49
1,2,male,5,5,5,5,5,5,5,1,5


In [11]:
train.groupby(["SibSp", "Survived"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Pclass,Name,Sex,Age,Parch,Ticket,Fare,Cabin,Embarked
SibSp,Survived,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,398,398,398,398,296,398,398,398,49,398
0,1,210,210,210,210,175,210,210,210,77,208
1,0,97,97,97,97,86,97,97,97,17,97
1,1,112,112,112,112,97,112,112,112,52,112
2,0,15,15,15,15,14,15,15,15,1,15
2,1,13,13,13,13,11,13,13,13,5,13
3,0,12,12,12,12,8,12,12,12,1,12
3,1,4,4,4,4,4,4,4,4,2,4
4,0,15,15,15,15,15,15,15,15,0,15
4,1,3,3,3,3,3,3,3,3,0,3


In [12]:
train.groupby(["Sex", "Survived"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Sex,Survived,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
female,0,81,81,81,64,81,81,81,81,6,81
female,1,233,233,233,197,233,233,233,233,91,231
male,0,468,468,468,360,468,468,468,468,62,468
male,1,109,109,109,93,109,109,109,109,45,109


In [13]:
train.loc[train["Cabin"].notnull()].groupby(["Survived", "Pclass"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Survived,Pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1,59,59,59,52,59,59,59,59,59,59
0,2,3,3,3,3,3,3,3,3,3,3
0,3,6,6,6,5,6,6,6,6,6,6
1,1,117,117,117,108,117,117,117,117,117,115
1,2,13,13,13,12,13,13,13,13,13,13
1,3,6,6,6,5,6,6,6,6,6,6


In [14]:
train['Cabin'] = train['Cabin'].notnull()
train_data = train
train_data[:10]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,False,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,True,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,False,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,True,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,False,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,False,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,True,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,False,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,False,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,False,C


In [15]:
train_data = train_data.dropna(subset=['Age'])
train_data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,False,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,True,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,False,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,True,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,False,S
...,...,...,...,...,...,...,...,...,...,...,...,...
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,False,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,False,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,True,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,True,C


# Work on the competition

## Preparing data

### Setting imports

In [16]:
import pandas as pd
import numpy as np
import torch
from torch import nn

### Download zip file with data

In [17]:
!pip install kaggle
from google.colab import files
files.upload()  # Upload your kaggle.json file

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle competitions download -c titanic -p /content/data



Saving kaggle.json to kaggle (1).json
titanic.zip: Skipping, found more recently modified local copy (use --force to force download)


### Extracting data sets from zip file

In [18]:
import zipfile

# Path to your ZIP file
zip_path = 'data/titanic.zip'

# Open the ZIP file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    # Extract the desired files
    with zip_ref.open('gender_submission.csv') as gender_file, \
         zip_ref.open('test.csv') as test_file, \
         zip_ref.open('train.csv') as train_file:

        # Load the files into Pandas DataFrames
        gender_submission = pd.read_csv(gender_file)
        test = pd.read_csv(test_file)
        train = pd.read_csv(train_file)

### Clearing data

In [19]:
train['Cabin'] = train['Cabin'].notnull()
train['Age'].fillna(train['Age'].mean(), inplace=True)
train["Sex"] = train["Sex"] == 'male'
train["Sex"] = train["Sex"].astype(int)
train_data = train

print("NaNs in 'Cabin' column:", train_data['Cabin'].isna().any())
print("NaNs in 'Age' column:", train_data['Age'].isna().any())
print("NaNs in 'Pclass' column:", train_data['Pclass'].isna().any())
print("NaNs in 'SibSp' column:", train_data['SibSp'].isna().any())
print("NaNs in 'Parch' column:", train_data['Parch'].isna().any())
print("NaNs in 'Fare' column:", train_data['Fare'].isna().any())
print("NaNs in 'Parch' column:", train_data['Parch'].isna().any())
train[:10]

NaNs in 'Cabin' column: False
NaNs in 'Age' column: False
NaNs in 'Pclass' column: False
NaNs in 'SibSp' column: False
NaNs in 'Parch' column: False
NaNs in 'Fare' column: False
NaNs in 'Parch' column: False


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,False,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,True,C
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,False,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,True,S
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,False,S
5,6,0,3,"Moran, Mr. James",1,29.699118,0,0,330877,8.4583,False,Q
6,7,0,1,"McCarthy, Mr. Timothy J",1,54.0,0,0,17463,51.8625,True,S
7,8,0,3,"Palsson, Master. Gosta Leonard",1,2.0,3,1,349909,21.075,False,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,27.0,0,2,347742,11.1333,False,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",0,14.0,1,0,237736,30.0708,False,C


### Selecting needed features

In [20]:
features = ["Pclass", "Age", "Sex", "SibSp", "Parch", "Fare", "Cabin"]
train_labels = train_data["Survived"]
train_features = pd.get_dummies(train_data[features])
train_features[:5], train_labels[:5]

(   Pclass   Age  Sex  SibSp  Parch     Fare  Cabin
 0       3  22.0    1      1      0   7.2500  False
 1       1  38.0    0      1      0  71.2833   True
 2       3  26.0    0      0      0   7.9250  False
 3       1  35.0    0      1      0  53.1000   True
 4       3  35.0    1      0      0   8.0500  False,
 0    0
 1    1
 2    1
 3    1
 4    0
 Name: Survived, dtype: int64)

### Convert to tensors

In [21]:
# Convert 'Cabin' column to integers (True/False to 1/0)
train_features['Cabin'] = train_features['Cabin'].astype(int)

# Convert the DataFrames to numpy arrays
features_array = train_features.to_numpy()
labels_array = train_labels.to_numpy()

# Convert the numpy arrays to torch tensors
X = torch.tensor(features_array, dtype=torch.float32)
y = torch.tensor(labels_array, dtype=torch.float32)

In [22]:
X[0], y[0], X[0].shape, y[0].shape

(tensor([ 3.0000, 22.0000,  1.0000,  1.0000,  0.0000,  7.2500,  0.0000]),
 tensor(0.),
 torch.Size([7]),
 torch.Size([]))

In [23]:
# Split data into training and test splits
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
len(X_train), len(X_test), len(y_train), len(y_test)

(712, 179, 712, 179)

## Building a model

In [25]:
# device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [26]:
from torch import nn
# 1. Construct a model that subclasses nn.Module
class Model(nn.Module):
  def __init__(self, in_shape: int, out_shape: int, hidden_layers: int):
    super().__init__()
    # 2. Create 2 nn.Linear layers capable of handling the shapes of our data
    self.layer_1 = nn.Linear(in_features=in_shape, out_features=hidden_layers)
    self.layer_2 = nn.Linear(in_features=hidden_layers, out_features=out_shape)

  # 3. Define forward() method that outlines the forward pass
  def forward(self, x: torch.Tensor) -> torch.Tensor:
    return self.layer_2(self.layer_1(x)) # x -> layer_1 -> layer_2 -> output

# 4. Instantiate an instance of our model class and sent it ti the target device
model = Model(in_shape=len(features),
              out_shape=1,
              hidden_layers=15).to(device)
model

Model(
  (layer_1): Linear(in_features=7, out_features=15, bias=True)
  (layer_2): Linear(in_features=15, out_features=1, bias=True)
)

### Setting up loss function, optmizer and Testing & Training loop

In [27]:
# Setup loss function and optimizer
from torch.optim.lr_scheduler import StepLR

loss_fn = nn.BCEWithLogitsLoss()

optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=1000, gamma=0.1)  # Reduce LR every 1000 epochs by a factor of 0.1

In [28]:
# Accuracy calculaton func
def accuracy_fn(y_true, y_pred):
  correct = torch.eq(y_true, y_pred).sum().item()
  acc = (correct/len(y_pred)) * 100
  return acc

In [29]:
### TRAINING AND TESTING LOOP

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

epochs = 10000
for epoch in range(epochs):
  model.train()

  # 1. Forward pass
  y_logits = model(X_train).squeeze()
  y_pred = torch.round(torch.sigmoid(y_logits)) # turn logits -> pred probs -> pred labels

  # 2. Calculate loss/accuracy
  loss = loss_fn(y_logits, y_train) # nn.BCEWithLogitsLoss expects raw logits as input
  acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

  # 3. Optimizer zero grad
  optimizer.zero_grad()

  # 4. Loss backpropagation
  loss.backward()

  # 5. Optimizer step
  optimizer.step()

  ### Testing
  model.eval()
  with torch.inference_mode():
    # 1. forward pass
    test_logits = model(X_test).squeeze()
    test_pred = torch.round(torch.sigmoid(test_logits))

    # 2. calculate loss/accuracy
    test_loss = loss_fn(test_pred, y_test)
    test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

  # Print out
  if epoch % 1000 == 0:
    print(f"Epoch: {epoch} | Train loss: {loss:.5f} | Train accuracy: {acc:.2f}% | Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%")

Epoch: 0 | Train loss: 1.80965 | Train accuracy: 62.36% | Test loss: 0.84655 | Test accuracy: 49.72%
Epoch: 1000 | Train loss: 0.57418 | Train accuracy: 67.42% | Test loss: 0.65117 | Test accuracy: 74.30%
Epoch: 2000 | Train loss: 0.55013 | Train accuracy: 69.24% | Test loss: 0.65844 | Test accuracy: 74.86%
Epoch: 3000 | Train loss: 0.53252 | Train accuracy: 71.35% | Test loss: 0.66147 | Test accuracy: 76.54%
Epoch: 4000 | Train loss: 0.51863 | Train accuracy: 73.03% | Test loss: 0.64896 | Test accuracy: 78.77%
Epoch: 5000 | Train loss: 0.50757 | Train accuracy: 75.00% | Test loss: 0.64896 | Test accuracy: 78.77%
Epoch: 6000 | Train loss: 0.49871 | Train accuracy: 76.40% | Test loss: 0.64549 | Test accuracy: 79.33%
Epoch: 7000 | Train loss: 0.49158 | Train accuracy: 77.11% | Test loss: 0.64761 | Test accuracy: 78.77%
Epoch: 8000 | Train loss: 0.48579 | Train accuracy: 80.06% | Test loss: 0.64761 | Test accuracy: 78.77%
Epoch: 9000 | Train loss: 0.48107 | Train accuracy: 79.63% | Test l

### Saving/Loading model functions

In [30]:
def save_model(model, filepath, save_state_dict=True):
    if save_state_dict:
        torch.save(model.state_dict(), filepath)
    else:
        torch.save(model, filepath)

def load_model(model, filepath, device='cpu', load_state_dict=True):
    if load_state_dict:
        model.load_state_dict(torch.load(filepath, map_location=device))
    else:
        model = torch.load(filepath, map_location=device)

    model.to(device)
    return model

In [31]:
save_model(model, 'model_0.pth', save_state_dict=True)

In [32]:
loaded_model = load_model(model=Model(in_shape=len(features),
                                      out_shape=1,
                                      hidden_layers=15),
                          filepath='model_0.pth',
                          device=device,
                          load_state_dict=True)
loaded_model

Model(
  (layer_1): Linear(in_features=7, out_features=15, bias=True)
  (layer_2): Linear(in_features=15, out_features=1, bias=True)
)

In [33]:
loaded_model.eval()
with torch.inference_mode():
  # 1. forward pass
  loaded_test_logits = loaded_model(X_test).squeeze()
  loaded_test_pred = torch.round(torch.sigmoid(loaded_test_logits))

  # 2. calculate loss/accuracy
  test_loss = loss_fn(loaded_test_pred, y_test)
  test_acc = accuracy_fn(y_true=y_test, y_pred=loaded_test_pred)

loaded_test_pred

tensor([0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0.,
        0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0.,
        0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1.,
        0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0.,
        1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.,
        0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1.])

In [34]:
### Making predictions

In [35]:
test['Cabin'] = test['Cabin'].notnull()
test['Age'].fillna(test['Age'].mean(numeric_only=True), inplace=True)
test['Fare'].fillna(test['Fare'].mean(numeric_only=True), inplace=True)
test["Sex"] = test["Sex"] == 'male'
test["Sex"] = test["Sex"].astype(int)
test_data = test

In [36]:
features = ["Pclass", "Age", "Sex", "SibSp", "Parch", "Fare", "Cabin"]
test_features = pd.get_dummies(test_data[features])
test_features[:5]

Unnamed: 0,Pclass,Age,Sex,SibSp,Parch,Fare,Cabin
0,3,34.5,1,0,0,7.8292,False
1,3,47.0,0,1,0,7.0,False
2,2,62.0,1,0,0,9.6875,False
3,3,27.0,1,0,0,8.6625,False
4,3,22.0,0,1,1,12.2875,False


In [37]:
# Convert 'Cabin' column to integers (True/False to 1/0)
test_features['Cabin'] = test_features['Cabin'].astype(int)

# Convert the DataFrames to numpy arrays
features_array = test_features.to_numpy()

# Convert the numpy arrays to torch tensors
X = torch.tensor(features_array, dtype=torch.float32)

In [38]:
model.eval()
with torch.inference_mode():
  test_logits = model(X.to(device)).squeeze()
  test_pred = torch.round(torch.sigmoid(test_logits))

test_pred

tensor([0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0.,
        0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
        1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0.,
        1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0.,
        1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1.,
        0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 

In [39]:
output_preds = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': test_pred.cpu().type(torch.int32)})
output_preds.to_csv('submission.csv', index=False)