In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim 


In [2]:
df = pd.read_csv("data/Titanic-Dataset.csv")
df['Title'] = df['Name'].str.extract(' ([A-Za-z]+)\.', expand=False)
df['Age'] = df['Age'].fillna(df.groupby('Title')['Age'].transform('median'))

df = df[[ 'Survived','Pclass', 'Sex', 'Age','SibSp','Parch','Fare', 'Embarked']]
df = df.dropna()

df["Sex"] = LabelEncoder().fit_transform(df["Sex"])
df["Embarked"] = LabelEncoder().fit_transform(df["Embarked"])

X = df.drop("Survived", axis=1).values
y = df["Survived"].values

X_train,X_test,Y_train,Y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [3]:
class TitanicDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    
train_ds = TitanicDataset(X_train, Y_train)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)

In [4]:
class TitanicModel(nn.Module):
    def __init__(self):
        super(TitanicModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(X_train.shape[1],16),
            nn.ReLU(),
            nn.Linear(16,8),
            nn.ReLU(),
            nn.Linear(8,1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)
    
model = TitanicModel()
criterion = nn.BCELoss()
optimize = optim.Adam(model.parameters(),lr=0.001)

In [5]:
for epoch in range(100):
    for X_batch, y_batch in train_loader:
        optimize.zero_grad()
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimize.step()
    print(f"Epoch{epoch+1}, Loss:{loss.item():.4f}")

Epoch1, Loss:0.5542
Epoch2, Loss:0.4354
Epoch3, Loss:0.4369
Epoch4, Loss:0.6614
Epoch5, Loss:0.6534
Epoch6, Loss:0.9036
Epoch7, Loss:0.6373
Epoch8, Loss:0.4913
Epoch9, Loss:0.5925
Epoch10, Loss:0.3845
Epoch11, Loss:0.6846
Epoch12, Loss:0.4830
Epoch13, Loss:0.5725
Epoch14, Loss:0.3885
Epoch15, Loss:0.6434
Epoch16, Loss:0.5466
Epoch17, Loss:0.4715
Epoch18, Loss:0.5508
Epoch19, Loss:0.5075
Epoch20, Loss:0.5752
Epoch21, Loss:0.4504
Epoch22, Loss:0.4483
Epoch23, Loss:0.4869
Epoch24, Loss:0.4681
Epoch25, Loss:0.3862
Epoch26, Loss:0.6909
Epoch27, Loss:0.4183
Epoch28, Loss:0.3361
Epoch29, Loss:0.5408
Epoch30, Loss:0.3048
Epoch31, Loss:0.4185
Epoch32, Loss:0.3626
Epoch33, Loss:0.3425
Epoch34, Loss:0.2321
Epoch35, Loss:0.8203
Epoch36, Loss:0.2649
Epoch37, Loss:0.3235
Epoch38, Loss:0.4752
Epoch39, Loss:0.5516
Epoch40, Loss:0.3574
Epoch41, Loss:0.3258
Epoch42, Loss:0.4972
Epoch43, Loss:0.3313
Epoch44, Loss:0.3493
Epoch45, Loss:0.3640
Epoch46, Loss:0.2583
Epoch47, Loss:0.5454
Epoch48, Loss:0.5373
E

In [9]:
import torch

dummy_input = torch.randn(1, X_train.shape[1]) 

torch.onnx.export(
    model,                 
    dummy_input,            
    "model/titanic_model.onnx",   
    export_params=True,    
    opset_version=18,       
    do_constant_folding=True,
    input_names=['input'],  
    output_names=['output'],
    dynamic_axes={'input' : {0 : 'batch_size'},'output' : {0 : 'batch_size'}}
)

print("save model")

  torch.onnx.export(


[torch.onnx] Obtain model graph for `TitanicModel([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `TitanicModel([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
save model


1

In [14]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.2500,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.9250,2
3,1,1,0,35.0,1,0,53.1000,2
4,0,3,1,35.0,0,0,8.0500,2
...,...,...,...,...,...,...,...,...
886,0,2,1,27.0,0,0,13.0000,2
887,1,1,0,19.0,0,0,30.0000,2
888,0,3,0,21.0,1,2,23.4500,2
889,1,1,1,26.0,0,0,30.0000,0


In [13]:
Title.value_counts()

Name
Mr          517
Miss        182
Mrs         125
Master       40
Dr            7
Rev           6
Col           2
Mlle          2
Major         2
Ms            1
Mme           1
Don           1
Lady          1
Sir           1
Capt          1
Countess      1
Jonkheer      1
Name: count, dtype: int64

In [16]:
df['Age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888    21.0
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64