In [1]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

In [2]:
## Import and split the training and testing dataset
train_data_mode = pd.read_csv(r'C:\Users\liuch\Desktop\395\Final_Project\Spaceship_Titanic\spaceship-titanic\train_data_mode.csv')
train_data_knn = pd.read_csv(r'C:\Users\liuch\Desktop\395\Final_Project\Spaceship_Titanic\spaceship-titanic\train_data_knn.csv')
train_data_mode.drop(['PassengerId', 'Name', 'Cabin'], axis=1, inplace=True)
train_data_knn.drop(['PassengerId', 'Name', 'Cabin'], axis=1, inplace=True)

train_mode, test_mode = train_test_split(train_data_mode, test_size=0.2, random_state=7)

train_mode_y = train_mode['Transported']
train_mode_X = train_mode.drop('Transported',axis=1)
test_mode_y = test_mode['Transported']
test_mode_X = test_mode.drop('Transported',axis=1)

train_knn, test_knn = train_test_split(train_data_knn, test_size=0.2, random_state=7)

train_knn_y = train_knn['Transported']
train_knn_X = train_knn.drop('Transported',axis=1)
test_knn_y = test_knn['Transported']
test_knn_X = test_knn.drop('Transported',axis=1)

In [3]:
# Convert Boolean Columns to Floats
for col in train_mode_X.columns:
    if train_mode_X[col].dtype == bool:
        train_mode_X[col] = train_mode_X[col].astype(float)

print(train_mode_X.dtypes)

CryoSleep                    float64
Age                          float64
VIP                          float64
RoomService                  float64
FoodCourt                    float64
ShoppingMall                 float64
Spa                          float64
VRDeck                       float64
Num                          float64
Deck_A                       float64
Deck_B                       float64
Deck_C                       float64
Deck_D                       float64
Deck_E                       float64
Deck_F                       float64
Deck_G                       float64
Deck_T                       float64
Side_P                       float64
Side_S                       float64
Destination_55 Cancri e      float64
Destination_PSO J318.5-22    float64
Destination_TRAPPIST-1e      float64
HomePlanet_Earth             float64
HomePlanet_Europa            float64
HomePlanet_Mars              float64
Cabin_encoded                  int64
dtype: object


In [4]:
# Convert Boolean Columns to Floats
for col in test_mode_X.columns:
    if test_mode_X[col].dtype == bool:
        test_mode_X[col] = test_mode_X[col].astype(float)

print(test_mode_X.dtypes)

CryoSleep                    float64
Age                          float64
VIP                          float64
RoomService                  float64
FoodCourt                    float64
ShoppingMall                 float64
Spa                          float64
VRDeck                       float64
Num                          float64
Deck_A                       float64
Deck_B                       float64
Deck_C                       float64
Deck_D                       float64
Deck_E                       float64
Deck_F                       float64
Deck_G                       float64
Deck_T                       float64
Side_P                       float64
Side_S                       float64
Destination_55 Cancri e      float64
Destination_PSO J318.5-22    float64
Destination_TRAPPIST-1e      float64
HomePlanet_Earth             float64
HomePlanet_Europa            float64
HomePlanet_Mars              float64
Cabin_encoded                  int64
dtype: object


In [5]:
# Convert pandas dataframes to numpy arrays and then to PyTorch tensors
X_train = torch.tensor(train_mode_X.values.astype(np.float32))
y_train = torch.tensor(train_mode_y.values.astype(np.float32))
X_test = torch.tensor(test_mode_X.values.astype(np.float32))
y_test = torch.tensor(test_mode_y.values.astype(np.float32))

# Create Tensor datasets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

In [6]:
# Create DataLoaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
class BinaryClassifier(nn.Module):
    def __init__(self):
        super(BinaryClassifier, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.2)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.2)
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

In [9]:
model = BinaryClassifier()
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 200
for epoch in range(epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{epochs} Loss: {loss.item()}')

Epoch 1/200, Training Loss: 3.4391647500729343, Validation Loss: 0.7370868975465948
Epoch 2/200, Training Loss: 0.6687964999347652, Validation Loss: 0.6464433523741635
Epoch 3/200, Training Loss: 0.6497206827369305, Validation Loss: 0.6120418071746826
Epoch 4/200, Training Loss: 0.5840393017738237, Validation Loss: 0.6075944369489497
Epoch 5/200, Training Loss: 0.5598403304268461, Validation Loss: 0.5657515769655054
Epoch 6/200, Training Loss: 0.5396629001842727, Validation Loss: 0.5619476025754755
Epoch 7/200, Training Loss: 0.5366451483254039, Validation Loss: 0.5217130260034041
Epoch 8/200, Training Loss: 0.5170524280279054, Validation Loss: 0.521913880109787
Epoch 9/200, Training Loss: 0.5104240306200237, Validation Loss: 0.5107990503311157
Epoch 10/200, Training Loss: 0.5089170976516304, Validation Loss: 0.5065065053376284
Epoch 11/200, Training Loss: 0.4950163312461398, Validation Loss: 0.5062081038951873
Epoch 12/200, Training Loss: 0.5009851592396377, Validation Loss: 0.4910611

In [10]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = outputs.squeeze().round()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')

Accuracy: 49.971247843588266%
