In [14]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define the neural network model
class TitanicModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(TitanicModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the custom dataset
class TitanicDataset(Dataset):
    def __init__(self, data, labels=None):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = None
        if labels is not None:
            self.labels = torch.tensor(labels, dtype=torch.long)

    def __getitem__(self, index):
        x = self.data[index]
        if self.labels is not None:
            y = self.labels[index]
            return x, y
        else:
            return x

    def __len__(self):
        return len(self.data)

# Load the dataset
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Drop the specified columns
drop_columns = ["Name", "Sex", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"]
df_train = df_train.drop(drop_columns, axis=1)
df_test = df_test.drop(drop_columns, axis=1)

# Split into features and labels
features = df_train.drop("Survived", axis=1)
labels = df_train["Survived"]

# Split into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(df_test.values)

# Set up the model, loss function, and optimizer
input_size = len(features.columns)
hidden_size = 64
num_classes = 2  # Assuming binary classification (Survived or not)
model = TitanicModel(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert data to tensors and create datasets
train_dataset = TitanicDataset(X_train, y_train.values)
val_dataset = TitanicDataset(X_val, y_val.values)
test_dataset = TitanicDataset(X_test)

# Set up data loaders
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set model to training mode

    for batch_idx, (data, targets) in enumerate(train_dataloader):
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training progress
        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{batch_idx + 1}/{len(train_dataloader)}], Loss: {loss.item():.4f}")

    # Validation
    model.eval()  # Set model to evaluation mode
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in val_dataloader:
            outputs = model(data)
            val_loss += criterion(outputs, targets).item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    val_loss /= len(val_dataloader)
    accuracy = correct / total
    print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy * 100:.2f}%")

# Evaluation
model.eval()  # Set model to evaluation mode
predictions = []

with torch.no_grad():
    for data in test_dataloader:
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(predicted.tolist())

# Use the predictions as needed
print(predictions)




Epoch [1/10], Step [10/23], Loss: nan
Epoch [1/10], Step [20/23], Loss: nan
Epoch [1/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [2/10], Step [10/23], Loss: nan
Epoch [2/10], Step [20/23], Loss: nan
Epoch [2/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [3/10], Step [10/23], Loss: nan
Epoch [3/10], Step [20/23], Loss: nan
Epoch [3/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [4/10], Step [10/23], Loss: nan
Epoch [4/10], Step [20/23], Loss: nan
Epoch [4/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [5/10], Step [10/23], Loss: nan
Epoch [5/10], Step [20/23], Loss: nan
Epoch [5/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [6/10], Step [10/23], Loss: nan
Epoch [6/10], Step [20/23], Loss: nan
Epoch [6/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [7/10], Step [10/23], Loss: nan
Epoch [7/10], Step [20/23], Loss: nan
Epoch [7/10], Validation Loss: nan, Validation Accuracy: 58.66%
Epoch [8/10], Step [