In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [2]:
train_df = pd.read_csv('/kaggle/input/titanic/train.csv')
test_df = pd.read_csv('/kaggle/input/titanic/test.csv')

In [3]:
# Combine train + test to preprocess together
all_df = pd.concat([train_df, test_df], sort=False)

In [4]:
# Fill missing values
all_df.fillna({'Age': all_df['Age'].median(),
               'Fare': all_df['Fare'].median(),
               'Embarked': all_df['Embarked'].mode()[0]}, inplace=True)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Encode categorical features
label_encoders = {}
for col in ['Sex', 'Embarked']:
    le = LabelEncoder()
    all_df[col] = le.fit_transform(all_df[col])
    label_encoders[col] = le

# Select features
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X_all = all_df[features]

# Split back
X_train = X_all.iloc[:len(train_df)]
X_test = X_all.iloc[len(train_df):]
y_train = train_df['Survived']

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
class TitanicDataset(Dataset):
    def __init__(self, X, y=None):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32) if y is not None else None

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        return self.X[idx]

In [7]:
class Classifier(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

In [8]:
class Regressor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()  # 회귀도 sigmoid 붙여서 확률처럼 사용
        )

    def forward(self, x):
        return self.model(x)

In [9]:
def train_model(model, dataloader, epochs=20, lr=0.001, task='classification'):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch_X, batch_y in dataloader:
            output = model(batch_X).squeeze()
            loss = criterion(output, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataloader):.4f}")

In [10]:
# Dataset & DataLoader
train_dataset = TitanicDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Classification
clf_model = Classifier(input_dim=X_train.shape[1])
train_model(clf_model, train_loader, task='classification')

# Regression
reg_model = Regressor(input_dim=X_train.shape[1])
train_model(reg_model, train_loader, task='regression')

Epoch 1/20, Loss: 0.6473
Epoch 2/20, Loss: 0.6205
Epoch 3/20, Loss: 0.5925
Epoch 4/20, Loss: 0.5635
Epoch 5/20, Loss: 0.5354
Epoch 6/20, Loss: 0.5099
Epoch 7/20, Loss: 0.4905
Epoch 8/20, Loss: 0.4738
Epoch 9/20, Loss: 0.4618
Epoch 10/20, Loss: 0.4537
Epoch 11/20, Loss: 0.4462
Epoch 12/20, Loss: 0.4409
Epoch 13/20, Loss: 0.4372
Epoch 14/20, Loss: 0.4324
Epoch 15/20, Loss: 0.4289
Epoch 16/20, Loss: 0.4263
Epoch 17/20, Loss: 0.4242
Epoch 18/20, Loss: 0.4225
Epoch 19/20, Loss: 0.4202
Epoch 20/20, Loss: 0.4188
Epoch 1/20, Loss: 0.7593
Epoch 2/20, Loss: 0.7207
Epoch 3/20, Loss: 0.6886
Epoch 4/20, Loss: 0.6582
Epoch 5/20, Loss: 0.6265
Epoch 6/20, Loss: 0.5928
Epoch 7/20, Loss: 0.5603
Epoch 8/20, Loss: 0.5295
Epoch 9/20, Loss: 0.5053
Epoch 10/20, Loss: 0.4859
Epoch 11/20, Loss: 0.4730
Epoch 12/20, Loss: 0.4629
Epoch 13/20, Loss: 0.4557
Epoch 14/20, Loss: 0.4498
Epoch 15/20, Loss: 0.4453
Epoch 16/20, Loss: 0.4420
Epoch 17/20, Loss: 0.4376
Epoch 18/20, Loss: 0.4355
Epoch 19/20, Loss: 0.4326
Epoc

In [11]:
# 예측
test_dataset = TitanicDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def predict(model, loader):
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in loader:
            out = model(batch).squeeze()
            preds.extend(out.tolist())
    return np.array(preds)

# Classification
clf_preds = predict(clf_model, test_loader)
clf_preds_bin = (clf_preds >= 0.5).astype(int)

# Regression (후처리 동일)
reg_preds = predict(reg_model, test_loader)
reg_preds_bin = (reg_preds >= 0.5).astype(int)

# 제출용
submission = pd.read_csv('/kaggle/input/titanic/gender_submission.csv')
submission['Survived'] = clf_preds_bin  # 또는 reg_preds_bin
submission.to_csv('submission.csv', index=False)