In [14]:
# 1. 타이타닉 데이터셋 불러오기
import pandas as pd

titanic_data = pd.read_csv('titanic.csv')

In [15]:
# 2. 누락된 값 채우기
titanic_data['Age'].fillna(titanic_data['Age'].mean(), inplace=True)
titanic_data.dropna(subset=['Embarked'], inplace=True)

In [16]:
# 3. 'Age' 열의 데이터 유형 확인 및 변환
print(titanic_data['Age'].dtype)
titanic_data['Age'] = titanic_data['Age'].astype(float)

float64


In [17]:
# 4. 범주형 데이터 숫자로 변환 (원핫인코딩 등)
titanic_data = pd.get_dummies(titanic_data, columns=['Sex', 'Embarked'])

In [19]:
# 5. 텐서로 변환
# import torch

#X = torch.tensor(titanic_data.drop(['Age'], axis=1).values, dtype=torch.float32)
#y = torch.tensor(titanic_data['Age'].values, dtype=torch.float32).view(-1, 1)

import torch

# 'Age' 열을 제외한 숫자형 데이터만 선택
numeric_data = titanic_data.drop(['Age'], axis=1).select_dtypes(include=['float64', 'float32', 'int64', 'int32'])

X = torch.tensor(numeric_data.values, dtype=torch.float32)
y = torch.tensor(titanic_data['Age'].values, dtype=torch.float32).view(-1, 1)



In [20]:
# 6. 데이터 분할
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [21]:
# 신경망 모델 정의
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# 모델 초기화
input_size = X_train.shape[1]
hidden_size = 64
model = NeuralNet(input_size, hidden_size)

# 손실 함수 및 최적화기 정의
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [22]:
# 모델 훈련
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass 및 최적화
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 382.9207
Epoch [20/100], Loss: 347.8085
Epoch [30/100], Loss: 334.5106
Epoch [40/100], Loss: 329.4608
Epoch [50/100], Loss: 326.9124
Epoch [60/100], Loss: 325.1617
Epoch [70/100], Loss: 323.6517
Epoch [80/100], Loss: 322.3578
Epoch [90/100], Loss: 321.0495
Epoch [100/100], Loss: 319.7879


In [23]:
# 모델 예측
with torch.no_grad():
    predictions = model(X_test).numpy()

# 평균 제곱 오차 계산
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 341.06775
