In [41]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torchsummary import summary

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error # MSE

import matplotlib.pyplot as plt

In [2]:
# Read the regression table, using the first CSV column as the row index.
df = pd.read_csv('./data/reg.csv', index_col=[0])

# Feature matrix: every column except the 'Price' target.
X = df.drop(columns='Price').to_numpy()

# Target values, reshaped into a column vector of shape (n_samples, 1).
y = df['Price'].to_numpy().reshape(-1, 1)

In [14]:
# Create Data Loader
class TensorData(Dataset):
    """Map-style dataset pairing feature rows with their regression targets.

    Both inputs are converted to ``float32`` tensors once, in the constructor,
    so indexing afterwards only slices tensors.

    Args:
        x_data: array-like of features, one sample per entry along dim 0.
        y_data: array-like of targets with the same number of entries along
            dim 0 as ``x_data``.

    Raises:
        ValueError: if ``x_data`` and ``y_data`` hold a different number of
            samples.
    """

    def __init__(self, x_data, y_data):
        # torch.as_tensor replaces the legacy torch.FloatTensor constructor:
        # it accepts NumPy arrays, nested lists and tensors of any dtype and
        # always yields float32 here.
        self.x_data = torch.as_tensor(x_data, dtype=torch.float32)
        self.y_data = torch.as_tensor(y_data, dtype=torch.float32)

        # Fail early instead of letting a mismatch surface later as an
        # IndexError (or silently ignored rows) while iterating.
        if self.x_data.shape[0] != self.y_data.shape[0]:
            raise ValueError(
                "x_data and y_data must contain the same number of samples, "
                f"got {self.x_data.shape[0]} and {self.y_data.shape[0]}"
            )
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len

# Hold out part of the data for the final test; `trainset` is what the
# cross-validation loop further down splits into folds.
# NOTE(review): test_size = 0.7 leaves only 30% of the rows for training, and
# no random_state is passed, so the split differs on every run -- confirm both
# are intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.7)

trainset = TensorData(X_train, y_train)
testset = TensorData(X_test, y_test)
# Fixed: this line used the undefined name `testsets`, which raises NameError
# on a fresh kernel. shuffle = False keeps the test order deterministic.
testloader = torch.utils.data.DataLoader(testset, batch_size = 32, shuffle = False)

In [15]:
class Regressor(nn.Module):
    """Fully connected regression network with two hidden layers.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Dropout -> Linear -> ReLU.
    Because the output also passes through ReLU, predictions are never
    negative.

    Args:
        in_features: number of input features per sample (default 13).
        hidden1: width of the first hidden layer (default 50).
        hidden2: width of the second hidden layer (default 30).
        dropout_p: dropout probability applied after the second hidden
            layer (default 0.5).

    With the defaults the network is identical to the original hard-coded
    13 -> 50 -> 30 -> 1 model.
    """

    def __init__(self, in_features=13, hidden1=50, hidden2=30, dropout_p=0.5):
        super().__init__()
        # Registration order (fc1, fc2, fc3, dropout) is preserved so that
        # print(model), parameter listings and weight initialisation order
        # stay exactly as before.
        self.fc1 = nn.Linear(in_features, hidden1, bias = True)
        self.fc2 = nn.Linear(hidden1, hidden2, bias = True)
        self.fc3 = nn.Linear(hidden2, 1, bias = True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Map a batch of feature rows to one non-negative prediction each."""
        x = F.relu(self.fc1(x))
        # Dropout is only active in train() mode; eval() turns it into a no-op.
        x = self.dropout(F.relu(self.fc2(x)))
        # ReLU on the output layer clamps predictions at zero.
        x = F.relu(self.fc3(x))
        return x

In [16]:
# 3-fold cross-validation splitter; shuffle = True randomises which samples
# end up in each fold.
# NOTE(review): no random_state is passed, so fold membership changes between
# runs -- confirm that is acceptable.
kfold = KFold(n_splits = 3, shuffle = True)
# Mean-squared-error loss, used as the training objective in the fold loop.
criterion = nn.MSELoss()

In [17]:
def evaluation(dataloader, net=None):
    """Compute the RMSE of a model over every batch of ``dataloader``.

    Args:
        dataloader: iterable yielding ``(inputs, values)`` batches of tensors.
        net: model to evaluate. When omitted, the notebook-level global
            ``model`` is used, which is what this function always did before
            the parameter existed.

    Returns:
        The root-mean-squared error between targets and predictions, i.e.
        ``np.sqrt`` of sklearn's ``mean_squared_error``.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    if net is None:
        net = model  # fall back to the notebook's current global model

    # Remember the current mode so the caller's train/eval state is restored,
    # rather than unconditionally forcing train() on exit.
    was_training = net.training
    net.eval()

    # Collect per-batch tensors and concatenate once at the end instead of
    # re-allocating a growing tensor on every iteration.
    predicted_batches, actual_batches = [], []
    try:
        with torch.no_grad():
            for inputs, values in dataloader:
                predicted_batches.append(net(inputs))
                actual_batches.append(values)
    finally:
        net.train(was_training)

    if not predicted_batches:
        raise ValueError("evaluation() received a dataloader with no batches")

    predictions = torch.cat(predicted_batches, 0).numpy()
    actual = torch.cat(actual_batches, 0).numpy()

    # sklearn's signature is (y_true, y_pred); MSE is symmetric, so the value
    # is unchanged, but the arguments now match the documented order.
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    return rmse

In [18]:
# Validation RMSE of each fold, collected for the summary cell below.
validation_loss = []
for fold, (train_idx, val_idx) in enumerate(kfold.split(trainset)):

    # Both loaders read from `trainset`; the samplers restrict each loader to
    # the indices belonging to its side of the current fold.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    val_subsampler = torch.utils.data.SubsetRandomSampler(val_idx)
    trainloader = DataLoader(trainset, batch_size=32, sampler=train_subsampler)
    valloader = DataLoader(trainset, batch_size=32, sampler=val_subsampler)

    # Every fold starts from a freshly initialised network and optimizer.
    model = Regressor()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-7)

    for epoch in range(400):
        for inputs, values in trainloader:
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, values)
            loss.backward()
            optimizer.step()

    # Score the trained fold model on its own training and validation subsets.
    train_rmse = evaluation(trainloader)
    val_rmse = evaluation(valloader)
    print(f"k-fold: {fold}, Train Loss: {train_rmse:.4f}, Validation Loss: {val_rmse:.4f}")
    validation_loss.append(val_rmse)

k-fold: 0, Train Loss: 0.0541, Validation Loss: 0.1616
k-fold: 1, Train Loss: 0.0839, Validation Loss: 0.0924
k-fold: 2, Train Loss: 0.0644, Validation Loss: 0.1168


In [19]:
# Summarise the per-fold validation RMSEs as mean ± standard deviation
# (NumPy's default, i.e. ddof=0).
validation_loss = np.array(validation_loss)
mean, std = validation_loss.mean(), validation_loss.std()
print(f"Validation Score: {mean:.4f} ± {std:.4f}")

Validation Score: 0.1236 ± 0.0287


In [20]:
# NOTE(review): `model` here is whatever the last cross-validation fold left
# behind; it is not retrained on the full training set before being scored.
# Confirm that this is the model you mean to report.

# Un-shuffled loader over the complete training set (replaces the per-fold one).
trainloader = DataLoader(trainset, batch_size=32, shuffle=False)

train_rmse, test_rmse = evaluation(trainloader), evaluation(testloader)
print(f"Train RMSE: {train_rmse:.4f}")
print(f"Test RMSE: {test_rmse:.4f}")

Train RMSE: 0.0854
Test RMSE: 0.1091


In [21]:
# Show the module tree (layers in registration order) of the current model.
print(model)

Regressor(
  (fc1): Linear(in_features=13, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [23]:
# Hand count of the learnable parameters: a Linear(in, out) layer with bias
# holds in * out weights plus out biases.
number_of_parameters = (
    (13 * 50 + 50)      # fc1
    + (50 * 30 + 30)    # fc2
    + (30 * 1 + 1)      # fc3
)
# These are model parameters (weights and biases), not hyperparameters. The
# old name is kept as an alias so any cell still referring to it keeps working.
number_of_hyperparameters = number_of_parameters
print(number_of_parameters)

2261


In [27]:
# Shape of every learnable tensor, in registration order.
for parameter in model.parameters():
    print(parameter.shape)

torch.Size([50, 13])
torch.Size([50])
torch.Size([30, 50])
torch.Size([30])
torch.Size([1, 30])
torch.Size([1])


In [28]:
# Weight and bias shapes of the first linear layer only.
print(model.fc1.weight.shape, model.fc1.bias.shape)

torch.Size([50, 13]) torch.Size([50])


In [37]:
# Same listing as above, but labelled with each parameter's qualified name.
for name, param in model.named_parameters():
    print(name, param.shape)

fc1.weight torch.Size([50, 13])
fc1.bias torch.Size([50])
fc2.weight torch.Size([30, 50])
fc2.bias torch.Size([30])
fc3.weight torch.Size([1, 30])
fc3.bias torch.Size([1])


In [45]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): from here on the global `model` lives on `device`.
# evaluation() feeds it tensors straight from the DataLoader without moving
# them, so re-running an evaluation cell after this one on a CUDA machine
# would presumably hit a device mismatch -- confirm.
model = model.to(device)

# Layer-by-layer summary from torchsummary. The printed output shapes
# ([-1, 10, ...]) show that input_size=(10, 13) is treated as a 10 x 13 input
# per sample, with the batch dimension added in front.
summary(model, input_size=(10, 13), device=str(device))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 10, 50]             700
            Linear-2               [-1, 10, 30]           1,530
           Dropout-3               [-1, 10, 30]               0
            Linear-4                [-1, 10, 1]              31
Total params: 2,261
Trainable params: 2,261
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.01
Estimated Total Size (MB): 0.02
----------------------------------------------------------------
