In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
import torch.nn.functional as F
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os


In [None]:
# Locate the dataset relative to the directory the notebook was started from.
path = os.getcwd()
print(path)
# os.path.join inserts exactly one separator per component, so the result is
# well-formed even when the working directory is the filesystem root.
PATH = os.path.join(path, "pytorch_sample", "data", "reg.csv")
# The first CSV column is used as the row index rather than as a feature.
df = pd.read_csv(PATH, index_col=[0])
# Features are every column except 'Price'; the target is reshaped to (n, 1).
X = df.drop('Price', axis=1).to_numpy()
Y = df['Price'].to_numpy().reshape((-1,1))

In [None]:
class TensorData(Dataset):
    """Map-style dataset that stores paired features and targets as float tensors."""

    def __init__(self, x_data, y_data):
        """Convert both inputs with ``torch.FloatTensor`` and record the sample count.

        Args:
            x_data: Feature rows; anything ``torch.FloatTensor`` accepts.
            y_data: Targets; assumed to be aligned row-for-row with ``x_data``
                (not checked here).
        """
        features, targets = torch.FloatTensor(x_data), torch.FloatTensor(y_data)
        self.x_data = features
        self.y_data = targets
        # The sample count is taken from the targets' leading dimension.
        self.len = targets.size(0)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        sample = (self.x_data[index], self.y_data[index])
        return sample

    def __len__(self):
        """Return the number of samples recorded at construction time."""
        return self.len

In [None]:
# Split the arrays into training and held-out portions.
# NOTE(review): test_size=0.7 sends 70% of the rows to the test split and
# leaves 30% for training/cross-validation — confirm this ratio is intended.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.7,
)
# Wrap each split so it can be consumed through DataLoader.
trainset, testset = (
    TensorData(X_train, Y_train),
    TensorData(X_test, Y_test),
)
# The test loader iterates in a fixed order, 32 rows per batch.
testloader = DataLoader(dataset=testset, batch_size=32, shuffle=False)

In [None]:
class Regressor(nn.Module):
    """Fully connected regressor: 13 inputs -> 50 -> 30 -> 1 output."""

    def __init__(self):
        """Create the three linear layers (13->50, 50->30, 30->1), each with a bias."""
        super().__init__()
        self.fc1 = nn.Linear(13, 50, bias=True)
        self.fc2 = nn.Linear(50, 30, bias=True)
        self.fc3 = nn.Linear(30, 1, bias=True)

    def forward(self, x):
        """Map a batch of 13-feature rows to one prediction per row.

        Args:
            x: Float tensor whose last dimension is 13.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        # Linear layers stacked without an activation compose into a single
        # affine map, so the hidden layers would add no capacity. ReLU between
        # them makes the network genuinely non-linear.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # The output layer stays linear so any real value can be predicted.
        return self.fc3(x)
    

# 3-fold splitter for cross-validation. shuffle=True is used without a
# random_state, so fold membership is not fixed between runs.
kfold = KFold(shuffle=True, n_splits=3)
# Mean-squared-error objective minimised by the training loop.
criterion = nn.MSELoss(reduction="mean")


def evaluation(dataloader):
    """Compute the RMSE of the module-level ``model`` over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, values)`` batches.

    Returns:
        Root-mean-squared error between the model outputs and the target
        values, accumulated over every batch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.

    Note:
        Reads the global name ``model``, which must be bound before the call.
        The model's training/eval mode is restored to whatever it was on entry.
    """
    predicted_batches = []
    target_batches = []

    # Remember the current mode so a model the caller left in eval mode is not
    # silently switched back to training mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, values in dataloader:
                predicted_batches.append(model(inputs))
                target_batches.append(values)
    finally:
        model.train(was_training)

    if not predicted_batches:
        raise ValueError("evaluation() received a dataloader with no batches")

    # Concatenate once at the end instead of re-copying the running tensor on
    # every batch.
    predictions = torch.cat(predicted_batches, 0).numpy()
    actual = torch.cat(target_batches, 0).numpy()
    # scikit-learn's signature is (y_true, y_pred).
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    return rmse


# Per-fold validation RMSE, collected for the summary statistics below.
validation_loss = []
for fold, (train_idx, val_idx) in enumerate(kfold.split(trainset)):

    # Samplers restrict each loader to this fold's indices into `trainset`.
    train_subsampler = SubsetRandomSampler(train_idx)
    val_subsampler = SubsetRandomSampler(val_idx)

    trainloader = DataLoader(trainset, batch_size = 32, sampler = train_subsampler)
    valloader = DataLoader(trainset, batch_size = 32, sampler = val_subsampler)

    # A fresh model and optimizer per fold: weights never carry over between folds.
    # `model` is deliberately module-level because evaluation() reads it as a global.
    model = Regressor()
    optimizer = optim.Adam(model.parameters(), lr = 0.001, weight_decay=1e-7)

    for epoch in range(400):
        for data in trainloader:
            inputs, values = data
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, values)
            loss.backward()
            optimizer.step()

    train_rmse = evaluation(trainloader)
    val_rmse = evaluation(valloader)
    # "%.4f" prints four decimals; a bare "%4f" would only set a minimum field
    # width of 4 and still print six decimals.
    print("kfold",fold,"Train Loss,%.4f, Validation lOSS : %.4f" %(train_rmse, val_rmse))
    validation_loss.append(val_rmse)

# Mean and standard deviation of the validation RMSE across folds.
validation_loss = np.array(validation_loss)
mean = np.mean(validation_loss)
std = np.std(validation_loss)
print("validation Score:%.4f, +_ %.4f" %(mean,std))

모델 구조 및 가중치 확인

In [6]:
import torch
from torch import nn
import torch.nn.functional as F
from torchsummary import summary

In [8]:
class Regressor(nn.Module):
    """Fully connected regressor (13 -> 50 -> 30 -> 1) with ReLU activations and dropout."""

    def __init__(self):
        """Register three linear layers followed by a dropout module (p=0.5)."""
        super().__init__()
        # Attribute names and their order are part of the visible behavior:
        # they determine the parameter names and the printed module layout.
        self.fc1 = nn.Linear(in_features=13, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=30)
        self.fc3 = nn.Linear(in_features=30, out_features=1)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Run the forward pass and return one prediction per input row.

        Args:
            x: Float tensor whose last dimension is 13.

        Returns:
            Tensor with a last dimension of 1; values are >= 0 because ReLU is
            applied to the output layer.
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        # Dropout is applied to the second hidden activation only.
        hidden = self.dropout(hidden)
        return F.relu(self.fc3(hidden))


# Instantiate the network and show its layer layout.
model = Regressor()
print(model)

Regressor(
  (fc1): Linear(in_features=13, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=30, bias=True)
  (fc3): Linear(in_features=30, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [3]:
# Show the shape of every parameter tensor held by the model.
for tensor in model.parameters():
    print(tensor.shape)

torch.Size([50, 13])
torch.Size([50])
torch.Size([30, 50])
torch.Size([30])
torch.Size([1, 30])
torch.Size([1])


In [4]:
# Same listing as above, with each tensor's qualified name alongside its shape.
for param_name, tensor in model.named_parameters():
    print(param_name, tensor.shape)

fc1.weight torch.Size([50, 13])
fc1.bias torch.Size([50])
fc2.weight torch.Size([30, 50])
fc2.bias torch.Size([30])
fc3.weight torch.Size([1, 30])
fc3.bias torch.Size([1])


In [9]:
# Print a layer-by-layer table of output shapes and parameter counts for an
# input of shape (10, 13), excluding the batch dimension.
# NOTE(review): torchsummary may build its dummy input on CUDA by default when
# a GPU is available, while this model lives on the CPU — confirm against the
# installed version and pass device="cpu" if the call fails on a GPU machine.
summary(model, input_size=(10, 13))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 10, 50]             700
            Linear-2               [-1, 10, 30]           1,530
           Dropout-3               [-1, 10, 30]               0
            Linear-4                [-1, 10, 1]              31
Total params: 2,261
Trainable params: 2,261
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.01
Estimated Total Size (MB): 0.02
----------------------------------------------------------------
