In [1]:
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
import csv
import torch.nn.functional as F

In [2]:
def load_steel_dataset():
    ## pandas로 불러오는 것 적용하기
    with open('data/chap03/faults.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = []
        for row in csvreader:
            rows.append(row)

    input_cnt, output_cnt = 27, 7
    data = np.asarray(rows, dtype='float32')
    
    return data

In [3]:
data = load_steel_dataset()

In [4]:
data.shape

(1941, 34)

In [5]:
pd.DataFrame(data)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,32,33
0,42.0,50.0,270900.0,270944.0,267.0,17.0,44.0,24220.0,76.0,108.0,...,0.8182,-0.2913,0.5822,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,645.0,651.0,2538079.0,2538108.0,108.0,10.0,30.0,11397.0,84.0,123.0,...,0.7931,-0.1756,0.2984,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,829.0,835.0,1553913.0,1553931.0,71.0,8.0,19.0,7972.0,99.0,125.0,...,0.6667,-0.1228,0.2150,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,853.0,860.0,369370.0,369415.0,176.0,13.0,45.0,18996.0,99.0,126.0,...,0.8444,-0.1568,0.5212,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1289.0,1306.0,498078.0,498335.0,2409.0,60.0,260.0,246930.0,37.0,126.0,...,0.9338,-0.1992,1.0000,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1936,249.0,277.0,325780.0,325796.0,273.0,54.0,22.0,35033.0,119.0,141.0,...,-0.4286,0.0026,0.7254,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1937,144.0,175.0,340581.0,340598.0,287.0,44.0,24.0,34599.0,112.0,133.0,...,-0.4516,-0.0582,0.8173,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1938,145.0,174.0,386779.0,386794.0,292.0,40.0,22.0,37572.0,120.0,140.0,...,-0.4828,0.0052,0.7079,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1939,137.0,170.0,422497.0,422528.0,419.0,97.0,47.0,52715.0,117.0,140.0,...,-0.0606,-0.0171,0.9919,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [6]:
xTrain = data[:,:27]
yTrain = data[:,27:]

In [7]:
yTrain = yTrain.astype('int')

In [8]:
xTrain.shape

(1941, 27)

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(xTrain, yTrain, test_size=0.2, random_state=42)

In [11]:
trn = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
train_loader = DataLoader(trn, batch_size = 10, shuffle=True)

trn = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_loader = DataLoader(trn, shuffle=True)

In [12]:
class NeuralNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.linear_1 = torch.nn.Linear(self.input_size, self.hidden_size, bias=True)
        
    def forward(self, input_tensor):
        linear1 = self.linear_1(input_tensor)
        #out = torch.sigmoid(linear1)
        
        return linear1
        

In [13]:
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
print("Training on {}".format(DEVICE))

Training on cuda


In [14]:
model        = NeuralNet(27, 7).to(DEVICE)
optimizer    = torch.optim.SGD(model.parameters(), lr=0.01)
#criterion    = torch.nn.CrossEntropyLoss()

In [15]:
print(model)

NeuralNet(
  (linear_1): Linear(in_features=27, out_features=7, bias=True)
)


In [16]:
def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE, dtype=torch.long)
        optimizer.zero_grad()
        output = model(data)
        _, target = target.max(dim=1)
        loss = F.cross_entropy(output,target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


# ## 테스트하기

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE, dtype=torch.long)
            output = model(data)

            # 배치 오차를 합산
            _, target = target.max(dim=1)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()

            # 가장 높은 값을 가진 인덱스가 바로 예측값
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()


    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [17]:
EPOCHS = 10

for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
          epoch, test_loss, test_accuracy))

[1] Test Loss: 26159397093.2648, Accuracy: 28.53%

[2] Test Loss: 7000731182.8895, Accuracy: 47.56%

[3] Test Loss: 17470234336.0000, Accuracy: 32.39%

[4] Test Loss: 14298257074.0154, Accuracy: 43.44%

[5] Test Loss: 94390443139.2905, Accuracy: 21.34%

[6] Test Loss: 11716094569.9126, Accuracy: 33.16%

[7] Test Loss: 16596143263.1774, Accuracy: 45.50%

[8] Test Loss: 34458246829.5733, Accuracy: 17.74%

[9] Test Loss: 218118007495.4036, Accuracy: 11.05%

[10] Test Loss: 48090817459.8252, Accuracy: 18.25%

