PyTorch: Wine dataset

In [None]:
import torch
import numpy as np
import pandas as pd
import math


# Load the wine CSV into a DataFrame and preview its first three rows.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
wine_csv_path = 'C:/Users/sec/Desktop/bigleader/deeplearning/data/wine.csv'
df = pd.read_csv(wine_csv_path)
df.head(3)

Custom Dataset 구축
1. custom dataset Class로 'Dataset' 구축
2. DataLoader로 구축

In [None]:
# Inspect the NumPy array behind the DataFrame, followed by its shape.
wine_array = df.values
print(wine_array)
print(wine_array.shape)

In [None]:
from torch.utils.data import Dataset, DataLoader

# Wrap the wine CSV in a torch Dataset
class WineDataset(Dataset):
    """Map-style dataset built from a wine CSV file.

    Column 0 of the CSV is used as the target and every remaining column as
    a feature. All values are converted to float32 tensors.

    Args:
        csv_path: Path of the CSV file to load. Defaults to the location
            originally hard-coded in this notebook.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, csv_path='C:/Users/sec/Desktop/bigleader/deeplearning/data/wine.csv'):
        wine = pd.read_csv(csv_path)
        # Use the frame that was just read (not the notebook-level `df`), and
        # cast to float32 so the resulting tensors are torch.float32.
        wine = wine.values.astype(np.float32)
        # Number of rows (samples) in the file
        self.n_samples = wine.shape[0]

        # Convert the NumPy data to torch tensors.
        # wine[:, 1:] keeps every row and all columns after the first.
        self.X_data = torch.from_numpy(wine[:, 1:])
        # There is a single target column, but indexing with [0] (a list)
        # keeps it 2-D, i.e. shape (n_samples, 1).
        self.y_data = torch.from_numpy(wine[:, [0]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

In [None]:
# Instantiate the dataset; the bare name on the last line makes the notebook
# display the object's repr as the cell output.
dataset = WineDataset()
dataset

In [None]:
# Each item is a pair made of an X_data row and the matching y_data row
dataset[0]

In [None]:
# Unpack the first sample into its feature tensor and its label tensor
features, labels = dataset[0]
print(features, labels)

In [None]:
# Shuffle the samples before batching; the intent is to avoid training on the
# same ordering repeatedly and to help against overfitting.
train_loader = DataLoader(dataset, batch_size=16, shuffle=True)
print(train_loader)

# Pull one batch from the loader to inspect it
first_batch = next(iter(train_loader))
print(first_batch)

PyTorch: XOR Problem

1. XOR Dataset

In [40]:
import torch
import torch.nn as nn

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [41]:
# XOR truth table: four 2-bit inputs and their targets, as float32 tensors on `device`
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32).to(device)
Y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32).to(device)

# Show the tensors first, then their shapes, one per line
print(X)
print(Y)
print(X.shape)
print(Y.shape)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
tensor([[0.],
        [1.],
        [1.],
        [0.]])
torch.Size([4, 2])
torch.Size([4, 1])


2. Perceptron 학습

In [42]:
# Single-layer perceptron: one linear layer followed by a sigmoid.
# Input/output widths are taken from the second dimension of X and Y.
linear = nn.Linear(in_features=X.shape[1], out_features=Y.shape[1], bias=True)
sigmoid = nn.Sigmoid()
model = nn.Sequential(linear, sigmoid).to(device)

# Binary classification (targets are 0 or 1), so use binary cross-entropy
criterion = nn.BCELoss().to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

# training
for epoch in range(10001):
    # Forward pass and loss over the whole XOR table
    y_pred = model(X)
    loss = criterion(y_pred, Y)

    # Backpropagate, update the weights, then clear the gradients
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss every 1000 epochs
    if not epoch % 1000:
        print(epoch, loss.item())
    

0 0.7100017666816711
1000 0.693149745464325
2000 0.6931471824645996
3000 0.6931471824645996
4000 0.6931471824645996
5000 0.6931471824645996
6000 0.6931471824645996
7000 0.6931471824645996
8000 0.6931471824645996
9000 0.6931471824645996
10000 0.6931471824645996


In [43]:
# Prediction
with torch.no_grad():
    # Run a fresh forward pass with the current weights. The y_pred left over
    # from the training loop was computed before the final optimizer.step(),
    # so it does not reflect the final parameters.
    y_pred = model(X)

    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions
    y_hat = (y_pred > 0.5).float()
    # Fraction of predictions that match the targets
    accuracy = (y_hat == Y).float().mean()

    print(Y)
    print(y_hat)

    print('Accuracy:', accuracy.item())

tensor([[0.],
        [1.],
        [1.],
        [0.]])
tensor([[0.],
        [0.],
        [0.],
        [0.]])
Accuracy: 0.5


In [44]:
# Display the targets via detach().
# NOTE(review): Y is created without requires_grad, so this looks like a leftover scratch cell - confirm.
Y.detach()

tensor([[0.],
        [1.],
        [1.],
        [0.]])

3. Multi-layered Perceptron으로 해결

In [45]:
# Build the model: a multi-layer perceptron with a sigmoid after every linear layer
mlp_layers = [
    nn.Linear(2, 8, bias=True),    # input (2) -> hidden layer 1 (8)
    nn.Sigmoid(),
    nn.Linear(8, 16, bias=True),   # hidden layer 1 (8) -> hidden layer 2 (16)
    nn.Sigmoid(),
    nn.Linear(16, 16, bias=True),  # hidden layer 2 (16) -> hidden layer 3 (16)
    nn.Sigmoid(),
    nn.Linear(16, 1, bias=True),   # hidden layer 3 (16) -> output (1)
    nn.Sigmoid(),                  # final sigmoid, as required by BCELoss inputs
]
model = nn.Sequential(*mlp_layers).to(device)

In [46]:
# Binary classification (targets are 0 or 1), so use binary cross-entropy
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.5)

# training
for epoch in range(10001):
    # Forward pass and loss over the whole XOR table
    y_pred = model(X)
    loss = criterion(y_pred, Y)

    # Backpropagate, update the weights, then clear the gradients
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss every 1000 epochs
    if not epoch % 1000:
        print(epoch, loss.item())
    

0 0.6947944164276123
1000 0.6931302547454834
2000 0.693111777305603
3000 0.6930837631225586
4000 0.6930321455001831
5000 0.6929085850715637
6000 0.6924196481704712
7000 0.6773561835289001
8000 0.0024088521022349596
9000 0.0008361625950783491
10000 0.00048682658234611154


In [47]:
# Prediction
with torch.no_grad():
    # Run a fresh forward pass with the current weights. The y_pred left over
    # from the training loop was computed before the final optimizer.step(),
    # so it does not reflect the final parameters.
    y_pred = model(X)

    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions
    y_hat = (y_pred > 0.5).float()
    # Fraction of predictions that match the targets
    accuracy = (y_hat == Y).float().mean()

    print(y_hat)

    print('Accuracy:', accuracy.item())

tensor([[0.],
        [1.],
        [1.],
        [0.]])
Accuracy: 1.0
