<a href="https://colab.research.google.com/github/ShineySun/BASIC_AI/blob/master/Wine_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


- Project : Wine Classification
- Framework : Pytorch
- Date : 2020. 07. 20

In [38]:
# import pytorch lib
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [39]:
# import scikit-learn lib
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# import pandas lib
import pandas as pd

In [40]:
# load Data
wine = load_wine()
wine['feature_names']

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [41]:
# print wine data
pd.DataFrame(wine.data, columns=wine.feature_names)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [42]:
 df_label = pd.DataFrame(wine.target)
 df_label

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
173,2
174,2
175,2
176,2


In [43]:
data_len = len(wine.data)
print("data length : " , data_len)

target_len = len(wine.target)
print("target length : ", target_len)

data length :  178
target length :  178


In [44]:
# data to array
wine_data = wine.data[:130]
# data to array
wine_target = wine.target[:130]

print("wine_data type : ", type(wine_data))
print("wine_target type : ", type(wine_target))

wine_data type :  <class 'numpy.ndarray'>
wine_target type :  <class 'numpy.ndarray'>


In [45]:
# split datasets into training and test data
train_x, test_x, train_y, test_y = train_test_split(wine_data, wine_target, test_size = 0.2)

print("len(train_x) : ", len(train_x) , "   |   type(train_x) : {}".format(type(train_x)))
print("len(train_y) : ", len(train_y) , "   |   type(train_y) : {}".format(type(train_y)))
print("len(test_x) : ", len(test_x) , "     |   type(test_x) : {}".format(type(test_x)))
print("len(test_y) : ", len(test_y) , "     |   type(test_y) : {}".format(type(test_y)))

len(train_x) :  104    |   type(train_x) : <class 'numpy.ndarray'>
len(train_y) :  104    |   type(train_y) : <class 'numpy.ndarray'>
len(test_x) :  26      |   type(test_x) : <class 'numpy.ndarray'>
len(test_y) :  26      |   type(test_y) : <class 'numpy.ndarray'>


In [46]:
# convert train data to tensor
train_x = torch.from_numpy(train_x).float()
train_y = torch.from_numpy(train_y).long()

# conver test data to tensor
test_x = torch.from_numpy(test_x).float()
test_y = torch.from_numpy(test_y).long()

print("len(train_x) : ", len(train_x) , "   |   type(train_x) : {}".format(type(train_x)))
print("len(train_y) : ", len(train_y) , "   |   type(train_y) : {}".format(type(train_y)))
print("len(test_x) : ", len(test_x) , "     |   type(test_x) : {}".format(type(test_x)))
print("len(test_y) : ", len(test_y) , "     |   type(test_y) : {}".format(type(test_y)))

len(train_x) :  104    |   type(train_x) : <class 'torch.Tensor'>
len(train_y) :  104    |   type(train_y) : <class 'torch.Tensor'>
len(test_x) :  26      |   type(test_x) : <class 'torch.Tensor'>
len(test_y) :  26      |   type(test_y) : <class 'torch.Tensor'>


In [47]:
# Combine the tensor of data and target variable
train = TensorDataset(train_x, train_y)

# Check the contents of the tensor's first data
print("train[0] : ", train[0])
print("train[1] : ", train[1])

# split into mini batch (16)
train_loader = DataLoader(train, batch_size = 16, shuffle=True)

train[0] :  (tensor([1.3770e+01, 1.9000e+00, 2.6800e+00, 1.7100e+01, 1.1500e+02, 3.0000e+00,
        2.7900e+00, 3.9000e-01, 1.6800e+00, 6.3000e+00, 1.1300e+00, 2.9300e+00,
        1.3750e+03]), tensor(0))
train[1] :  (tensor([1.3580e+01, 1.6600e+00, 2.3600e+00, 1.9100e+01, 1.0600e+02, 2.8600e+00,
        3.1900e+00, 2.2000e-01, 1.9500e+00, 6.9000e+00, 1.0900e+00, 2.8800e+00,
        1.5150e+03]), tensor(0))


In [48]:
# Construct neural network
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()

    # fully connected layer 1
    self.fc1 = nn.Linear(13,96)

    # fully connected layer 2
    self.fc2 = nn.Linear(96,2)
  
  def forward(self, x):
    #print("x.shape : ", x.shape)
    x = self.fc1(x)
    x = F.relu(x)

    x = self.fc2(x)

    return F.log_softmax(x)
    

In [49]:
# check gpu setting
print("cuda is available : ", torch.cuda.is_available())
print("current device : ", torch.cuda.current_device())
print("torch.cuda.get_device_name(0) : ",torch.cuda.get_device_name(0))

cuda is available :  True
current device :  0
torch.cuda.get_device_name(0) :  Tesla K80


In [50]:
# create Net
model = Net()

if torch.cuda.is_available() is True:
  model = model.cuda()

In [51]:
# create loss function object
criterion = nn.CrossEntropyLoss()

# create optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

# nEpoch
nEpoch = 300

# training start
for epoch in range(nEpoch):
  total_loss = 0

  for train_x, train_y in train_loader:
    train_x, train_y = Variable(train_x), Variable(train_y)

    if torch.cuda.is_available() is True :
      train_x = train_x.cuda()
      train_y = train_y.cuda()
    
    # init GD
    optimizer.zero_grad()

    # calc forward
    output = model(train_x)

    # print("output : ", output)
    # print("train_y : ", train_y)

    # print("output.shape : ", output.shape)
    # print("train_y.shape : ", train_y.shape)

    # calc loss
    loss = criterion(output, train_y)

    # backward
    loss.backward()

    # update weight
    optimizer.step()

    # calc total loss
    total_loss += loss.data
  if (epoch+1)%50 == 0:
    print("{}'s total loss : {}".format(epoch+1, total_loss))






50's total loss : 4.812386512756348
100's total loss : 4.785183429718018
150's total loss : 4.8272294998168945
200's total loss : 4.798107624053955
250's total loss : 4.7551960945129395
300's total loss : 4.812600135803223


In [63]:
# test section
test_x, test_y = Variable(test_x), Variable(test_y)

if torch.cuda.is_available() is True :
      test_x = test_x.cuda()
      test_y = test_y.cuda()

result = torch.max(model(test_x).data, 1)[1]

# calc accuracy
accuracy = sum(test_y.detach().cpu().data.numpy() == result.detach().cpu().numpy()) / len(test_y.cpu().data.numpy())

print("Classification Accuracy : ", accuracy)

Classification Accuracy :  0.5


