[View in Colaboratory](https://colab.research.google.com/github/SokichiFujita/pytorch-sandbox/blob/master/wine_classification.ipynb)

# Pytorchによるワインの分類

Pytorchを用いて、Scikit-learnに含まれるワインのデータを教師あり学習によって分類する

# 準備

## Pytorchのインストール

In [0]:
!python --version

!ls /usr/lib/x86_64-linux-gnu/libcuda*

!pip3 install torch torchvision

Python 3.6.6
/usr/lib/x86_64-linux-gnu/libcuda.so
/usr/lib/x86_64-linux-gnu/libcuda.so.1
/usr/lib/x86_64-linux-gnu/libcuda.so.396.54


## その他ライブラリのインストール

In [0]:
!pip3 install scikit-learn janome pillow pandas



# ワインの分類

## ライブラリのロード

In [0]:
　# pytorch
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
##scikit-learn
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import pandas as pd

In [0]:
torch.cuda.is_available()

True

## データのロード

In [0]:
# wine dataset
wine = load_wine()
pandas.DataFrame(wine.data, columns=wine.feature_names)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.640000,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.380000,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.680000,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.800000,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.320000,1.04,2.93,735.0
5,14.20,1.76,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.750000,1.05,2.85,1450.0
6,14.39,1.87,2.45,14.6,96.0,2.50,2.52,0.30,1.98,5.250000,1.02,3.58,1290.0
7,14.06,2.15,2.61,17.6,121.0,2.60,2.51,0.31,1.25,5.050000,1.06,3.58,1295.0
8,14.83,1.64,2.17,14.0,97.0,2.80,2.98,0.29,1.98,5.200000,1.08,2.85,1045.0
9,13.86,1.35,2.27,16.0,98.0,2.98,3.15,0.22,1.85,7.220000,1.01,3.55,1045.0


In [0]:
# Label to classify data
wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

## 教師・テストデータの作成

In [0]:
wine_data = wine.data[0:130]
wine_target = wine.target[0:130]
train_data, test_data, train_label, test_label = train_test_split(wine_data, wine_target, test_size=0.2)
print(train_data.shape)
print(train_label.shape)

(104, 13)
(104,)


## テンソルの作成

In [0]:
# create tensor to learn
train_data_tensor = torch.from_numpy(train_data).float()
train_label_tensor = torch.from_numpy(train_label).long()
# create tensor to test
test_data_tensor = torch.from_numpy(test_data).float()
test_label_tensor = torch.from_numpy(test_label).long()
# display the shape
print(train_data_tensor.shape)
print(train_label_tensor.shape)
assert(train_data_tensor.shape == train_data.shape)
assert(train_label_tensor.shape == train_label.shape)

# merge train data and label
train = TensorDataset(train_data_tensor, train_label_tensor)
print(train)

torch.Size([104, 13])
torch.Size([104])
<torch.utils.data.dataset.TensorDataset object at 0x7f2574c30eb8>


## ミニバッチの作成

In [0]:
train_minibatch = DataLoader(train, batch_size=16, shuffle=True)

## ニューラルネットワークの作成

In [0]:
# define neural network
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    # y = Ax + b s.t. 13 -> 96
    self.fc1 = nn.Linear(13, 96)
    # y = Ax + b s.t. 96 -> 2
    self.fc2 = nn.Linear(96, 2)

  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    return F.log_softmax(x)

# create neural network instance
model = Net()

## 学習モデルの作成

In [0]:
# 誤差関数のインスタンス:交差エントロピー
criterion = nn.CrossEntropyLoss()

# 最適化関数のインスタンス:確率的勾配降下法
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 学習を開始する
for epoch in range(300):
  total_loss = 0
  for train_data, train_label in train_minibatch:
    # 計算グラフの構築
    train_data, train_label = Variable(train_data), Variable(train_label)
    # 勾配のリセット
    optimizer.zero_grad()
    # 順伝播の計算
    output = model(train_data)
    # 誤差の計算
    loss = criterion(output, train_label)
    # 逆伝播の計算
    loss.backward()
    # 重みの更新
    optimizer.step()
    # 誤差の累積
    total_loss += loss.data[0]  
  #累積誤算表示 (多いので時々表示)
  if (epoch+1) % 20 == 0:
    print(epoch+1, total_loss)


  if sys.path[0] == '':


20 tensor(4.8500)
40 tensor(4.8430)
60 tensor(4.8351)
80 tensor(4.8568)
100 tensor(4.8360)
120 tensor(4.8357)
140 tensor(4.8432)
160 tensor(4.8359)
180 tensor(4.8432)
200 tensor(4.8526)
220 tensor(4.8351)
240 tensor(4.8427)
260 tensor(4.8512)
280 tensor(4.8506)
300 tensor(4.8429)


## 学習モデルのテスト

In [0]:
# 計算グラフの構築
test_data, test_label = Variable(test_data_tensor), Variable(test_label_tensor)

# 結果の出力
test_result = torch.max(model(test_data).data, 1)[1]

accuracy = sum(test_label.data.numpy() == test_result.numpy()) / len(test_label.data.numpy())

accuracy


  if sys.path[0] == '':


0.5384615384615384