## データ準備と前処理

### 必要ライブラリのインポート

In [116]:
import csv
import numpy as np
from collections import defaultdict

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### グローバル変数の定義

In [117]:
poke_id_dict = defaultdict(int)   # ポケモン名に対するIDを格納する辞書
party_list = []                   # 相手パーティのリスト（numpy配列 party_np に変換する）
first_poke = []                   # 初手のポケモン（numpy配列 first_np に変換する）

## CSVを読み込んでポケモン名に対応する辞書を作る

In [118]:
file_name = "./sample.csv"
id = 1

with open(file_name, mode='r') as csv_file:
  csv_reader = csv.reader(csv_file)
  next(csv_reader)

  for row in csv_reader:
    for pokemon in row[:-1]:
      if pokemon not in poke_id_dict.keys():
        poke_id_dict[pokemon] = id
        id += 1

## CSVから相手パーティのポケモンをIDに変換したnumpy配列を生成

In [119]:
file_name = "./sample.csv"
with open(file_name, mode='r') as csv_file:
  csv_reader = csv.reader(csv_file)
  next(csv_reader)

  for row in csv_reader:
    one_party = []
    for pokemon in row[:-1]:
      one_party.append(poke_id_dict[pokemon])
    party_list.append(one_party)
    first_poke.append(poke_id_dict[row[-1]])

party_np = np.array(party_list)
first_np = np.array(first_poke)

### 学習データと検証データに分割

In [120]:
x_train, x_valid, y_train, y_valid = train_test_split(party_np, first_np, shuffle=True, test_size=0.2)

array([[ 7,  8,  3,  9, 10,  5],
       [16,  3,  7,  9, 17,  5],
       [ 1,  2,  3,  4,  5,  6],
       [14,  7,  8, 10, 15, 16]])

### 特徴量の標準化

In [98]:
scaler = StandardScaler()
scaler.fit(x_train)


x_train = scaler.transform(x_train)
x_valid = scaler.transform(x_valid)

### Tensor型に変換

In [99]:
x_train = torch.from_numpy(x_train).float()
y_train = torch.from_numpy(y_train).long()
x_valid = torch.from_numpy(x_valid).float()
y_valid = torch.from_numpy(y_valid).long()

print('x_train : ', x_train.shape)
print('y_train : ', y_train.shape)
print('x_valid : ', x_valid.shape)
print('y_valid : ', y_valid.shape)

x_train :  torch.Size([4, 6])
y_train :  torch.Size([4])
x_valid :  torch.Size([1, 6])
y_valid :  torch.Size([1])


## Datasetの作成

In [100]:
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)

In [101]:
batch_size = 2
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# 動作確認
# イテレータに変換
batch_iterator = iter(valid_dataloader)
# 1番目の要素を取り出す
inputs, labels = next(batch_iterator)
print(inputs.size())
print(labels.size())

torch.Size([1, 6])
torch.Size([1])


## ネットワークの定義

In [102]:
poke_num = len(poke_id_dict)    # 分類したいポケモンの数

class Net(nn.Module):    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(6, 50)
        self.fc2 = nn.Linear(50, poke_num)
    
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=6, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=17, bias=True)
)


### 損失関数と最適化手法の定義

In [103]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

## 学習

In [80]:
train_dataloader

for inputs, labels in train_dataloader:
  print(inputs)
  print(labels)


tensor([[ 0.2410,  0.7770,  1.1767,  0.9045,  1.3106,  0.8642],
        [ 0.5623, -1.5541, -0.7845, -1.5076,  0.6116, -1.5758]])
tensor([30, 27])
tensor([[ 0.8835, -0.1943,  0.7845,  0.9045, -1.1358,  0.8642],
        [-1.6868,  0.9713, -1.1767, -0.3015, -0.7863, -0.1525]])
tensor([30, 22])


In [130]:
# エポック数
num_epochs = 10

# 学習時と検証時で分けるためディクショナリを用意
dataloaders_dict = {
    'train': train_dataloader,
    'val': valid_dataloader
}

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-------------')
    
    for phase in ['train', 'val']:
        
        if phase == 'train':
            # モデルを訓練モードに設定
            net.train()
        else:
            # モデルを推論モードに設定
            net.eval()
        
        # 損失和
        epoch_loss = 0.0
        # 正解数
        epoch_corrects = 0
        
        # DataLoaderからデータをバッチごとに取り出す
        for inputs, labels in dataloaders_dict[phase]:
            
            # optimizerの初期化
            optimizer.zero_grad()
            
            # 学習時のみ勾配を計算させる設定にする
            with torch.set_grad_enabled(phase == 'train'):
                outputs = net(inputs)
                
                # 損失を計算
                loss = criterion(outputs, labels)
                
                # ラベルを予測
                _, preds = torch.max(outputs, 1)
                
                # 訓練時はバックプロパゲーション
                if phase == 'train':
                    # 逆伝搬の計算
                    loss.backward()
                    # パラメータの更新
                    optimizer.step()
                
                # イテレーション結果の計算
                # lossの合計を更新
                # PyTorchの仕様上各バッチ内での平均のlossが計算される。
                # データ数を掛けることで平均から合計に変換をしている。
                # 損失和は「全データの損失/データ数」で計算されるため、
                # 平均のままだと損失和を求めることができないため。
                epoch_loss += loss.item() * inputs.size(0)
                
                # 正解数の合計を更新
                epoch_corrects += torch.sum(preds == labels.data)

        # epochごとのlossと正解率を表示
        epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)
        epoch_acc = epoch_corrects.double() / len(dataloaders_dict[phase].dataset)

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

Epoch 1/10
-------------
tensor([14,  5])
tensor([14,  0])
train Loss: 2.8415 Acc: 0.0000
tensor([0])
val Loss: 2.8462 Acc: 0.0000
Epoch 2/10
-------------
tensor([ 5, 14])
tensor([14,  0])
train Loss: 2.8413 Acc: 0.0000
tensor([0])
val Loss: 2.8461 Acc: 0.0000
Epoch 3/10
-------------
tensor([0, 5])
tensor([14, 14])
train Loss: 2.8412 Acc: 0.0000
tensor([0])
val Loss: 2.8459 Acc: 0.0000
Epoch 4/10
-------------
tensor([ 0, 14])
tensor([14,  5])
train Loss: 2.8410 Acc: 0.0000
tensor([0])
val Loss: 2.8458 Acc: 0.0000
Epoch 5/10
-------------
tensor([14, 14])
tensor([0, 5])
train Loss: 2.8408 Acc: 0.0000
tensor([0])
val Loss: 2.8456 Acc: 0.0000
Epoch 6/10
-------------
tensor([14,  0])
tensor([14,  5])
train Loss: 2.8406 Acc: 0.0000
tensor([0])
val Loss: 2.8455 Acc: 0.0000
Epoch 7/10
-------------
tensor([14,  5])
tensor([ 0, 14])
train Loss: 2.8404 Acc: 0.0000
tensor([0])
val Loss: 2.8453 Acc: 0.0000
Epoch 8/10
-------------
tensor([14,  0])
tensor([14,  5])
train Loss: 2.8402 Acc: 0.00

### 予測実行

In [146]:
poke1 = "ハバタクカミ"
poke2 = "ウーラオス（連撃）"
poke3 = "ハバタクカミ"
poke4 = "タケルライコ"
poke5 = "カイリュー"
poke6 = "サーフゴー"
party_list = [poke1, poke2, poke3, poke4, poke5, poke6]
party_id = []
for poke in party_list:
  party_id.append(poke_id_dict[poke])



party_id = scaler.transform(np.array([party_id]))
party_id = torch.from_numpy(party_id).float()
pred = net(party_id)
_, pred = torch.max(pred, 1)
pred = pred.item()

for poke, id in poke_id_dict.items():
  if id == pred:
    print(poke)
    break


トドロクツキ
