# GCNNによる隣接行列の分類

## Colabで実行するときの設定

In [None]:
# Mount Google Drive (Colab only) so that files stored on the drive are reachable under /content/drive
from google.colab import drive
drive.mount('/content/drive')
# Change into the project directory on the mounted drive and list the Matrix Market (.mtx) files found there
%cd /content/drive/My \Drive/source/dna-microscopy/2D/diffusion_deep_learning
%ls *.mtx

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
/content/drive/My Drive/source/dna-microscopy/2D/diffusion_deep_learning
adjMatTargets.mtx  adjMatTargets_old.mtx  adjMatTargetsTest.mtx


In [None]:
# Install PyTorch Geometric (PyG) together with its torch-scatter / torch-sparse / torch-cluster companion packages.
# NOTE(review): no versions are pinned here. The `pip list` output recorded in this notebook shows
# torch-scatter 2.0.5, torch-sparse 0.6.5, torch-cluster 1.5.5 and torch-geometric 1.5.0 on torch 1.5.1+cu101;
# consider pinning to a set that matches the runtime's torch build.
! pip install --verbose --no-cache-dir torch-scatter
! pip install --verbose --no-cache-dir torch-sparse
! pip install --verbose --no-cache-dir torch-cluster
! pip install torch-geometric

In [None]:
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_max_pool
# import torch_geometric.transforms as T

In [None]:
# Show the installed versions of every package whose name contains "torch"
!pip list | grep torch

torch                    1.5.1+cu101    
torch-cluster            1.5.5          
torch-geometric          1.5.0          
torch-scatter            2.0.5          
torch-sparse             0.6.5          
torchsummary             1.5.1          
torchtext                0.3.1          
torchvision              0.6.1+cu101    


## 実験条件

- ネットワーク: GCNN
- 拡散時間t=20, ビーズの数nBeads=100
- 作成した隣接行列：クラス1〜9について各5000個，計45000個（訓練：5000×9−9000=36000，検証：500×9=4500，テスト：500×9=4500）
- バッチサイズ：128

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from matplotlib import pyplot as plt
import numpy as np

import time
from scipy.io import mmread
from sklearn.model_selection import train_test_split

import random

# Switch matplotlib to interactive mode.
plt.ion()

# Seed the three random number generators used here (PyTorch, NumPy, stdlib random) with the same value.
seed_value = 1234
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(seed_value)

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("device:", device)

device: cuda:0


## データの用意

### データの読み込み

In [None]:
test_run = True

# Choose the input file and the training hyper-parameters for the selected mode.
# Every row of the loaded matrix is one sample: a flattened nBeads x nBeads adjacency matrix
# followed by that sample's target in the last column.
if test_run:
    # Small data set used only to check that the program runs
    # (noted by the author as 5 * 9 samples of 32 x 32 matrices).
    mtx_path, epoch_num, batch_size = "adjMatTargets_small.mtx", 20, 3
else:
    # Full data set for the real experiment.
    mtx_path, epoch_num, batch_size = "adjMatTargets.mtx", 100, 128
amt = mmread(mtx_path).toarray()

# Separate the flattened adjacency matrices from the target column.
flat_adj, target = amt[:, :-1], amt[:, -1]
nBeads = int(np.sqrt(flat_adj.shape[1]))

# Restore the square matrices, with a singleton axis at position 1:
# the result has shape (n_samples, 1, nBeads, nBeads).
adj = flat_adj.reshape(flat_adj.shape[0], 1, nBeads, nBeads)

### (dataset,) dataloaderの作成

In [None]:
# Convert every adjacency matrix into a PyG `Data` graph.
# For now this is a simple in-memory list instead of a PyG Dataset / InMemoryDataset.
data_list = []
for a, label in zip(adj, target):
    a = a.squeeze()                    # (1, nBeads, nBeads) -> (nBeads, nBeads)
    rows, cols = np.nonzero(a > 0)     # every strictly positive entry becomes an edge rows[k] -> cols[k]

    # The nodes carry no features of their own, so each node gets a constant 1 as a placeholder.
    x = torch.ones((nBeads, 1), dtype=torch.float)
    # Stack into one (2, num_edges) array first; this avoids converting a tuple of arrays element by element.
    edge_index = torch.from_numpy(np.vstack((rows, cols))).long()
    # Edge weights as float32 so they share a dtype with `x` (the matrix itself is loaded as a NumPy array).
    edge_attr = torch.tensor(a[rows, cols].reshape(-1, 1), dtype=torch.float)
    # One-element long tensor: batching concatenates these into a (num_graphs,) label vector.
    # NOTE(review): CrossEntropyLoss with 9 logits expects labels in 0..8, while the experiment notes
    # mention classes 1-9 -- confirm that the stored targets are zero-based.
    y = torch.tensor([int(label)], dtype=torch.long)

    data_list.append(Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y))

# Hold out 10 % of all graphs for testing and the same number again for validation.
test_size = int(len(data_list) * 0.1)

# Both splits use a fixed random_state so that re-running this cell reproduces the same partition
# regardless of the current global NumPy RNG state.
trainvallist, testlist = train_test_split(data_list, test_size=test_size, shuffle=True, random_state=0)
trainlist, vallist = train_test_split(trainvallist, test_size=test_size, random_state=0)

# Build the data loaders. Only the training loader reshuffles its graphs every epoch.
trainloader = DataLoader(trainlist, batch_size=batch_size, shuffle=True)
valloader = DataLoader(vallist, batch_size=batch_size)
testloader = DataLoader(testlist, batch_size=batch_size)

## モデルの定義

### ネットワークの定義

In [None]:
class Net(torch.nn.Module):
    """Graph-level classifier: six GCNConv layers, global max pooling, two linear layers.

    The convolutions produce one 128-dimensional embedding per node. These are
    max-pooled per graph so that the network emits exactly one row of class
    logits per graph, matching the per-graph labels in ``data.y``.

    Args:
        in_channels: Number of input features per node (default 1).
        num_classes: Number of output classes (default 9).
    """

    def __init__(self, in_channels=1, num_classes=9):
        super(Net, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, 32)
        self.conv3 = GCNConv(32, 48)
        self.conv4 = GCNConv(48, 64)
        self.conv5 = GCNConv(64, 96)
        self.conv6 = GCNConv(96, 128)
        self.linear1 = torch.nn.Linear(128, 64)
        self.linear2 = torch.nn.Linear(64, num_classes)

    def forward(self, data):
        """Return class logits of shape (num_graphs, num_classes).

        Args:
            data: A PyG ``Data`` or ``Batch`` object providing ``x`` and
                ``edge_index``. When it has no ``batch`` vector, all nodes are
                treated as belonging to a single graph.
        """
        x, edge_index = data.x, data.edge_index

        # Node-level feature extraction: GCNConv followed by ReLU, six times.
        for conv in (self.conv1, self.conv2, self.conv3,
                     self.conv4, self.conv5, self.conv6):
            x = F.relu(conv(x, edge_index))

        # Collapse node embeddings into one vector per graph. Without this step the
        # output has one row per node and cannot be compared with per-graph labels.
        batch = getattr(data, "batch", None)
        if batch is None:
            batch = x.new_zeros(x.size(0), dtype=torch.long)
        x = global_max_pool(x, batch)

        # Graph-level classification head.
        x = F.relu(self.linear1(x))
        return self.linear2(x)

'\nclass Net(torch.nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        #TODO: 入力グラフのチャネル数への依存をなくす(dataset.num_node_features)\n        self.conv1 = GCNConv(1, 16)\n        #TODO: 分類数への依存をなくす(dataset.num_classes)\n        self.conv2 = GCNConv(16, 9)\n\n    def forward(self, data):\n        x, edge_index = data.x, data.edge_index\n\n        x = self.conv1(x, edge_index)\n        x = F.relu(x)\n        x = F.dropout(x, training=self.training)\n        x = self.conv2(x, edge_index)\n\n        return x\n'

### 学習

In [None]:
train_size = len(trainlist)
val_size = len(vallist)
test_size = len(testlist)

model = Net().to(device)

optimizer = torch.optim.Adam(model.parameters())

criterion = nn.CrossEntropyLoss()
# Per-epoch metrics, appended once per epoch.
history = {
    "train_loss": [],
    "train_acc": [],
    "val_loss": [],
    "val_acc": []
}


def _evaluate(net, loader):
    """Evaluate `net` on every batch of `loader` without tracking gradients.

    Returns:
        (mean_loss, accuracy): the loss averaged over batches and the fraction
        of correctly classified graphs. Both are 0.0 for an empty loader.
    """
    net.eval()
    loss_sum = 0.0
    n_correct = 0
    n_total = 0
    n_batches = 0
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            outputs = net(data)
            # Accumulate a Python float so an empty loader never leaves a non-tensor behind.
            loss_sum += criterion(outputs, data.y).item()
            predicted = outputs.argmax(dim=1)
            n_correct += (predicted == data.y).sum().item()
            n_total += data.y.size(0)
            n_batches += 1
    return loss_sum / max(n_batches, 1), n_correct / max(n_total, 1)


num_train_batches = len(trainloader)
for epoch in range(epoch_num):
    # Evaluation switches the model to eval mode, so re-enable training mode every epoch.
    model.train()
    train_correct = 0
    train_total = 0
    train_loss = 0.0
    for i, batch in enumerate(trainloader):
        batch = batch.to(device)
        optimizer.zero_grad()
        outputs = model(batch)
        loss = criterion(outputs, batch.y)
        loss.backward()
        optimizer.step()

        # Accuracy bookkeeping must use the labels of the current training batch.
        predicted = outputs.argmax(dim=1)
        train_total += batch.y.size(0)
        train_correct += (predicted == batch.y).sum().item()

        train_loss += loss.item()
        if i % 10 == 9:
            # One '=' per 10 finished batches, padded with one ' ' per 10 remaining batches.
            done = (i + 1) // 10
            remaining = (num_train_batches - (i + 1)) // 10
            progress_bar = '[' + ('=' * done) + (' ' * remaining) + ']'
            print('\repoch: {:d} loss: {:.3f}  {}'
                .format(
                    epoch + 1,
                    loss.item(),
                    progress_bar),
                end="  ")

    train_acc = train_correct / max(train_total, 1)
    # Average over the real number of batches (the last batch may be smaller than batch_size).
    mean_train_loss = train_loss / max(num_train_batches, 1)

    print('\repoch: {:d} loss: {:.3f}'
        .format(epoch + 1, mean_train_loss), end="  ")
    history["train_acc"].append(train_acc)
    history["train_loss"].append(mean_train_loss)

    val_loss, val_acc = _evaluate(model, valloader)

    history["val_acc"].append(val_acc)
    history["val_loss"].append(val_loss)
    endstr = ' '*max(1,(train_size//1000-39))+"\n"
    # str.format does not treat '%' specially, so a single '%' prints a single percent sign.
    print('Val Accuracy: {:.2f} %'.format(100 * val_acc), end='  ')
    print(f'Val Loss: {val_loss:.3f}',end=endstr)


print('Finished Training')

# Final result on the held-out test set
test_loss, test_acc = _evaluate(model, testloader)
print('Accuracy: {:.2f} %'.format(100 * test_acc))

<class 'torch_geometric.data.batch.Batch'>
Batch(batch=[64], edge_attr=[538, 1], edge_index=[2, 538], x=[64, 1], y=[2])
torch.Size([64, 9])
torch.Size([2])


ValueError: ignored

## 可視化