<a href="https://colab.research.google.com/github/ShinAsakawa/ShinAsakawa.github.io/blob/master/2020ccap/2020%E2%88%920705DNNs_check.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import json
from PIL import Image as PILImage
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
from torchvision import models, transforms

In [None]:
# Report the installed PyTorch and torchvision versions.
for label, package in (("PyTorch Version: ", torch),
                       ("Torchvision Version: ", torchvision)):
    print(label, package.__version__)

PyTorch Version:  1.5.1+cu101
Torchvision Version:  0.6.1+cu101


In [None]:
# Define the models and download their pretrained weights.
# To save time, every model except resnet18 is commented out; remove the leading
# "#" of an entry to make that model available as well.
DNNs = {
    'resnet18': models.resnet18(pretrained=True, progress=True),
    # 'alexnet': models.alexnet(pretrained=True, progress=True),
    # 'vgg16': models.vgg16(pretrained=True, progress=True),
    # 'squeezenet': models.squeezenet1_0(pretrained=True, progress=True),
    # 'densenet': models.densenet161(pretrained=True, progress=True),
    # 'inception': models.inception_v3(pretrained=True, progress=True),
    # 'googlenet': models.googlenet(pretrained=True, progress=True),
    # 'shufflenet': models.shufflenet_v2_x1_0(pretrained=True, progress=True),
    # 'mobilenet': models.mobilenet_v2(pretrained=True, progress=True),
    # 'resnext50_32x4d': models.resnext50_32x4d(pretrained=True, progress=True),
    # 'wide_resnet50_2': models.wide_resnet50_2(pretrained=True, progress=True),
    # 'mnasnet': models.mnasnet1_0(pretrained=True, progress=True),
}

In [None]:
# Choose the model to try from the ones defined above.
# The last uncommented assignment is the one that takes effect.
net = DNNs['resnet18']
#net = DNNs['squeezenet']
#net = DNNs['googlenet']
#net = DNNs['shufflenet']
#net = DNNs['mobilenet']

# Show the names of the last two parameters and of the last two sub-modules.
params = [param_name for param_name, _ in net.named_parameters()]
print(params[-2:])
modules = [module_name for module_name, _ in net.named_modules()]
print(modules[-2:])

# Switch to evaluation mode. Being the last expression of the cell, the returned
# module is displayed, which prints the model details.
net.eval()

['fc.weight', 'fc.bias']
['avgpool', 'fc']


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Preprocessing applied to a PIL image: resize, crop the central 224x224 region,
# then convert to a tensor.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()])

#dataset = datasets.ImageNet(".", split="train", transform=transform)

# Per-channel (R, G, B) mean and standard deviation used for normalization.
# `mean` must be a plain list of three numbers: a trailing comma after the list
# would turn it into a 1-tuple wrapping the list, which Normalize cannot apply
# to a 3-channel image tensor.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# NOTE: `normalize` is kept separate from `transform`, so tensors produced by
# `transform` are NOT normalized (they stay in a range suitable for plt.imshow).
normalize = transforms.Normalize(mean=mean, std=std)
#mean = torch.mean(torch.tensor(means))
#std = torch.mean(torch.tensor(stds))

In [None]:
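# To use an image found on the web instead of uploading one:
# put the image URL in place of the placeholder below, remove the leading "#",
# and run the cell. The -O option saves the download as test_img.jpg.
#!wget 'PUT_THE_IMAGE_URL_HERE' -O test_img.jpg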

In [None]:
# Google Colab helper module for transferring files; only available on Colab.
from google.colab import files

# Open the upload dialog. The chosen file is saved into the working directory
# (see the "Saving ... to ..." message printed below the cell).
uploaded = files.upload()

Saving 19960222_004.jpg to 19960222_004.jpg


In [None]:
# Store the uploaded image as test_img.jpg, the path that the next cell reads.
# The content is taken from `uploaded` (file name -> file bytes) instead of a
# hard-coded file name, so this works for whatever file was chosen in the dialog.
# The file saved under its original name is left in place.
if not uploaded:
    raise FileNotFoundError('no file was uploaded; run the upload cell again')
uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
with open('test_img.jpg', 'wb') as image_out:
    image_out.write(uploaded_bytes)

In [None]:
# Load an arbitrary image and display it.
image_file_path = 'test_img.jpg'
# Force three RGB channels: the network's first convolution takes 3 input
# channels, so grayscale, palette or RGBA files would otherwise fail later.
img = PILImage.open(image_file_path).convert('RGB')
plt.imshow(img); plt.show()


In [None]:
# Preprocess the image for recognition, then display the result.
img_ = transform(img)
# The tensor is channel-first; matplotlib wants channel-last, so move axis 0 to the end.
plt.imshow(np.transpose(img_.numpy(), (1, 2, 0)))
plt.show()

In [None]:
# ILSVRCのラベル情報をロードし辞意書型変数を生成
!wget https://raw.githubusercontent.com/komazawa-deep-learning/komazawa-deep-learning.github.io/master/imagenet_class_index.json
ILSVRC_class_index = json.load(open('imagenet_class_index.json', 'r'))
#print(ILSVRC_class_index)

In [None]:
# Post-processing class that predicts a label from the network output.
class ILSVRCPredictor():
    """
    Map a model output for ILSVRC data to a label name.

    Attributes
    ----------
    class_index : dictionary
            Dictionary associating a class index with its label entry.
            Entries are looked up with the index converted to ``str`` and
            the label name is taken from position 1 of the entry.
    """

    def __init__(self, class_index):
        self.class_index = class_index

    def predict_max(self, out):
        """
        Return the ILSVRC label name with the highest score.

        Parameters
        ----------
        out : torch.Size([1, 1000])
            Output of the network (shape as documented by the original author).

        Returns
        -------
        predicted_label_name : str
            Name of the label with the highest predicted score.
        """
        # Detach from the autograd graph before converting to NumPy.
        scores = out.detach().numpy()
        # np.argmax works on the flattened array; keys are the index as a string.
        best_key = str(np.argmax(scores))
        entry = self.class_index[best_key]
        return entry[1]

In [None]:
# Run recognition.
# Use the out-of-place unsqueeze so that `img_` keeps its 3-D shape; the in-place
# variant would add a batch axis to `img_` itself and break re-running this cell
# or the cell that displays `img_`.
inputs = img_.unsqueeze(0)  # torch.Size([1, 3, 224, 224])
# NOTE(review): the input is not mean/std-normalized here; torchvision documents
# that its pretrained models expect normalized input - confirm whether it should be.
# Inference only, so no autograd graph needs to be recorded.
with torch.no_grad():
    out = net(inputs)  # torch.Size([1, 1000])

In [None]:
# Convert the network output to a NumPy array and print the best-scoring class.
outnp = out.detach().numpy()
top1_index = int(np.argmax(outnp))
# The label dictionary comes from JSON, so its keys are strings (this is also
# how ILSVRCPredictor looks entries up); indexing with an integer raises KeyError.
print('トップ 1: ', ILSVRC_class_index[str(top1_index)])

In [None]:
#np.sort(outnp)
# Indices of the five highest scores, in ascending score order.
top5_ascending = np.argsort(outnp[0])[-5:]
# Print them best-first. The label dictionary comes from JSON, so its keys are
# strings; convert the index before the lookup to avoid a KeyError.
for i in top5_ascending[::-1]:
    print(i, ILSVRC_class_index[str(i)])
print(top5_ascending)

In [None]:
vocab = {}
!wget https://raw.githubusercontent.com/komazawa-deep-learning/komazawa-deep-learning.github.io/master/pnt_stims.txt
with open('pnt_stims.txt') as f:
    a = f.readlines()
    
for x in a:
    b = x.strip().split(',')
    if len(b[0]) < 1:
        continue
    else:
        vocab[b[0]] = b[1] 

ivocab = {v:k for k, v in vocab.items()}

for i, ws in enumerate(vocab):
    print(i,ws, end=", ")
    if (i + 1) % 10 == 0:
        print()

In [None]:
!wget !wget https://raw.githubusercontent.com/komazawa-deep-learning/komazawa-deep-learning.github.io/master/pnt_images.tgz
!tar xzf pnt_images.tgz > /dev/null

In [None]:
# Classify a range of the stimulus pictures and print the top-5 classes for each.
_from = 0  # index of the first image to process (>= 0)
_to = 10   # end of the range, used as the slice end (<= 186)

pnt_base = './pics'
vocab_list = list(vocab.keys())
#print(vocab_list)
for w in vocab_list[_from:_to]:
    filename = os.path.join(pnt_base, w + '.png')
    img = PILImage.open(filename)
    if img.mode == 'RGBA':
        # Flatten transparency onto a white background, using the alpha
        # channel as the paste mask.
        background = PILImage.new("RGB", img.size, (255, 255, 255))
        background.paste(img, mask=img.split()[3])
        img = background
    elif img.mode != 'RGB':
        # The network takes 3-channel input; convert any other mode as well.
        img = img.convert('RGB')

    img_ = transform(img)
    plt.axis(False); plt.imshow(img_.numpy().transpose((1, 2, 0))); plt.show()

    # Out-of-place unsqueeze: leave `img_` itself untouched.
    inputs = img_.unsqueeze(0)  # torch.Size([1, 3, 224, 224])
    # Inference only, so no autograd graph needs to be recorded.
    with torch.no_grad():
        out = net(inputs).numpy()
    print(os.path.basename(filename), img.mode, np.array(img).shape, img.getbbox())
    # Five highest scores, best first. The label dictionary comes from JSON, so
    # its keys are strings; convert the index before the lookup.
    for i in np.argsort(out[0])[-5:][::-1]:
        print(i, out[0][i], ILSVRC_class_index[str(i)])
    #img_ = img.crop(img.getbbox())
    #plt.imshow(img); plt.show()