<a href="https://colab.research.google.com/github/ShinAsakawa/ShinAsakawa.github.io/blob/master/2022notebooks/2022_0916noto_fonts_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Noto font を用いた文字認識実験 + PMSP96 単語認識
* date: 2022_0906
* author: 浅川伸一

---



In [None]:
# Reload edited modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2
# Render inline figures at retina resolution
%config InlineBackend.figure_format = 'retina'
# Import the `bit` helper package; when the import fails, install ipynbname
# and clone the bit repository into the working directory before retrying
try:
    import bit
except ImportError:
    !pip install ipynbname --upgrade > /dev/null
    !git clone https://github.com/ShinAsakawa/bit.git > /dev/null
import bit
# Values exposed by `bit` (presumably a "running on Colab" flag and a home path — verify in bit)
isColab = bit.isColab
HOME = bit.HOME

# When isColab is truthy, upgrade Pillow
# NOTE(review): presumably needed for newer ImageDraw APIs such as textbbox — confirm
if isColab:
    !pip install --upgrade Pillow

## 1 Noto フォントの登録

In [None]:
import os
from glob import  glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

# Font dictionaries provided by `bit` (iterated below as name -> font object)
fonts_jp = bit.get_notojp_fonts()
fonts_en = bit.get_notoen_fonts()

# Default stimulus canvas settings shared by the cells below
default_width, default_height = 224, 224
default_bgcolor = (255, 255, 255)
default_fontsize = 28

# Draw every English font's own name with that font, one panel per font.
# The grid keeps the original 4 x 6 layout but grows by whole rows when
# more than 24 fonts are registered, instead of raising IndexError.
j_max = 6
n_rows = max(4, -(-len(fonts_en) // j_max))  # ceil division, at least 4 rows
fig, ax = plt.subplots(n_rows, j_max, figsize=(18, 3 * n_rows))
for idx, (font_name, font) in enumerate(fonts_en.items()):
    i, j = divmod(idx, j_max)  # row-major position of this font in the grid
    img = Image.new(
        mode='RGB',
        size=(default_width, default_height),
        color=default_bgcolor)
    draw_canvas = ImageDraw.Draw(img)
    draw_canvas.text(
        xy=(2, 84),
        text=font_name,
        font=font,
        fill=(0, 0, 0))

    ax[i, j].imshow(img)
    ax[i, j].set_xticks([])
    ax[i, j].set_yticks([])

plt.show()

## 2 PyTorch データセットの設定

In [None]:
import os
from glob import  glob
import numpy as np
import matplotlib.pyplot as plt
import PIL
from PIL import Image, ImageDraw, ImageFont

def get_text_img(
    text:str="XYZ",
    x0:int=0,
    y0:int=0,
    font:PIL.ImageFont.FreeTypeFont=fonts_en['NotoSans-Regular'],  # font
    bgcolor:tuple=default_bgcolor,  # default background colour
    color:"tuple | str" = 'black',  # default foreground colour
    width:int=default_width,        # default canvas width
    height:int=default_height,      # default canvas height
    fontsize:int=default_fontsize,  # default font size
    target_transform=None):
    """Render `text` onto a fresh RGB canvas and return the image and its drawer.

    Args:
        text: String to draw.
        x0, y0: Drawing origin passed to `ImageDraw.text`. A value of 0 on an
            axis is treated as "centre the text on that axis".
        font: Font object used for measuring and drawing.
        bgcolor: Canvas background colour.
        color: Text fill colour.
        width, height: Canvas size in pixels.
        fontsize: Unused here (the size is carried by `font`); kept for
            interface compatibility.
        target_transform: Unused here; kept for interface compatibility.

    Returns:
        Tuple `(img, draw_canvas)`: the PIL image and the `ImageDraw` object
        bound to it.
    """
    stroke_width = 1

    img = Image.new(mode='RGB',
                    size=(width, height),
                    color=bgcolor)
    draw_canvas = ImageDraw.Draw(img)

    # Measure the ink box relative to a (0, 0) origin, using the same stroke
    # width as the final draw call so that the measurement matches the glyphs.
    left, top, right, bottom = draw_canvas.textbbox(xy=(0, 0),
                                                    text=text,
                                                    font=font,
                                                    stroke_width=stroke_width)
    bbox_width = right - left
    bbox_height = bottom - top

    # The ink box does not start at the drawing origin (left/top are the
    # offsets), so subtract them; otherwise the text is shifted off-centre.
    if x0 == 0:
        x0 = (width - bbox_width) // 2 - left
    if y0 == 0:
        y0 = (height - bbox_height) // 2 - top

    draw_canvas.text(xy=(x0, y0),
                     text=text,
                     font=font,
                     stroke_width=stroke_width,
                     fill=color)

    return img, draw_canvas

# Render one sample word with the default settings and display it
img, draw_canvas = get_text_img("make")

sample_fig = plt.figure(figsize=(4, 4))
plt.imshow(img)
plt.show()

In [None]:
# Set `verbose = True` on the next line to display a sample image
verbose = True

import torch
from torch.utils.data import Dataset
from torchvision import transforms
try:
    import japanize_matplotlib 
except ImportError:
    !pip install japanize_matplotlib
    import japanize_matplotlib

# Characters to recognise: the ten digits plus upper- and lower-case letters
digit_chars = '0123456789'
alphabet_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

# One dataset item per character in the combined string
_dataset = bit.notoen_dataset(
    fonts_dict=fonts_en,
    items=list(digit_chars + alphabet_chars),
    )

print(f'_data.__len__():{len(_dataset)}')

# Distinct values found at index 1 of each entry in `_dataset.labels`
_labels = {entry[1] for entry in _dataset.labels}
print(f'len(_labels):{len(_labels)}')

if verbose:
    # Draw one random item from the whole dataset
    N = np.random.choice(len(_dataset))
    img, label = _dataset[N]

    # The item is a torch Tensor: move channels last, scale by 1/255 and
    # clip to [0, 1] so that matplotlib can display it
    img = img.detach().numpy().transpose(1, 2, 0)
    img = (img / 255).clip(0, 1)
    plt.figure(figsize=(2, 2))
    plt.title(f'ラベル:{label}')
    plt.imshow(img)
    plt.show()

# Show 21 randomly chosen (with replacement) original images in a 3 x 7 grid
print(_dataset.__len__())
fig, ax = plt.subplots(3, 7, figsize=(12, 6))
j_max = 7
sample_indices = np.random.choice(range(_dataset.__len__()), 21)
for k, x in enumerate(sample_indices):
    i, j = divmod(k, j_max)  # row-major panel position
    img, label = _dataset.__getoriginalitem__(x)
    ax[i, j].imshow(img)
    # Title each panel with the label returned for this item rather than a
    # `font_name` left over from an earlier cell's loop
    ax[i, j].set_title(f'{label}')
    ax[i, j].set_xticks([])
    ax[i, j].set_yticks([])

plt.show()


## 3 PyTorch 乱数の種の設定

In [None]:
# PyTorch の seed の設定関連 再現性確保のため
# https://qiita.com/takubb/items/7d45ae701390912c7629
# https://qiita.com/si1242/items/d2f9195c08826d87d6ad
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Select the compute resource: first CUDA device when available, else CPU
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)


def fix_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator.
    """
    # cuDNN: deterministic algorithms, autotuner disabled
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Python's `random` and NumPy's global generator
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

    # PyTorch CPU and all CUDA generators
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.random.manual_seed(seed)


# Fix the seed once for reproducibility
seed = 42
fix_seed(seed)

# Fix the random seed of the data loader's worker subprocesses
def worker_init_fn(worker_id):
    """Re-seed NumPy's global RNG for one data loader worker.

    The new seed is the first word of the current NumPy RNG state plus
    `worker_id`, reduced modulo 2**32 so it always stays inside the range
    accepted by `np.random.seed`.

    Args:
        worker_id: Integer id of the worker.
    """
    base_seed = int(np.random.get_state()[1][0])
    np.random.seed((base_seed + worker_id) % 2**32)
    
 # データローダーの作成
# train_loader = torch.utils.data.DataLoader(train_dataset,
#                                            batch_size=16,  # バッチサイズ
#                                            shuffle=True,  # データシャッフル
#                                            num_workers=2,  # 高速化
#                                            pin_memory=True,  # 高速化
#                                            worker_init_fn=worker_init_fn
#                                            )

## 4 訓練データと検証データの作成

In [None]:
# Split the character dataset 80 % / 20 % with a seeded generator
seed = 42
N = len(_dataset)
N_train = int(N / 10 * 8)
N_test = N - N_train
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset=_dataset,
    lengths=[N_train, N_test],
    generator=torch.Generator().manual_seed(seed))

## 5 PyTorch データローダの作成

In [None]:
import torchvision

# Data loaders: shuffled batches for training, fixed order for evaluation
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0)  # author's note: any value other than 0 raises an error

test_dataloader = torch.utils.data.DataLoader(
     test_dataset,
     batch_size=32,
     shuffle=False,
     num_workers=0)

# Fetch the first batch of the test loader.
# The built-in next() is used because DataLoader iterators in recent
# PyTorch releases no longer provide a `.next()` method.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# Tile the batch into a single image grid
img_grid = torchvision.utils.make_grid(images)

# helper function to show an image
def matplotlib_imshow(img,
                      one_channel=False,
                      figsize=(15,15)
                     ):
    """Display an image tensor with matplotlib.

    Args:
        img: Tensor indexed channel-first (it is transposed with (1, 2, 0)
            before display); values are divided by 255 and clipped to [0, 1].
        one_channel: When True, average over dim 0 and show with the
            "Greys" colour map.
        figsize: Size of the new matplotlib figure.
    """
    if one_channel:
        img = img.mean(dim=0)
    # Out-of-place division: the caller's tensor must not be modified, or
    # displaying the same tensor twice would show a darker image each time
    img = img / 255
    npimg = img.detach().cpu().numpy().clip(0, 1)

    plt.figure(figsize=figsize)
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

In [None]:
# Sorted distinct values found at index 1 of each entry in `_dataset.labels`
_labels = sorted({entry[1] for entry in _dataset.labels})
print(len(_labels), _labels)

## 6 `train_model()` の読み込み（`bit` から import）

In [None]:
from bit import train_model

## 7 LeNet による認識実験

In [None]:
from bit import LeNet_Imagenet

import torch.nn as nn
import torch.optim as optim

# LeNet-style network with one output unit per distinct label
n_char_classes = len(_labels)
lenet = LeNet_Imagenet(out_size=n_char_classes)

# Cross-entropy loss, Adam with learning rate 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lenet.parameters(), lr=0.001)

# Display the network structure as the cell output
lenet

### 7.1. 訓練の実施

In [None]:
%%time
# Re-seed all RNGs so that the training run is repeatable
seed = 42
fix_seed(seed)

# Train LeNet for 10 epochs; `losses` is indexed by phase name below
losses = train_model(
    net=lenet,
    dataloaders_dict=dict(train=train_dataloader, val=test_dataloader),
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )

# Print and plot the loss history of every phase
for phase in losses.keys():
    phase_losses = losses[phase]
    print(f'{phase} {phase_losses}')
    plt.plot(phase_losses, label=phase)
plt.legend()
plt.show()

In [None]:
# Accuracy of LeNet on the held-out loader.
# eval() puts the network in inference mode and no_grad() avoids building an
# autograd graph; the deprecated Variable wrapper is not needed.
lenet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # element 0 of the label batch is what is compared with the prediction
        labels = labels[0]
        outputs = lenet(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest output per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


## 8. ResNet による認識実験

In [None]:
%%time
from bit import ResNet18

# ResNet18 for 3-channel input with one output per distinct label
resnet = ResNet18(img_channels=3, num_classes=len(_labels))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.parameters(), lr=0.0001)

# Re-seed all RNGs so that the training run is repeatable
seed = 42
fix_seed(seed)

# Train for 10 epochs; `losses` is indexed by phase name below
losses = train_model(
    net=resnet,
    dataloaders_dict=dict(train=train_dataloader, val=test_dataloader),
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )

# Print and plot the loss history of every phase
for phase in losses.keys():
    phase_losses = losses[phase]
    print(f'{phase} {phase_losses}')
    plt.plot(phase_losses, label=phase)
plt.legend()
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Accuracy of ResNet on the held-out loader.
# eval() puts the network in inference mode and no_grad() avoids building an
# autograd graph; the deprecated Variable wrapper is not needed.
resnet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # element 0 of the label batch is what is compared with the prediction
        labels = labels[0]
        outputs = resnet(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest output per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


## 9. MLP による認識実験

In [None]:
from bit import MLP_Imagenet

In [None]:
%%time
# MLP with one output unit per distinct label
mlp = MLP_Imagenet(out_size=len(_labels))
# The optimizer must own the MLP's parameters; built on another network's
# parameters it would leave the MLP untrained.
# NOTE(review): lr=0.1 is large for Adam — confirm; lr=0.001 was the earlier setting
optimizer = optim.Adam(mlp.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

# Re-seed all RNGs so that the training run is repeatable
seed = 42
fix_seed(seed)

# Train for 10 epochs; `losses` is indexed by phase name below
losses = train_model(
    net=mlp,
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )

# Print and plot the loss history of every phase
for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()
plt.title('MLP による文字認識')
plt.show()

In [None]:
# Accuracy of the MLP on the held-out loader.
# eval() puts the network in inference mode and no_grad() avoids building an
# autograd graph; the deprecated Variable wrapper is not needed.
mlp.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # element 0 of the label batch is what is compared with the prediction
        labels = labels[0]
        outputs = mlp(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest output per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


# A1. PMSP データの準備

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import requests

# PMSP data file provided by Plaut
pmsp_url = 'https://www.cnbc.cmu.edu/~plaut/xerion/PMSPdata.txt'

# Local file name = last path component of the URL
pmsp_data_fname = pmsp_url.split('/')[-1]
if not os.path.exists(pmsp_data_fname):
    # Download only when no local copy exists
    print(f'pmsp_url:{pmsp_url}')
    r = requests.get(pmsp_url, timeout=60)
    # Fail before creating the file, so that an error response is never
    # cached locally and mistaken for the data on the next run
    r.raise_for_status()
    # The content-length header is optional; fall back to the body size
    total_length = int(r.headers.get('content-length', len(r.content)))
    print(f'{pmsp_data_fname} をダウンロード中 {total_length} バイト')
    with open(pmsp_data_fname, 'wb') as f:
        f.write(r.content)

# Read `PMSPdata.txt`
with open(pmsp_data_fname, 'r') as f:
    pmsp_lines = f.readlines()

# Register every well-formed record (7 tab-separated fields) in a dict keyed
# by the word; any other line is printed with its line index and skipped
pmsp = {}
for i, line in enumerate(pmsp_lines):
    fields = line.strip().split('\t')
    if len(fields) == 7:
        word, phon, _type, sim1, sim1raw, sim2sqrt, sim3rt = fields
        # NOTE(review): the 5th column is bound to `sim1raw` but stored under
        # the key 'sim2raw' — confirm which name is intended
        pmsp[word] = {'phon': phon,
                      'type': _type,
                      'sim1': float(sim1),
                      'sim2raw': float(sim1raw),
                      'sim2sqrt': float(sim2sqrt),
                      'sim3rt': float(sim3rt),
                     }
    else:
        print(i, fields)

# Keep only the orthographic forms (dict keys) and their phonological forms
Orth_list = list(pmsp.keys())
Phon_list = [entry['phon'] for entry in pmsp.values()]

print(f'len(Orth_list):{len(Orth_list)}, len(Phon_list):{len(Phon_list)}')

# Count how often each letter and each phoneme symbol occurs
# ('/' in the phonological form is not counted)
Orth_vocab, Phon_vocab = {}, {}
for orth, phon in zip(Orth_list, Phon_list):
    for ch in orth:
        Orth_vocab[ch] = Orth_vocab.get(ch, 0) + 1
    for p in phon:
        if p == '/':
            continue
        Phon_vocab[p] = Phon_vocab.get(p, 0) + 1

# Plot letter frequencies in descending order.
# Sorting (symbol, count) pairs keeps every symbol; looking symbols up through
# a count -> symbol dict would merge symbols that share the same count.
orth_ranked = sorted(Orth_vocab.items(), key=lambda kv: kv[1], reverse=True)
orth_vocab_freq_tags = [kv[0] for kv in orth_ranked]
orth_vocab_freqs = [kv[1] for kv in orth_ranked]
# Retained for compatibility only; lossy when two letters share a count
f2o = {v:k for k, v in Orth_vocab.items()}
plt.plot(orth_vocab_freq_tags, orth_vocab_freqs)
plt.title('Orthography frequncy of each letters')
plt.show()

# Plot phoneme frequencies in descending order (same approach)
phon_ranked = sorted(Phon_vocab.items(), key=lambda kv: kv[1], reverse=True)
phon_vocab_freq_tags = [kv[0] for kv in phon_ranked]
phon_vocab_freqs = [kv[1] for kv in phon_ranked]
# Retained for compatibility only; lossy when two phonemes share a count
f2p = {v:k for k, v in Phon_vocab.items()}
plt.plot(phon_vocab_freq_tags, phon_vocab_freqs)
plt.title('Phonology frequncy of each phoneme')
plt.show()

In [None]:
# Build the word-image dataset: one item per PMSP orthographic form
print(len(Orth_list))
_pmsp_dataset = bit.notoen_dataset(items=Orth_list, fonts_dict=fonts_en)
print(f'_data.__len__():{len(_pmsp_dataset)}')


In [None]:
# Split the PMSP dataset 80 % / 20 % with a seeded generator
seed = 42
N = len(_pmsp_dataset)
N_train = int(N / 10 * 8)
N_test = N - N_train
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset=_pmsp_dataset,
    lengths=[N_train, N_test],
    generator=torch.Generator().manual_seed(seed))

In [None]:
import torchvision

# Data loaders: shuffled batches for training, fixed order for evaluation
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0)  # author's note: any value other than 0 raises an error

test_dataloader = torch.utils.data.DataLoader(
     test_dataset,
     batch_size=32,
     shuffle=False,
     num_workers=0)

# Fetch the first batch of the test loader.
# The built-in next() is used because DataLoader iterators in recent
# PyTorch releases no longer provide a `.next()` method.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# Tile the batch into a single image grid
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

In [None]:
# Number of items in the PMSP dataset (shown as the cell output)
len(_pmsp_dataset)

## A1.1 LeNet による PMSP データの訓練


In [None]:
%%time
# One output unit per PMSP word; derived from the word list instead of a
# hard-coded count, matching how the ResNet cell sizes its output layer
lenet = LeNet_Imagenet(out_size=len(Orth_list))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(lenet.parameters(), lr=0.001)

# Re-seed all RNGs so that the training run is repeatable
fix_seed(seed=42)

# Train for 10 epochs; `losses` is indexed by phase name below
losses = train_model(
    net=lenet,
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10)

# Print and plot the loss history of every phase
for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()
plt.show()

In [None]:
# Accuracy of LeNet on the held-out PMSP loader.
# eval() puts the network in inference mode and no_grad() avoids building an
# autograd graph; the deprecated Variable wrapper is not needed.
lenet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # element 0 of the label batch is what is compared with the prediction
        labels = labels[0]
        outputs = lenet(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest output per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


In [None]:
# Write the LeNet parameters (state_dict only) to a checkpoint file
lenet_state = lenet.state_dict()
torch.save(lenet_state, '2022_0914pmsp_lenet.pt')

## A1.2 ResNet による PMSP データの訓練

In [None]:
%%time
from bit import ResNet18

# ResNet18 for 3-channel input with one output per PMSP word
resnet = ResNet18(img_channels=3, num_classes=len(Orth_list))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.parameters(), lr=0.0001)

# Re-seed all RNGs (NumPy is seeded once more explicitly, as before)
fix_seed(seed=42)
np.random.seed(42)

# Train for 10 epochs; `losses` is indexed by phase name below
losses = train_model(
    net=resnet,
    dataloaders_dict=dict(train=train_dataloader, val=test_dataloader),
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )

# Print and plot the loss history of every phase
for phase in losses.keys():
    phase_losses = losses[phase]
    print(f'{phase} {phase_losses}')
    plt.plot(phase_losses, label=phase)
plt.legend()
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Accuracy of ResNet on the held-out PMSP loader.
# This section trains `resnet`, so `resnet` (not `lenet`) is the network
# evaluated here. eval() puts it in inference mode and no_grad() avoids
# building an autograd graph.
resnet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # element 0 of the label batch is what is compared with the prediction
        labels = labels[0]
        outputs = resnet(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest output per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


In [None]:
# Fetch the first batch of the test loader.
# The built-in next() is used because DataLoader iterators in recent
# PyTorch releases no longer provide a `.next()` method.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

# Classify the batch in inference mode without building an autograd graph
resnet.eval()
with torch.no_grad():
    outputs = resnet(images)
_, predicted = torch.max(outputs, 1)  # index of the largest output per sample
print(f'labels:{labels}')
# "正解数" = number of correct predictions in this batch
print(f'正解数:{(labels[0] == predicted).sum().detach().numpy()}',
      f'/{len(images)}')
# for t, c, p in zip(labels[0], labels[1], predicted):
#     _t = t.detach().numpy()
#     _p = p.detach().numpy()
#     print(_t, _p, c)