<a href="https://colab.research.google.com/github/ShinAsakawa/ShinAsakawa.github.io/blob/master/2022notebooks/2022_0916noto_fonts_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Noto font を用いた文字認識実験 + PMSP96 単語認識
* date: 2022_0906
* author: 浅川伸一

---



In [None]:
#!rm -rf bit
#!cat bit/torch_nikogamulin_resnet.py

In [None]:
# Notebook setup: enable autoreload and high-resolution inline figures,
# then make sure the `bit` helper package is importable.
%reload_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'
try:
    import bit
except ImportError:
    # `bit` is not importable: install ipynbname and clone the `bit`
    # repository into the current working directory.
    !pip install ipynbname --upgrade > /dev/null
    !git clone https://github.com/ShinAsakawa/bit.git > /dev/null
# Import again so that `bit` is bound even when the fallback above ran.
import bit
isColab = bit.isColab
HOME = bit.HOME

if isColab:
    # As of 2022-09-16 the Pillow version on Colab is old and has trouble
    # rendering TrueType fonts, so upgrade it to 9.2.0 or later.
    !pip install --upgrade Pillow

## 1 Noto フォントの登録

In [None]:
import os
from glob import  glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

# Noto font dictionaries provided by `bit` (name -> font object passed to
# ImageDraw.text below).
fonts_jp = bit.get_notojp_fonts()
fonts_en = bit.get_notoen_fonts()

# Defaults for the sample images rendered in this notebook.
default_width, default_height = 224, 224
default_bgcolor = (255, 255, 255)
default_fontsize = 28

# Draw one tile per English font; each tile shows the font's own name
# rendered with that font.
j_max = 6  # tiles per row
# Keep the original 4-row layout, but add rows when there are more than
# 4 * 6 fonts so that indexing never runs past the axes grid.
n_rows = max(4, -(-len(fonts_en) // j_max))
fig, ax = plt.subplots(n_rows, j_max, figsize=(18, 3 * n_rows), squeeze=False)
for idx, (font_name, font) in enumerate(fonts_en.items()):
    i, j = divmod(idx, j_max)
    img = Image.new(
        mode='RGB',
        size=(default_width, default_height),
        color=default_bgcolor)
    draw_canvas = ImageDraw.Draw(img)
    draw_canvas.text(
        xy=(2, 84),
        text=font_name,
        font=font,
        fill=(0, 0, 0))

    ax[i, j].imshow(img)
    #ax[i,j].set_title(font_name)
    ax[i, j].set_xticks([])
    ax[i, j].set_yticks([])

plt.show()

## 2 PyTorch データセットの設定

In [None]:
#print([x for x in digit_chars+alphabet_chars])
from bit import get_text_img

# Render the word "make" with its bounding box drawn, then unpack the
# three values the helper returns.
rendered = get_text_img(text="make", draw_bbox=True)
img, draw_canvas, bbox = rendered
print(f'bbox:{bbox}')

# Show the rendered image in a small figure.
plt.figure(figsize=(4, 4))
plt.imshow(img)
plt.show()
#print(f'type(img):{type(img)}')

In [None]:
verbose=True
from bit import notoen_dataset
from bit import get_notoen_fonts
try:
    import japanize_matplotlib
except ImportError:
    !pip install japanize_matplotlib
    import japanize_matplotlib
noto_fonts = get_notoen_fonts()

digit_chars = '0123456789'
alphabet_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

_dataset = notoen_dataset(
    items=[c for c in digit_chars],
    #items=[c for c in digit_chars+alphabet_chars],
    fonts_dict=noto_fonts)
print(f'_dataset.__len__():{_dataset.__len__()}')

_labels = set([l[1] for l in _dataset.labels])
print(f'len(_labels):{len(_labels)}')


## 3 PyTorch 乱数の種の設定

In [None]:
# PyTorch の seed の設定関連 再現性確保のため
# https://qiita.com/takubb/items/7d45ae701390912c7629
# https://qiita.com/si1242/items/d2f9195c08826d87d6ad
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Select the compute resource: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

def fix_seed(seed):
    """Seed every random number generator used here, for reproducibility.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed handed to each generator.
    """
    seeders = (
        random.seed,                 # Python standard library
        np.random.seed,              # NumPy global generator
        torch.manual_seed,           # PyTorch
        torch.cuda.manual_seed_all,  # every CUDA device
        torch.random.manual_seed,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False


# Fix the seed once for the whole notebook.
seed = 42
fix_seed(seed)

# Fix the NumPy random seed inside DataLoader worker subprocesses.
def worker_init_fn(worker_id):
    """Reseed NumPy's global generator for one DataLoader worker.

    Meant to be passed as ``worker_init_fn=`` to
    ``torch.utils.data.DataLoader``. The new seed is the first word of the
    current NumPy generator state offset by ``worker_id``, so each worker
    id gets its own seed.

    Args:
        worker_id: Integer id of the worker process.

    Returns:
        None.
    """
    # Convert to a Python int before adding so the sum cannot overflow a
    # fixed-width NumPy integer.
    base_seed = int(np.random.get_state()[1][0])
    # np.random.seed only accepts values in [0, 2**32 - 1].
    np.random.seed((base_seed + worker_id) % 2**32)
    
# Example DataLoader construction (passes worker_init_fn so that each worker process is seeded):
# train_loader = torch.utils.data.DataLoader(train_dataset,
#                                            batch_size=16,  # バッチサイズ
#                                            shuffle=True,  # データシャッフル
#                                            num_workers=2,  # 高速化
#                                            pin_memory=True,  # 高速化
#                                            worker_init_fn=worker_init_fn
#                                            )

## 4 訓練データと検証データの作成

In [None]:
# Split the dataset 80 % / 20 % into training and test subsets, using a
# dedicated generator with a fixed seed for the shuffle.
seed = 42
N = len(_dataset)
N_train = (N * 8) // 10
N_test = N - N_train
split_rng = torch.Generator().manual_seed(seed)
train_dataset, test_dataset = torch.utils.data.random_split(
    _dataset, [N_train, N_test], generator=split_rng)

## 5 PyTorch データローダの作成

In [None]:
import torchvision

# dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0)  # must stay 0; other values raise an error here

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0)

# Take the first batch of the (unshuffled) test loader for a visual check.
# The built-in next() is used because the iterator's `.next()` method is
# not available in current PyTorch releases.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# helper function to show an image
def matplotlib_imshow(img,
                      one_channel=False,
                      figsize=(15,15)
                     ):
    """Show an image tensor in a new matplotlib figure.

    Args:
        img: Tensor whose first axis is the channel axis; values are
            divided by 255 and clipped to [0, 1] before display.
        one_channel: When True, average over the channel axis and draw the
            result with the "Greys" colormap.
        figsize: Size of the figure that is created.

    The input tensor is left unmodified.
    """
    if one_channel:
        img = img.mean(dim=0)
    #img = img / 2 + 0.5     # unnormalize
    # Scale out of place: an in-place `/=` would modify the caller's tensor
    # (darkening it on every call) and fails for integer tensors.
    img = img / 255
    # detach()/cpu() make the conversion work for tensors that require
    # gradients or live on a GPU.
    npimg = img.detach().cpu().numpy().clip(0, 1)

    plt.figure(figsize=figsize)
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        # matplotlib expects the channel axis last.
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        
# Display the image grid in colour in a 14 x 14 figure.
matplotlib_imshow(img=img_grid, figsize=(14, 14), one_channel=False)

In [None]:
# Sorted list of the distinct values found in the second field of every
# label entry of the dataset.
_labels = sorted({entry[1] for entry in _dataset.labels})
print(len(_labels), _labels)

## 6 `train_model()` の定義

In [None]:
from bit import train_model

# Report how many samples ended up in each split.
n_train_samples, n_test_samples = len(train_dataset), len(test_dataset)
print(f'train_dataset.__len__():{n_train_samples},',
      f'test_dataset.__len__():{n_test_samples}')

## 7 LeNet による認識実験

In [None]:
from bit import LeNet_Imagenet

import torch.nn as nn
import torch.optim as optim

# Use the GPU when one is visible. Note that this rebinds `device` to a
# plain string.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'device: {device}')

# Model with one output unit per class label, plus its loss and optimizer.
lenet = LeNet_Imagenet(out_size=len(_labels), device=device)
#lenet.to(device=device)
criterion = nn.CrossEntropyLoss()
criterion.to(device=device)
#optimizer = optim.Adam(lenet.parameters(), lr=0.0001)
optimizer = optim.Adam(lenet.parameters(), lr=0.001)


### 7.1. 訓練の実施

In [None]:
%%time
seed = 42
fix_seed(seed)

losses = train_model(
    net=lenet,            
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    device=device,
    )   

for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()    
plt.show()

In [None]:
# Accuracy of LeNet on the test loader.
# eval() puts the model in inference mode and no_grad() stops autograd from
# recording the forward passes; neither is needed for training here.
lenet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # The first element of `labels` is the tensor compared with the
        # predicted class indices.
        labels = labels[0].to(device)
        images = images.to(device)
        outputs = lenet(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int.
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


## 8. ResNet による認識実験

In [None]:
%%time
from bit import ResNet18

resnet = ResNet18(img_channels=3, 
                  num_classes=len(_labels),
                  device=device)
optimizer = optim.Adam(resnet.parameters(), lr=0.0001)
#optimizer = optim.Adam(resnet.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

seed = 42
fix_seed(seed)

losses = train_model(
    net=resnet,            
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )   

for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()    
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Accuracy of ResNet on the test loader.
# eval() makes layers such as batch normalisation use their inference
# behaviour, and no_grad() stops autograd from recording the forward passes.
resnet.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        images = images.to(device)
        # The first element of `labels` is the tensor compared with the
        # predicted class indices.
        labels = labels[0].to(device)
        outputs = resnet(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int.
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


## 9. MLP による認識実験

In [None]:
from bit import MLP_Imagenet

In [None]:
%%time
mlp = MLP_Imagenet(out_size=len(_labels))
#optimizer = optim.Adam(mlp.parameters(), lr=0.001)
optimizer = optim.Adam(resnet.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

seed = 42
fix_seed(seed)

losses = train_model(
    net=mlp,            
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )   

for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()    
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Accuracy of the MLP on the test loader.
# eval() puts the model in inference mode and no_grad() stops autograd from
# recording the forward passes.
mlp.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # The first element of `labels` is the tensor compared with the
        # predicted class indices.
        labels = labels[0].to(device)
        images = images.to(device)
        outputs = mlp(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int.
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


# A1. PMSP データの準備

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import requests

# PMSP data file published by Plaut
pmsp_url = 'https://www.cnbc.cmu.edu/~plaut/xerion/PMSPdata.txt'

# The local file name is the last path component of the URL
pmsp_data_fname = pmsp_url.split('/')[-1]
if not os.path.exists(pmsp_data_fname):
    # Download only when there is no local copy yet
    print(f'pmsp_url:{pmsp_url}')
    r = requests.get(pmsp_url, timeout=60)
    # Stop on HTTP errors instead of storing an error page as data
    r.raise_for_status()
    # The Content-Length header is optional, so measure the body itself
    total_length = len(r.content)
    print(f'{pmsp_data_fname} をダウンロード中 {total_length} バイト')
    # Create the file only after the request succeeded; otherwise a failure
    # would leave an empty file that later runs mistake for a cached copy
    with open(pmsp_data_fname, 'wb') as f:
        f.write(r.content)

# Read every line of `PMSPdata.txt`
with open(pmsp_data_fname, 'r') as f:
    pmsp_lines = f.readlines()

# Register the rows in a dictionary keyed by word
pmsp = {}
for lineno, raw_line in enumerate(pmsp_lines):
    fields = raw_line.strip().split('\t')
    if len(fields) != 7:
        # Rows without exactly seven tab-separated columns are only reported
        print(lineno, fields)
        continue
    word, phon, _type, sim1, sim1raw, sim2sqrt, sim3rt = fields
    pmsp[word] = {
        'phon': phon,
        'type': _type,
        'sim1': float(sim1),
        # the fifth column is stored under the key 'sim2raw'
        'sim2raw': float(sim1raw),
        'sim2sqrt': float(sim2sqrt),
        'sim3rt': float(sim3rt),
    }

# Keep only the orthography (the dictionary keys) and the phonology
# (each entry's 'phon' field) as two parallel lists
Orth_list = list(pmsp)
Phon_list = [entry['phon'] for entry in pmsp.values()]

print(f'len(Orth_list):{len(Orth_list)}, len(Phon_list):{len(Phon_list)}')

# Count how often each letter and each phoneme symbol occurs
Orth_vocab, Phon_vocab = {}, {}
for orth, phon in zip(Orth_list, Phon_list):
    for ch in orth:
        Orth_vocab[ch] = Orth_vocab.get(ch, 0) + 1
    for p in phon:
        # '/' is skipped and never counted
        if p == '/':
            continue
        Phon_vocab[p] = Phon_vocab.get(p, 0) + 1
# print(f'Orth_vocab:{Orth_vocab}, Phon_vocab:{Phon_vocab}')

# Plot the letter frequencies, most frequent first.
# The (symbol, count) pairs are sorted directly: inverting the dictionary
# into count -> symbol would merge symbols that share a count and put the
# wrong labels on the x axis.
orth_ranked = sorted(Orth_vocab.items(), key=lambda item: item[1], reverse=True)
orth_vocab_freq_tags = [symbol for symbol, _ in orth_ranked]
orth_vocab_freqs = [count for _, count in orth_ranked]
plt.plot(orth_vocab_freq_tags, orth_vocab_freqs)
plt.title('Orthography frequncy of each letters')
plt.show()

# Plot the phoneme frequencies the same way.
phon_ranked = sorted(Phon_vocab.items(), key=lambda item: item[1], reverse=True)
phon_vocab_freq_tags = [symbol for symbol, _ in phon_ranked]
phon_vocab_freqs = [count for _, count in phon_ranked]
plt.plot(phon_vocab_freq_tags, phon_vocab_freqs)
plt.title('Phonology frequncy of each phoneme')
plt.show()

In [None]:
# Build the image dataset from the PMSP words and the English Noto fonts.
print(len(Orth_list))
_pmsp_dataset = bit.notoen_dataset(items=Orth_list, fonts_dict=fonts_en)
print(f'_data.__len__():{len(_pmsp_dataset)}')


In [None]:
# Split the PMSP dataset 80 % / 20 % into training and test subsets, using
# a dedicated generator with a fixed seed for the shuffle.
seed = 42
N = len(_pmsp_dataset)
N_train = (N * 8) // 10
N_test = N - N_train
split_rng = torch.Generator().manual_seed(seed)
train_dataset, test_dataset = torch.utils.data.random_split(
    _pmsp_dataset, [N_train, N_test], generator=split_rng)

In [None]:
import torchvision

# dataloaders
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0)  # must stay 0; other values raise an error here

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0)

# Take the first batch of the (unshuffled) test loader for a visual check.
# The built-in next() is used because the iterator's `.next()` method is
# not available in current PyTorch releases.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

In [None]:
# Number of samples in the PMSP dataset (shown as the cell output).
len(_pmsp_dataset)

## A1.1 LeNet による PMSP データの訓練


In [None]:
%%time
lenet = LeNet_Imagenet(out_size=2998)
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(lenet.parameters(), lr=0.0001)
optimizer = optim.Adam(lenet.parameters(), lr=0.001)

seed = 42
fix_seed(seed=seed)

losses = train_model(
    net=lenet,            
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10)   

for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()    
plt.show()

In [None]:
# Accuracy of LeNet on the PMSP test loader.
# eval() puts the model in inference mode and no_grad() stops autograd from
# recording the forward passes.
lenet.eval()
# Feed batches on whatever device the model's parameters live on.
model_device = next(lenet.parameters()).device
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # The first element of `labels` is the tensor compared with the
        # predicted class indices.
        labels = labels[0].to(model_device)
        # Tensors are passed directly; wrapping them in the deprecated
        # torch.autograd.Variable is unnecessary.
        outputs = lenet(images.to(model_device))
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int.
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


In [None]:
# Save the trained LeNet weights (state dict only).
lenet_weights_fname = '2022_0914pmsp_lenet.pt'
torch.save(lenet.state_dict(), lenet_weights_fname)

## A1.2 ResNet による PMSP データの訓練

In [None]:
%%time
from bit import ResNet18

resnet = ResNet18(img_channels=3, num_classes=len(Orth_list))
optimizer = optim.Adam(resnet.parameters(), lr=0.0001)
#optimizer = optim.Adam(resnet.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

fix_seed(seed=42)
np.random.seed(42)
losses = train_model(
    net=resnet,            
    dataloaders_dict={'train':train_dataloader, 'val':test_dataloader},
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10,
    )   

for phase in losses.keys():
    print(f'{phase} {losses[phase]}')
    plt.plot(losses[phase], label=phase)
plt.legend()    
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Accuracy of ResNet on the PMSP test loader.
# The ResNet trained just above is the model evaluated here; the loop
# previously called `lenet`, which only re-measured the earlier model.
# eval() makes layers such as batch normalisation use their inference
# behaviour, and no_grad() stops autograd from recording the forward passes.
resnet.eval()
# Feed batches on whatever device the model's parameters live on.
model_device = next(resnet.parameters()).device
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dataloader:
        # The first element of `labels` is the tensor compared with the
        # predicted class indices.
        labels = labels[0].to(model_device)
        outputs = resnet(images.to(model_device))
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int.
        correct += (predicted == labels).sum().item()

print(f'検証データセットでの精度: {int(100 * correct / total):3d} %')


In [None]:
# Take the first batch of the test loader. The built-in next() is used
# because the iterator's `.next()` method is not available in current
# PyTorch releases.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

# Classify the batch with the ResNet, on the device its parameters live on.
resnet.eval()
model_device = next(resnet.parameters()).device
with torch.no_grad():
    outputs = resnet(images.to(model_device))
_, predicted = torch.max(outputs, 1)
print(f'labels:{labels}')
# .item() returns a Python int on any device; .numpy() only works for
# CPU tensors.
n_correct = (labels[0].to(model_device) == predicted).sum().item()
print(f'正解数:{n_correct}',
      f'/{len(images)}')

# B. PyTorch 公式訓練済モデルによる文字認識

In [None]:
import torch
import torchvision
from torchvision import models, transforms

# Define each model and download its pretrained weights.
# Maps the short name used as the `DNNs` key to the torchvision constructor.
_model_factories = {
    'resnet18': models.resnet18,
    'alexnet': models.alexnet,
    'vgg16': models.vgg16,
    'squeezenet': models.squeezenet1_0,
    'densenet': models.densenet161,
    'inception': models.inception_v3,
    'googlenet': models.googlenet,
    'shufflenet': models.shufflenet_v2_x1_0,
    'mobilenet': models.mobilenet_v2,
    'resnext50_32x4d': models.resnext50_32x4d,
    'wide_resnet50_2': models.wide_resnet50_2,
    'mnasnet': models.mnasnet1_0,
}

DNNs = {}
for _model_name, _factory in _model_factories.items():
    DNNs[_model_name] = _factory(weights='DEFAULT', progress=True)

In [None]:
# Choose the model to try from the ones defined above.
# The last uncommented assignment is the one that takes effect.
net = DNNs['resnet18'] 
#net = DNNs['squeezenet']
#net = DNNs['googlenet']
#net = DNNs['shufflenet']
#net = DNNs['mobilenet']
#net = DNNs['vgg16']
#net = DNNs['alexnet']

In [None]:
transform = transforms.Compose([
    transforms.ToTensor()])

# Per-channel RGB mean and standard deviation. They are only defined here;
# this cell does not add a normalisation step to `transform`.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Create the model and load its pretrained weights.
use_pretrained = True  # use the pretrained parameters
# `weights=` replaces the deprecated `pretrained=` flag and matches the way
# the other torchvision models in this notebook are created.
resnet_pt = models.resnet18(weights='DEFAULT' if use_pretrained else None)
#resnet_pt

In [None]:
# Replace the final fully connected layer so that its number of outputs
# matches the data: one unit per PMSP word. Both sizes are read from the
# objects themselves rather than hard-coded as 512 and 2998.
resnet_pt.fc = nn.Linear(in_features=resnet_pt.fc.in_features,
                         out_features=len(Orth_list))

# Every parameter is trainable, so the whole network is fine-tuned.
for name, param in resnet_pt.named_parameters():
    param.requires_grad = True
# Show the model as the cell output (the name was misspelled `resnet_pc`,
# which raised NameError).
resnet_pt

In [None]:
# Loss and optimizer for fine-tuning the pretrained ResNet18.
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(resnet.parameters(), lr=0.001)
optimizer = optim.Adam(resnet_pt.parameters(), lr=0.0001)

# Reseed all generators (NumPy is seeded a second time explicitly).
fix_seed(seed=42)
np.random.seed(42)

# Train for 10 epochs; the test loader doubles as the validation set.
dataloaders = {'train': train_dataloader, 'val': test_dataloader}
losses = train_model(
    net=resnet_pt,
    dataloaders_dict=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=10)

# Print and plot the loss history of every phase.
for phase, history in losses.items():
    print(f'{phase} {history}')
    plt.plot(history, label=phase)
plt.legend()
plt.title('ResNet による文字認識')
plt.show()

In [None]:
# Take the first batch of the test loader. The built-in next() is used
# because the iterator's `.next()` method is not available in current
# PyTorch releases.
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

# create grid of images
img_grid = torchvision.utils.make_grid(images)

# show images
matplotlib_imshow(img_grid, one_channel=False, figsize=(10,10))

# Classify the batch with the fine-tuned model trained in this section
# (`resnet_pt`); the cell previously called `resnet`, the model from the
# earlier section.
resnet_pt.eval()
model_device = next(resnet_pt.parameters()).device
with torch.no_grad():
    outputs = resnet_pt(images.to(model_device))
_, predicted = torch.max(outputs, 1)
print(f'labels:{labels}')
# .item() returns a Python int on any device; .numpy() only works for
# CPU tensors.
n_correct = (labels[0].to(model_device) == predicted).sum().item()
print(f'正解数:{n_correct}',
      f'/{len(images)}')
# for t, c, p in zip(labels[0], labels[1], predicted):
#     _t = t.detach().numpy()
#     _p = p.detach().numpy()
#     print(_t, _p, c)

In [None]:
# Save the weights of the fine-tuned torchvision ResNet18 (`resnet_pt`),
# which is the model this file name describes; `resnet` is the model from
# the earlier section.
model_fname_saved = '2022_0916pytorch_resnet18_pmsp.pt'
torch.save(resnet_pt.state_dict(), model_fname_saved)


# C. 日本語による文字認識

In [None]:
# To display sample images, change the next line to `verbose = True`.
verbose = False

import os
import sys
import jaconv
import numpy as np
from termcolor import colored

hira_chars = 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん'
kata_chars = 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ'
# 以下の文字はどうしましょうかね?
# ゔヷヸヹヺ
digit_chars = '０１２３４５６７８９'
alphabet_chars = 'ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ'
symbol_chars = '、。，．・：；？！゛゜ヽヾゝゞ〃仝々〆〇＋−±×÷＝≠＜＞≦≧∞∴♂♀°′″℃¥＄¢£％＃＆＊＠§☆★○●◎◇◆□■△▲▽▼※〒→←↑↓〓∈∋⊆⊇⊂⊃∪∩∧∨¬⇒⇔∀∃∠⊥⌒∂∇≡≒≪≫√∽∝∵∫∬Å‰♯♭♪†‡¶◯０'

# 常用漢字
jyoyo_chars = '亜哀愛悪握圧扱安暗案以位依偉囲委威尉意慰易為異移維緯胃衣違遺医井域育一壱逸稲芋印員因姻引飲院陰隠韻右宇羽雨渦浦運雲営影映栄永泳英衛詠鋭液疫益駅悦謁越閲円園宴延援沿演炎煙猿縁遠鉛塩汚凹央奥往応押横欧殴王翁黄沖億屋憶乙卸恩温穏音下化仮何価佳加可夏嫁家寡科暇果架歌河火禍稼箇花荷華菓課貨過蚊我画芽賀雅餓介会解回塊壊快怪悔懐戒拐改械海灰界皆絵開階貝劾外害慨概涯街該垣嚇各拡格核殻獲確穫覚角較郭閣隔革学岳楽額掛潟割喝括活渇滑褐轄且株刈乾冠寒刊勘勧巻喚堪完官寛干幹患感慣憾換敢棺款歓汗漢環甘監看管簡緩缶肝艦観貫還鑑間閑関陥館丸含岸眼岩頑顔願企危喜器基奇寄岐希幾忌揮机旗既期棋棄機帰気汽祈季紀規記貴起軌輝飢騎鬼偽儀宜戯技擬欺犠疑義議菊吉喫詰却客脚虐逆丘久休及吸宮弓急救朽求泣球究窮級糾給旧牛去居巨拒拠挙虚許距漁魚享京供競共凶協叫境峡強恐恭挟教橋況狂狭矯胸脅興郷鏡響驚仰凝暁業局曲極玉勤均斤琴禁筋緊菌襟謹近金吟銀九句区苦駆具愚虞空偶遇隅屈掘靴繰桑勲君薫訓群軍郡係傾刑兄啓型契形径恵慶憩掲携敬景渓系経継茎蛍計警軽鶏芸迎鯨劇撃激傑欠決潔穴結血月件倹健兼券剣圏堅嫌建憲懸検権犬献研絹県肩見謙賢軒遣険顕験元原厳幻弦減源玄現言限個古呼固孤己庫弧戸故枯湖誇雇顧鼓五互午呉娯後御悟碁語誤護交侯候光公功効厚口向后坑好孔孝工巧幸広康恒慌抗拘控攻更校構江洪港溝甲皇硬稿紅絞綱耕考肯航荒行衡講貢購郊酵鉱鋼降項香高剛号合拷豪克刻告国穀酷黒獄腰骨込今困墾婚恨懇昆根混紺魂佐唆左差査砂詐鎖座債催再最妻宰彩才採栽歳済災砕祭斎細菜裁載際剤在材罪財坂咲崎作削搾昨策索錯桜冊刷察撮擦札殺雑皿三傘参山惨散桟産算蚕賛酸暫残仕伺使刺司史嗣四士始姉姿子市師志思指支施旨枝止死氏祉私糸紙紫肢脂至視詞詩試誌諮資賜雌飼歯事似侍児字寺慈持時次滋治璽磁示耳自辞式識軸七執失室湿漆疾質実芝舎写射捨赦斜煮社者謝車遮蛇邪借勺尺爵酌釈若寂弱主取守手朱殊狩珠種趣酒首儒受寿授樹需囚収周宗就州修愁拾秀秋終習臭舟衆襲週酬集醜住充十従柔汁渋獣縦重銃叔宿淑祝縮粛塾熟出術述俊春瞬准循旬殉準潤盾純巡遵順処初所暑庶緒署書諸助叙女序徐除傷償勝匠升召商唱奨宵将小少尚床彰承抄招掌昇昭晶松沼消渉焼焦照症省硝礁祥称章笑粧紹肖衝訟証詔詳象賞鐘障上丈乗冗剰城場壌嬢常情条浄状畳蒸譲醸錠嘱飾植殖織職色触食辱伸信侵唇娠寝審心慎振新森浸深申真神紳臣薪親診身辛進針震人仁刃尋甚尽迅陣酢図吹垂帥推水炊睡粋衰遂酔錘随髄崇数枢据杉澄寸世瀬畝是制勢姓征性成政整星晴正清牲生盛精聖声製西誠誓請逝青静斉税隻席惜斥昔析石積籍績責赤跡切拙接摂折設窃節説雪絶舌仙先千占宣専川戦扇栓泉浅洗染潜旋線繊船薦践選遷銭銑鮮前善漸然全禅繕塑措疎礎祖租粗素組訴阻僧創双倉喪壮奏層想捜掃挿操早曹巣槽燥争相窓総草荘葬藻装走送遭霜騒像増憎臓蔵贈造促側則即息束測足速俗属賊族続卒存孫尊損村他多太堕妥惰打駄体対耐帯待怠態替泰滞胎袋貸退逮隊代台大第題滝卓宅択拓沢濯託濁諾但達奪脱棚谷丹単嘆担探淡炭短端胆誕鍛団壇弾断暖段男談値知地恥池痴稚置致遅築畜竹蓄逐秩窒茶嫡着中仲宙忠抽昼柱注虫衷鋳駐著貯丁兆帳庁弔張彫徴懲挑朝潮町眺聴脹腸調超跳長頂鳥勅直朕沈珍賃鎮陳津墜追痛通塚漬坪釣亭低停偵貞呈堤定帝底庭廷弟抵提程締艇訂逓邸泥摘敵滴的笛適哲徹撤迭鉄典天展店添転点伝殿田電吐塗徒斗渡登途都努度土奴怒倒党冬凍刀唐塔島悼投搭東桃棟盗湯灯当痘等答筒糖統到討謄豆踏逃透陶頭騰闘働動同堂導洞童胴道銅峠匿得徳特督篤毒独読凸突届屯豚曇鈍内縄南軟難二尼弐肉日乳入如尿任妊忍認寧猫熱年念燃粘悩濃納能脳農把覇波派破婆馬俳廃拝排敗杯背肺輩配倍培媒梅買売賠陪伯博拍泊白舶薄迫漠爆縛麦箱肌畑八鉢発髪伐罰抜閥伴判半反帆搬板版犯班畔繁般藩販範煩頒飯晩番盤蛮卑否妃彼悲扉批披比泌疲皮碑秘罷肥被費避非飛備尾微美鼻匹必筆姫百俵標氷漂票表評描病秒苗品浜貧賓頻敏瓶不付夫婦富布府怖扶敷普浮父符腐膚譜負賦赴附侮武舞部封風伏副復幅服福腹複覆払沸仏物分噴墳憤奮粉紛雰文聞丙併兵塀幣平弊柄並閉陛米壁癖別偏変片編辺返遍便勉弁保舗捕歩補穂募墓慕暮母簿倣俸包報奉宝峰崩抱放方法泡砲縫胞芳褒訪豊邦飽乏亡傍剖坊妨帽忘忙房暴望某棒冒紡肪膨謀貿防北僕墨撲朴牧没堀奔本翻凡盆摩磨魔麻埋妹枚毎幕膜又抹末繭万慢満漫味未魅岬密脈妙民眠務夢無矛霧婿娘名命明盟迷銘鳴滅免綿面模茂妄毛猛盲網耗木黙目戻問紋門匁夜野矢厄役約薬訳躍柳愉油癒諭輸唯優勇友幽悠憂有猶由裕誘遊郵雄融夕予余与誉預幼容庸揚揺擁曜様洋溶用窯羊葉要謡踊陽養抑欲浴翌翼羅裸来頼雷絡落酪乱卵欄濫覧利吏履理痢裏里離陸律率立略流留硫粒隆竜慮旅虜了僚両寮料涼猟療糧良量陵領力緑倫厘林臨輪隣塁涙累類令例冷励礼鈴隷零霊麗齢暦歴列劣烈裂廉恋練連錬炉路露労廊朗楼浪漏老郎六録論和話賄惑枠湾腕'

# Full character inventory. Symbols are currently left out; the variant
# that includes them is
#   hira_chars + kata_chars + digit_chars + alphabet_chars + symbol_chars + jyoyo_chars
# and the variant without kanji is
#   hira_chars + kata_chars + digit_chars + alphabet_chars
all_chars = ''.join(
    [hira_chars, kata_chars, digit_chars, alphabet_chars, jyoyo_chars])


def _bold_blue(text):
    """Return `text` coloured bold blue for terminal output."""
    return colored(text, 'blue', attrs=['bold'])


# Total number of predefined characters.
print('事前に定義した文字数:',
      _bold_blue(f' {len(all_chars)}'),
      ' 文字 (ひらがな，カタカナ，数字，常用漢字)')

# Size of each character class, one line per class.
_class_counts = [
    ('ひらがな:', f'{len(hira_chars)}'),
    ('カタカナ:', f' {len(kata_chars)}'),
    ('数字:', f'{len(digit_chars)}'),
    ('アルファベット:', f'{len(alphabet_chars)}'),
    ('常用漢字:', f'{len(jyoyo_chars)}'),
]
for _class_label, _count_text in _class_counts:
    print(_class_label, _bold_blue(_count_text), '文字')

print('len(all_chars):', _bold_blue(f'{len(all_chars)}'))