In [1]:
import torch
import torch.nn as nn #network 函式
import torch.nn.functional as F #一些方法
import torch.optim as optim #優化方法
from torchvision import datasets, transforms #提供資料集 演算法
import matplotlib.pyplot as plt #圖表

In [2]:
### 資料前處理 ###
import os
import shutil

data_path = os.listdir('C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/hiragana_images') #定義data_path
for i in data_path: #對data_path裡所有資料
    label = i.replace(".jpg","").strip('0123456789') #把label掐頭去尾
    if not os.path.exists('C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/process'): #製作 process 資料夾，存放資料分類
        os.mkdir('C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/process')
        
    data = 'C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/process/'+ label
    if not os.path.exists(data): #找這label的資料夾是否存在 否則新增資料夾
        os.mkdir(data)
        
    data_src = 'C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/hiragana_images/' + i 
    data_copy =  data + "/" + i
    shutil.copy(data_src, data_copy) #複製資料

In [3]:
print(torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
print(device)

##超參數
batch_size = 32 #批次大小
lr = 0.001 #學習率
num_epochs = 10 #代數

2.5.1+cu124
cuda


In [4]:
from sklearn.model_selection import train_test_split
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1), #因為資料輸入後頻道為3 所以這邊做灰度 
    transforms.RandomRotation(degrees=10),
    
    transforms.ToTensor(),  # shape H，W，C —> C，H，W
    transforms.Resize((64,64)), #
    transforms.Normalize((0.1307,), (0.3081,)) # 將資料壓縮至一個範圍，例如：-1,1，這樣跑的效率會比較高
])

dataset = datasets.ImageFolder(root='C:/Users/bymin/OneDrive/桌面/pytorch/hiragana-dataset-master/process', transform=transform) #使用ImageFolder加載資料集
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42) #sklearn分割資料集 train:test 8:2 random_state=42：設定隨機種子，保證每次分割的結果一致。

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True) #shuffle=True 隨機打散資料集
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

label_dict = dataset.class_to_idx #生出 label:index 的字典
reverse_label_dict = {v: k for k, v in label_dict.items()} #原始字典 label_dict 的key值對交換後生成一個新的字典 reverse_label_dict
print(reverse_label_dict)


{0: 'kanaA', 1: 'kanaBA', 2: 'kanaCHI', 3: 'kanaDA', 4: 'kanaE', 5: 'kanaFU', 6: 'kanaHA', 7: 'kanaHE', 8: 'kanaHI', 9: 'kanaHO', 10: 'kanaI', 11: 'kanaJI', 12: 'kanaKA', 13: 'kanaKE', 14: 'kanaKI', 15: 'kanaKO', 16: 'kanaKU', 17: 'kanaMA', 18: 'kanaME', 19: 'kanaMI', 20: 'kanaMO', 21: 'kanaMU', 22: 'kanaN', 23: 'kanaNA', 24: 'kanaNE', 25: 'kanaNI', 26: 'kanaNO', 27: 'kanaNU', 28: 'kanaO', 29: 'kanaPI', 30: 'kanaRA', 31: 'kanaRE', 32: 'kanaRI', 33: 'kanaRO', 34: 'kanaRU', 35: 'kanaSA', 36: 'kanaSE', 37: 'kanaSHI', 38: 'kanaSO', 39: 'kanaSU', 40: 'kanaTA', 41: 'kanaTE', 42: 'kanaTO', 43: 'kanaTSU', 44: 'kanaU', 45: 'kanaWA', 46: 'kanaWO', 47: 'kanaYA', 48: 'kanaYO', 49: 'kanaYU'}


In [5]:
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=0)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(256*6*6, 512) #第一層全連接層 輸入256*8*8 輸出512
        self.fc2 = nn.Linear(512, 50) #第二層全連接層 輸入512 輸出50個類別
        
    def forward(self, x): #
        x = self.pool(F.relu(self.conv1(x))) # 卷積 -> sigmoid -> 池化 84 -> conv1 -> 82 -> pool -> 41
        x = self.pool(F.relu(self.conv2(x))) # 41 -> conv2 -> 39 -> pool -> 19
        x = self.pool(F.relu(self.conv3(x))) # 19 -> conv3 -> 17 -> pool -> 8
        # print(x.shape) #此時x.shape = (batch_size, 256, 8, 8)
        x = x.view(-1, 256*6*6) #圖像攤平成 (batch_size, 256個維度*(8*8)個像素) -1讓 PyTorch 自動推斷 batch 大小 一個向量
        x = F.relu(self.fc1(x)) #將x 丟進fc1後 應用relu激活函數
        x = self.fc2(x) #將x丟進fc2
        
        return x #這邊輸出x不做softmax的訓練結果較好
    
model = Cnn()
model = model.to(device)
print(model)

## 優化器
# optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
# optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)

# criterion = nn.MSELoss().to(device)
# criterion = nn.L1Loss().to(device)
# criterion = nn.BCELoss().to(device)
criterion = nn.CrossEntropyLoss().to(device)  #交叉商

Cnn(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=9216, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=50, bias=True)
)


### resnet 

In [6]:
def accuracy(pred: torch.Tensor, label: torch.Tensor):
  # pred.max(1) 會在第1軸(也就是 class 的那一軸)找出最大值
  # 此函式會回傳(最大值, 最大值所在索引) 這兩個tensor
  # 其中 pred_label = 最大值所在的索引，也就代表模型預測的類別
  _, pred_label = pred.max(1)
  num_correct = (pred_label == label).sum().item() 
  acc = num_correct / label.shape[0] # 準確度 = 正確預測數量 / 總樣本數
  return acc


metric = {'loss': [], 'acc': []}
for i_epoch in range(num_epochs):
  train_loss = [] #宣告train的loss
  train_acc = [] #宣告train的acc
  model.train(mode=True) #模型轉為訓練模式
  for i_batch, (image, label) in enumerate(train_loader): #迭代每個batch 
    image = image.to(device) #圖片丟進gpu
    label = label.to(device) #label丟進gpu

    pred = model.forward(image) #預測pred 將圖片丟進模型 模型輸出的(未經 softmax) 形狀(batch_size, num_classes)
    loss = criterion(pred, label) #使用 CrossEntropyLoss，計算此次 batch 的 loss，將 pred 與 label 做比較

    optimizer.zero_grad()  #上一輪的梯度歸零
    loss.backward() # 反向傳播 計算梯度
    optimizer.step() #optimizer進行參數更新

    train_loss += [loss.item()] #將該 batch 的損失記錄到 train_loss 清單
    train_acc += [accuracy(pred, label)] #計算該 batch 的準確度，並記錄到 train_acc 清單
  metric['loss'] += [sum(train_loss)/ len(train_loader)] #統計當前 epoch 所有 batch 的平均 loss
  metric['acc'] += [sum(train_acc)/ len(train_loader)] #統計當前 epoch 所有 batch 的平均 acc
  print(f'Epoch[{i_epoch+1}/{num_epochs}] loss: {metric["loss"][-1]}, acc: {metric["acc"][-1]}')

Epoch[1/10] loss: 2.7641214418411253, acc: 0.3175
Epoch[2/10] loss: 0.46603880643844603, acc: 0.87
Epoch[3/10] loss: 0.12608944365754723, acc: 0.96
Epoch[4/10] loss: 0.063017196059227, acc: 0.98875
Epoch[5/10] loss: 0.0068777625495567915, acc: 0.9975
Epoch[6/10] loss: 0.0018616850400576368, acc: 1.0
Epoch[7/10] loss: 0.0005485209211474284, acc: 1.0
Epoch[8/10] loss: 0.00015027321569505148, acc: 1.0
Epoch[9/10] loss: 7.456379840732552e-05, acc: 1.0
Epoch[10/10] loss: 6.0981799651926846e-05, acc: 1.0


In [7]:
model.eval()
total_correct = 0
total_samples = 0

with torch.no_grad():
    for image, label in test_loader:
      image = image.to(device)
      label = label.to(device)
      pred = model.forward(image)
      _, pred_label = pred.max(1)
      total_correct += (pred_label == label).sum().item()
      total_samples += label.size(0)


print(f'Total correct: {total_correct}')
print(f'Total samples: {total_samples}')
test_acc = total_correct / total_samples
print(f'Test Accuracy: {test_acc}')

Total correct: 193
Total samples: 200
Test Accuracy: 0.965


In [8]:
from pathlib import Path
model_path = Path('./models/model.pt') #模型儲存位置/檔名
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.jit.script(model).save(model_path)