In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler
import torch.utils.data as data
import torchvision
from torchvision import datasets, models, transforms
from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import copy
import random
import time
from PIL import Image

In [12]:
# Load the evaluation split; later cells read its 'char', 'font' and 'path' columns.
test = pd.read_csv('test.csv')

In [11]:
# Load the training split; its 'char' and 'code' columns define the character <-> class-id mapping below.
train= pd.read_csv('train.csv')

In [24]:
# Lookup from class id ('code' column) to the character it stands for, built from the training split.
char_dict = pd.Series(data=train['char'].values, index=train['code']).to_dict()

In [25]:
# Inverse lookup: character -> class id. If a character is listed under several ids,
# the id iterated last wins.
char_to_code = {char: code for code, char in char_dict.items()}

In [26]:
# Attach the class id of every test character using the mapping built from the training split.
test['code'] = test['char'].map(char_to_code)

# A character that never occurs in the training split maps to NaN. That silently turns the
# whole column into floats, and the "font code" labels built from it can no longer be parsed
# with int(). Fail here with a clear message instead of deep inside the data loader.
if test['code'].isna().any():
    _unmapped = sorted(set(test.loc[test['code'].isna(), 'char']))
    raise ValueError(
        'test.csv contains {} character(s) with no code in train.csv, e.g. {}'.format(
            len(_unmapped), _unmapped[:10]
        )
    )

In [27]:
# Combined target string "<font> <code>"; the dataset class splits it back into two integers.
test['label'] = [
    '{} {}'.format(str(font), str(code))
    for font, code in zip(test['font'], test['code'])
]

In [28]:
# Display the augmented frame to eyeball the new 'code' and 'label' columns.
test

Unnamed: 0.1,Unnamed: 0,path,font,char,code,label
0,0,char\一字\一字 楷书 欧阳询.jpg,0,一,0,0 0
1,1,char\一字\一字 篆书 徐三庚.jpg,4,一,0,4 0
2,2,char\一字\一字 草书 孙过庭.jpg,3,一,0,3 0
3,3,char\一字\一字 草书 张旭.jpg,3,一,0,3 0
4,4,char\一字\一字 草书 毛泽东.jpg,3,一,0,3 0
...,...,...,...,...,...,...
13395,13395,char\龟字\龟字 草书 邓文原.jpg,3,龟,2309,3 2309
13396,13396,char\龟字\龟字 行书 米芾.jpg,2,龟,2309,2 2309
13397,13397,char\龟字\龟字 行书 苏轼.jpg,2,龟,2309,2 2309
13398,13398,char\龟字\龟字 行书 赵孟頫.jpg,2,龟,2309,2 2309


In [5]:
class MyModel(nn.Module):
    """ResNet-50 backbone with two classification heads (font and character).

    The backbone's final ``fc`` layer is replaced by an identity, so it outputs the
    pooled feature vector. ``fc1`` predicts the font from those features. ``fc2``
    predicts the character from the features concatenated with a font vector.

    While the module is in training mode and labels are supplied, the font vector is
    either the ground-truth one-hot (teacher forcing) or the softmax of ``fc1``'s
    logits. The choice is made once per batch with probability
    ``k / (k + exp(eps / k))`` (inverse-sigmoid decay), where ``eps`` counts the
    training batches seen so far. In evaluation mode, or when no labels are given,
    the predicted font distribution is always used, so ground-truth labels never
    leak into the character prediction and the schedule is not advanced.

    Args:
        num_classes1: number of font classes (size of the first head and of the
            font vector fed to the second head).
        num_classes2: number of character classes (size of the second head).
    """

    def __init__(self, num_classes1, num_classes2):
        super(MyModel, self).__init__()
        # Schedule state: `eps` is the batch counter, `k` controls how fast teacher
        # forcing is phased out. Neither is part of the state_dict.
        self.eps = 1
        self.k = torch.FloatTensor([10])
        self.model_resnet = models.resnet50(pretrained=True)
        num_ftrs = self.model_resnet.fc.in_features
        self.model_resnet.fc = nn.Identity()
        self.fc1 = nn.Linear(num_ftrs, num_classes1)
        # Input is the backbone features plus one font-vector entry per font class
        # (2048 + 5 = 2053 for the configuration used in this notebook).
        self.fc2 = nn.Linear(num_ftrs + num_classes1, num_classes2)
        self.softmax = nn.Softmax(dim=1)

    def font_code(self, y):
        """Return the float one-hot encoding of the font labels ``y``.

        The width equals the number of outputs of ``fc1``. The result lives on the
        same device as ``y``.
        """
        indices = y.reshape(-1).long()
        return F.one_hot(indices, num_classes=self.fc1.out_features).float()

    def forward(self, x, y=None):
        """Compute ``(font_logits, char_logits)`` for a batch of images.

        Args:
            x: image batch accepted by the ResNet backbone.
            y: optional ground-truth font labels, only consulted in training mode.

        Returns:
            Tuple ``(out1, out2)`` of font logits and character logits.
        """
        x = self.model_resnet(x)
        out1 = self.fc1(x)

        use_ground_truth = False
        if self.training and y is not None:
            teacher_prob = self.k / (self.k + torch.exp(self.eps / self.k))
            use_ground_truth = torch.rand(1).item() < teacher_prob.item()
            # Only training batches advance the schedule.
            self.eps += 1

        if use_ground_truth:
            font = self.font_code(y).to(device=x.device, dtype=x.dtype)
        else:
            font = self.softmax(out1)

        combined = torch.cat((x.view(x.size(0), -1), font.view(font.size(0), -1)), dim=1)
        out2 = self.fc2(combined)
        return out1, out2

In [6]:
# Two heads: 5 font classes and 2310 character classes (the label columns show fonts 0-4
# and codes up to 2309).
model = MyModel(num_classes1=5, num_classes2=2310)

In [22]:
def training(model, iterator, optimizer, criterion, device):
    """Run one optimisation pass over ``iterator`` and return per-batch averages.

    Each batch is a triple ``(x, y1, y2)``. The model is called as ``model(x, y1)``
    and must return two outputs. ``criterion`` is applied to each output against
    ``y1`` and ``y2`` respectively, and the sum of both losses is back-propagated.

    Returns:
        ``(loss, loss1, loss2, acc1, acc2)``, each the mean over the batches of
        ``iterator``.
    """
    model.train()

    sum_total = 0
    sum_first = 0
    sum_second = 0
    sum_acc_first = 0
    sum_acc_second = 0

    for inputs, first_targets, second_targets in iterator:
        inputs = inputs.to(device)
        first_targets = first_targets.to(device)
        second_targets = second_targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs, first_targets)
        first_logits, second_logits = outputs[0], outputs[1]

        first_loss = criterion(first_logits, first_targets)
        second_loss = criterion(second_logits, second_targets)
        total_loss = first_loss + second_loss

        first_acc = calculate_accuracy(first_logits, first_targets)
        second_acc = calculate_accuracy(second_logits, second_targets)

        total_loss.backward()
        optimizer.step()

        # Accumulate plain Python floats so no graph is kept alive across batches.
        sum_total += total_loss.item()
        sum_first += first_loss.item()
        sum_second += second_loss.item()
        sum_acc_first += first_acc.item()
        sum_acc_second += second_acc.item()

    return (
        sum_total / len(iterator),
        sum_first / len(iterator),
        sum_second / len(iterator),
        sum_acc_first / len(iterator),
        sum_acc_second / len(iterator),
    )

In [18]:
def evaluate(model, iterator, criterion, device):
    """Measure losses and accuracies over ``iterator`` without updating the model.

    Each batch is a triple ``(x, y1, y2)``. The model is put in eval mode and called
    as ``model(x, y1)`` under ``torch.no_grad()``. ``criterion`` is applied to the two
    outputs against ``y1`` and ``y2`` respectively.

    No optimizer is touched here: no gradients are produced under ``no_grad``, so
    there is nothing to zero, and the function depends only on its arguments.

    NOTE(review): ``y1`` is still forwarded because the model's forward signature
    takes it. If the model uses the labels at inference time, the second head's
    metrics will be optimistic - confirm against the model.

    Returns:
        ``(loss, loss1, loss2, acc1, acc2)``, each the mean over the batches of
        ``iterator``.
    """
    epoch_loss = 0
    epoch_loss1 = 0
    epoch_loss2 = 0
    epoch_acc1 = 0
    epoch_acc2 = 0
    model.eval()

    with torch.no_grad():

        for x, y1, y2 in iterator:

            x = x.to(device)
            y1 = y1.to(device)
            y2 = y2.to(device)

            outputs = model(x, y1)

            loss1 = criterion(outputs[0], y1)
            loss2 = criterion(outputs[1], y2)
            loss = loss1 + loss2

            acc1 = calculate_accuracy(outputs[0], y1)
            acc2 = calculate_accuracy(outputs[1], y2)

            epoch_loss += loss.item()
            epoch_loss1 += loss1.item()
            epoch_loss2 += loss2.item()
            epoch_acc1 += acc1.item()
            epoch_acc2 += acc2.item()

    n_batches = len(iterator)
    return (
        epoch_loss / n_batches,
        epoch_loss1 / n_batches,
        epoch_loss2 / n_batches,
        epoch_acc1 / n_batches,
        epoch_acc2 / n_batches,
    )

In [9]:
# Image pipeline applied to every sample: resize the shorter side to 256, take the central
# 224x224 crop, convert to a tensor, then normalise each channel with the mean/std values
# commonly used with torchvision's ImageNet-pretrained models.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [10]:
class MultiTaskDataset():
    """Map-style dataset yielding ``(image, font, code)`` triples.

    Args:
        df: frame-like object with a ``path`` column (image file paths) and a
            ``label`` column holding strings of the form ``"<font> <code>"``.
    """

    def __init__(self, df):
        self.paths = list(df.path)
        self.labels = list(df.label)

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        """Load and preprocess sample ``idx``.

        Returns:
            ``(img, font, code)``: the preprocessed image tensor and the two
            integer targets as 0-dim int64 tensors.
        """
        # Load the image. The context manager closes the underlying file as soon as
        # the pixels have been converted, instead of leaving it to the GC.
        with Image.open(self.paths[idx]) as handle:
            img = handle.convert('RGB')
        img = preprocess(img)

        # Split the "<font> <code>" label back into its two targets.
        labels = self.labels[idx].split(" ")
        font = torch.tensor(int(labels[0]), dtype=torch.int64)
        code = torch.tensor(int(labels[1]), dtype=torch.int64)

        return img.data, font, code

    def show(self, idx):
        """Plot sample ``idx`` with its font and code as the title."""
        # __getitem__ returns three values, so unpack all three here.
        x, font, code = self[idx]
        stds = np.array([0.229, 0.224, 0.225])
        means = np.array([0.485, 0.456, 0.406])
        # Undo the normalisation (CHW -> HWC first). Clip to [0, 1] so rounding
        # overshoot cannot wrap around in the uint8 cast.
        img = np.clip(x.numpy().transpose((1, 2, 0)) * stds + means, 0.0, 1.0)
        img = (img * 255).astype(np.uint8)
        plt.imshow(img)
        plt.title("{} {}".format(font.item(), code.item()))

In [13]:
# Wrap the training frame in the dataset and serve it as shuffled mini-batches of 64,
# loaded in the main process (num_workers=0).
train_ds = MultiTaskDataset(df=train)

tr_dataloader = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)

In [30]:
# Evaluation data: same dataset wrapper and batch size as for training, but NOT shuffled.
# The reported metrics are means of per-batch values, so a fixed batch composition keeps
# them identical from one evaluation run to the next.
test_ds = MultiTaskDataset(test)

test_dataloader=torch.utils.data.DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=0)

In [15]:
# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model before building the optimizer, so the optimizer is constructed over
# parameters that already live on `device` (the order recommended by the torch.optim docs).
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)

optimizer = optim.Adam(model.parameters(), lr = 1e-3)

In [16]:
def calculate_accuracy(y_pred, y):
    """Return the fraction of rows of ``y_pred`` whose arg-max equals the target in ``y``.

    The result is a 0-dim float tensor: the number of matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1, keepdim=True)
    # Reshape the targets to the prediction's (N, 1) layout before comparing.
    matches = predicted.eq(y.view_as(predicted))
    n_correct = matches.sum()
    return n_correct.float() / y.shape[0]

In [17]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [23]:
# Train for 8 epochs. After each epoch, evaluate on the test loader and print the wall-clock
# time plus the loss/accuracy of both heads, rounded to 5 decimals.
for i in range(8):
    since = time.time()
    tr_loss, font_loss, char_loss, font_acc, char_acc = training(
        model, tr_dataloader, optimizer, criterion, device
    )
    ts_loss, ts_font_loss, ts_char_loss, ts_font_acc, ts_char_acc = evaluate(
        model, test_dataloader, criterion, device
    )
    mins, secs = epoch_time(since, time.time())
    print(f'epochs:{i+1}')
    print(f'min:{mins} sec:{secs}')
    print(
        f'training_loss:{round(tr_loss, 5)}'
        f' font_loss:{round(font_loss, 5)}'
        f' char_loss:{round(char_loss, 5)}'
        f' font_accuracy:{round(font_acc, 5)}'
        f' char_accuracy:{round(char_acc, 5)}'
    )
    print(
        f'test_loss:{round(ts_loss, 5)}'
        f' font_loss:{round(ts_font_loss, 5)}'
        f' char_loss:{round(ts_char_loss, 5)}'
        f' font_accuracy:{round(ts_font_acc, 5)}'
        f' char_accuracy:{round(ts_char_acc, 5)}'
    )

epochs:1
min:15 sec:58
training_loss:8.34283 font_loss:0.73936 char_loss:7.60347 font_accuracy:0.6877 char_accuracy:0.00272
test_loss:15.05449 font_loss:2.4396 char_loss:12.61489 font_accuracy:0.35533 char_accuracy:0.00067
epochs:2
min:10 sec:52
training_loss:6.89478 font_loss:0.43528 char_loss:6.45951 font_accuracy:0.82385 char_accuracy:0.02738
test_loss:11.15377 font_loss:0.41414 char_loss:10.73963 font_accuracy:0.82865 char_accuracy:0.0032
epochs:3
min:11 sec:1
training_loss:4.50165 font_loss:0.37696 char_loss:4.12469 font_accuracy:0.84801 char_accuracy:0.19732
test_loss:15.62011 font_loss:0.46497 char_loss:15.15514 font_accuracy:0.81853 char_accuracy:0.00528
epochs:4
min:10 sec:58
training_loss:2.57234 font_loss:0.3279 char_loss:2.24444 font_accuracy:0.87018 char_accuracy:0.46637
test_loss:17.86771 font_loss:0.31574 char_loss:17.55198 font_accuracy:0.87093 char_accuracy:0.00692
epochs:5
min:11 sec:3
training_loss:1.74156 font_loss:0.28149 char_loss:1.46007 font_accuracy:0.8883 char

In [31]:
# Re-evaluate the current model on the test loader and print the same summary line as the
# epoch loop.
ts_loss, ts_font_loss, ts_char_loss, ts_font_acc, ts_char_acc = evaluate(
    model, test_dataloader, criterion, device
)
print(
    f'test_loss:{round(ts_loss, 5)}'
    f' font_loss:{round(ts_font_loss, 5)}'
    f' char_loss:{round(ts_char_loss, 5)}'
    f' font_accuracy:{round(ts_font_acc, 5)}'
    f' char_accuracy:{round(ts_char_acc, 5)}'
)

test_loss:1.82906 font_loss:0.26717 char_loss:1.56188 font_accuracy:0.90198 char_accuracy:0.60565


In [None]:
# Persist only the learned weights (the state_dict), not the whole module object.
# NOTE(review): the destination is a hard-coded absolute Windows path - adjust it before
# running on another machine.
torch.save(model.state_dict(), "D:/caligraphy/test_model2.pth")