In [None]:
import numpy as np
import time
import PIL.Image as Image
import matplotlib.pylab as plt
import tensorflow as tf
import os
import zipfile
import shutil


# Clone the Traditional Chinese handwriting dataset repository into the current directory.
!git clone https://github.com/AI-FREE-Team/Traditional-Chinese-Handwriting-Dataset.git
# Folder that receives the extracted images.
OutputFolder = '/content/Handwritten_Data'
# Delete any previous copy of that folder so each run starts from scratch.
!rm -rf '/content/Handwritten_Data'

In [None]:
SIZE = 150 # number of classes (characters) to work with

In [None]:
# Unpack every dataset archive into OutputFolder, then sort the images of SIZE
# characters into one sub-folder per character (the layout expected by
# flow_from_directory). Images of the remaining characters stay loose in
# OutputFolder.
DataFolder = '/content/Traditional-Chinese-Handwriting-Dataset/data'
ExtractedSubfolder = 'cleaned_data(50_50)'

# Full paths of all ".zip" archives shipped with the dataset.
CompressedFiles = [
  os.path.join(DataFolder, item)
  for item in sorted(os.listdir(DataFolder))
  if item.endswith('.zip')
]

for file in CompressedFiles:
  # The context manager closes the archive handle once extraction is done.
  with zipfile.ZipFile(file) as archive:
    archive.extractall(OutputFolder)

  # Every archive unpacks into the same sub-folder; flatten it into
  # OutputFolder and remove it so the next archive starts clean.
  source_path = os.path.join(OutputFolder, ExtractedSubfolder)
  for img in os.listdir(source_path):
    shutil.move(os.path.join(source_path, img), OutputFolder)
  shutil.rmtree(source_path)

# Names of length 1 are skipped (e.g. single-character folders left by an
# earlier run of this cell).
ImageList = [img for img in os.listdir(OutputFolder) if len(img) > 1]

# Group the file names by character in a single pass. The character is the
# part of the name before the first '_', matched exactly rather than as a
# substring of the whole file name.
ImagesByWord = {}
for img in ImageList:
  ImagesByWord.setdefault(img.split('_')[0], []).append(img)

# Sort before truncating so the same SIZE characters are selected on every
# run (the iteration order of a set of strings changes between kernels).
WordList = sorted(ImagesByWord)[:SIZE]

# Leave the working directory at OutputFolder; code that uses relative paths
# after this cell may rely on it.
os.chdir(OutputFolder)

for w in WordList:
  word_folder = os.path.join(OutputFolder, w)
  os.makedirs(word_folder, exist_ok=True)  # tolerate a folder that already exists
  for img in ImagesByWord[w]:
    shutil.move(os.path.join(OutputFolder, img), os.path.join(word_folder, img))

print( 'Data Deployment completed.' )

In [None]:
# Count the character folders (a) and the images stored inside them (b).
a = 0
b = 0

for item in os.listdir(OutputFolder):
  item_path = os.path.join(OutputFolder, item)
  # Test the full path: a bare name is resolved against the current working
  # directory, which is only right while the cwd happens to be OutputFolder.
  if os.path.isdir(item_path):
    a += 1
    b += len(os.listdir(item_path))

#print('Total: ' + str(a) + ' words (folders) / Total: ' + str(b) + ' samples')
#print('On average each word contains: ' + str(b / a) + ' samples')

In [None]:
# Generator that reserves a tenth of the images for the validation subset.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=0.1)

# Settings shared by both subsets: sparse labels, batches of 10, 50x50 images.
flow_options = dict(class_mode='sparse', batch_size=10, target_size=(50, 50))
train_dataset = image_generator.flow_from_directory(str(OutputFolder), subset='training', **flow_options)
valid_dataset = image_generator.flow_from_directory(str(OutputFolder), subset='validation', **flow_options)

In [None]:
# Augmentation pipeline: random flips along both axes, then a random rotation.
# NOTE(review): not referenced by any other cell shown here.
augmentation_steps = [
  tf.keras.layers.RandomFlip("horizontal_and_vertical"),
  tf.keras.layers.RandomRotation(0.2),
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

In [None]:
# Side length, in pixels, that images are resized to.
IMG_SIZE = 40

# Resize to IMG_SIZE x IMG_SIZE, then multiply the pixel values by 1/255.
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(tf.keras.layers.Resizing(IMG_SIZE, IMG_SIZE))
resize_and_rescale.add(tf.keras.layers.Rescaling(1./255))

In [None]:
# Pull a single batch from the training iterator to inspect the data.
image_b, label_b = next(iter(train_dataset))

# The generator yields float pixels in the 0-255 range, but imshow() expects
# float RGB data in [0, 1] and clips everything above it, so cast to uint8
# to display the second image of the batch correctly.
plt.imshow(image_b[1].astype(np.uint8))

# Resized and rescaled copy of the batch plus its labels, kept for the
# following cells.
image_batch = resize_and_rescale(image_b)
label_batch = label_b

In [None]:
# Show the second image of the batch after it went through resize_and_rescale.
plt.imshow(image_batch[1])

In [None]:
# CNN classifier: five 'same'-padded ReLU convolutions with 2x2 max-pooling
# between consecutive convolutions, followed by a dense head that ends in a
# SIZE-way softmax.
INPUT_SHAPE = (50, 50, 3)
CONV_SPECS = [(16, 3), (32, 3), (64, 3), (128, 2), (256, 2)]  # (filters, kernel_size)

conv_stack = []
for n_filters, kernel in CONV_SPECS:
  extra = {}
  if conv_stack:
    # Pool before every convolution except the first one.
    conv_stack.append(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))
  else:
    # Only the first layer declares the input shape.
    extra['input_shape'] = INPUT_SHAPE
  conv_stack.append(
    tf.keras.layers.Conv2D(filters=n_filters, kernel_size=kernel, activation='relu', padding='same', **extra)
  )

classifier_head = [
  tf.keras.layers.Flatten(),
  # L2 weight penalty on the hidden dense layer.
  tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
  #tf.keras.layers.Dropout(0.4),
  tf.keras.layers.Dense(SIZE, activation='softmax'),
]

model = tf.keras.Sequential(conv_stack + classifier_head)

model.summary()

In [None]:
# Inverse-time decay of the learning rate, starting at 1.8.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
  initial_learning_rate=1.8,
  decay_steps=705 * 5,  # NOTE(review): 705 looks like the steps per epoch — confirm
  decay_rate=0.1,
  staircase=False,
)

In [None]:
# Train for EPOCHS epochs with Adadelta driven by lr_schedule, tracking accuracy.
EPOCHS = 20

adadelta = tf.keras.optimizers.Adadelta(lr_schedule)
crossentropy = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=adadelta, loss=crossentropy, metrics=['accuracy'])

# Per-epoch metrics end up in history.history.
history = model.fit(train_dataset, validation_data=valid_dataset, epochs=EPOCHS)

In [None]:
# Plot the per-epoch training/validation accuracy and loss recorded by fit().
curves = history.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']

fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(10, 10))

# Top panel: accuracy, with the upper y-limit pinned to 1.
ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

# Bottom panel: cross-entropy loss, y-limits fixed at their current values.
ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_ylim([min(ax_loss.get_ylim()), max(ax_loss.get_ylim())])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')
plt.show()

In [None]:
# Inverse-time decay of the learning rate, this time starting at 0.5.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
  initial_learning_rate=0.5,
  decay_steps=705 * 5,  # NOTE(review): 705 looks like the steps per epoch — confirm
  decay_rate=0.1,
  staircase=False,
)

In [None]:
# Compile again with the current lr_schedule and train for EPOCHS more epochs.
# NOTE(review): `model` is not rebuilt before this cell, so training appears to
# continue from the weights it already has — confirm this is intended.
EPOCHS = 20

adadelta = tf.keras.optimizers.Adadelta(lr_schedule)
crossentropy = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=adadelta, loss=crossentropy, metrics=['accuracy'])

# Per-epoch metrics end up in history.history.
history = model.fit(train_dataset, validation_data=valid_dataset, epochs=EPOCHS)

In [None]:
# Plot the per-epoch training/validation accuracy and loss of the latest fit().
curves = history.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']

fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(10, 10))

# Top panel: accuracy, with the upper y-limit pinned to 1.
ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

# Bottom panel: cross-entropy loss, y-limits fixed at their current values.
ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_ylim([min(ax_loss.get_ylim()), max(ax_loss.get_ylim())])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')
plt.show()