# Get the data

In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# Task 1: Font Family

In [None]:
import pandas as pd
import numpy as np
import os
import keras
import matplotlib.pyplot as plt
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from keras.applications.densenet import DenseNet201, preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
#from keras.optimizers import Adam

In [None]:
# Input resolution used for the DenseNet201 backbone.
img_width, img_height = 224, 224

# ImageNet-pretrained DenseNet201 without its classification top.
model = DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(img_width, img_height, 3),
)

# Freeze every backbone layer so only the new head below gets trained.
for layer in model.layers:
    layer.trainable = False

# Use the output of the layer named 'conv5_block32_concat' as the feature map.
last_layer = model.get_layer('conv5_block32_concat')

# New head: flatten -> 1024-unit ReLU -> 10-unit softmax.
x = Flatten()(last_layer.output)
x = Dense(1024, activation='relu')(x)
x = Dense(10, activation='softmax')(x)

# Rebind `model` to the backbone + head network.
model = Model(inputs=model.input, outputs=x)

In [None]:
# Download the dataset archive from Google Drive by file id, then unpack it.
# NOTE(review): presumably the download lands at /content/version3.zip — confirm.
!gdown --id 1_2Bp5D8TAXe2-tPf-RcQSeouB26in3Qx
!unzip /content/version3.zip

In [None]:
# NOTE(review): RMSprop is imported here but not used in this cell.
from tensorflow.keras.optimizers import RMSprop

# Compile with the default 'sgd' optimizer. A later cell compiles the model
# again with Adam before training starts.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# One generator object provides both subsets: pixel values are rescaled by
# 1/255 and 40% of the images are held out for validation.
# NOTE(review): `directory` is not defined in any cell visible in this notebook;
# it must be set to the unzipped dataset folder before this cell runs.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.4) # set validation split

train_generator = train_datagen.flow_from_directory(
    directory,
    #color_mode='grayscale',
    target_size=(224, 224,),
    batch_size=128,
    subset='training') # set as training data

print(directory)
validation_generator = train_datagen.flow_from_directory(
    directory, # same directory as training data
    target_size=(224, 224),
    #color_mode='grayscale',
    batch_size=128,
    subset='validation') # set as validation data    

In [None]:
import tensorflow as tf
# Save the full model (save_weights_only=False) whenever validation accuracy
# reaches a new maximum. The file name embeds the epoch number and the
# validation loss, while the monitored quantity is validation accuracy.
callback = tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5', 
                                              save_weights_only=False,
                                              monitor='val_accuracy',
                                              mode='max',
                                              save_best_only=True)

In [None]:
# Re-compile with Adam (learning rate 0.005); this replaces the earlier 'sgd' compile.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'],)

In [None]:
# Train for 50 epochs, validating on 50 batches per epoch and checkpointing
# through `callback`.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated in
# TensorFlow 2.x and no longer exists in recent Keras releases.
history = model.fit(train_generator,
                    epochs = 50,
                    validation_data = validation_generator,
                    validation_steps = 50,
                    callbacks= [callback])

In [None]:
# Plot the training history.
# The history key for the 'accuracy' metric is 'accuracy' in tf.keras 2.x and
# 'acc' in older Keras releases, so use whichever one is present (the
# checkpoint callback in this notebook already monitors 'val_accuracy').
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Open each figure *before* drawing on it; calling plt.figure() after the plot
# commands only adds an extra, empty figure.
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.show()

plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)
plt.show()

In [None]:
# Save the trained model to model.h5 in the current working directory.
model.save("model.h5")

In [None]:
# Loss and accuracy metrics.
# NOTE(review): `validation_set` is not defined in any cell visible in this
# notebook (the generators above are named `validation_generator`); this cell
# looks carried over from another notebook — confirm before running.
score = model.evaluate_generator(validation_set, 105)

# Both values are printed multiplied by 100 and formatted as percentages.
print ("%s: %.2f%%" % (model.metrics_names[0], score[0]*100))
print ("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

In [None]:
# Load the model saved on disk.
from keras.models import load_model

model = load_model('model.h5')
# List layer indices and names to help choose where to start fine-tuning.
for i, layer in enumerate(model.layers):
   print(i, layer.name)

# Keep the first 167 layers frozen and make every later layer trainable.
# NOTE(review): 167 is a hard-coded index — confirm it matches the intended
# block boundary of the loaded architecture.
for layer in model.layers[:167]:
   layer.trainable = False
for layer in model.layers[167:]:
   layer.trainable = True

In [None]:
from keras.optimizers import SGD
# Very small learning rate for fine-tuning the unfrozen layers.
# `learning_rate` is the supported keyword (already used for Adam earlier in
# this notebook); `lr` is a deprecated alias.
model.compile(optimizer=SGD(learning_rate=0.000001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
from google.colab import drive
from keras.callbacks import *

# Mount Drive (at a different mount point than the first cell) so checkpoints
# are written to it.
drive.mount('/content/drive')
# Checkpoint file name embeds the epoch and the validation accuracy.
# NOTE(review): the name says 'resnet50' and the monitored key is 'val_acc',
# whereas the first training run monitors 'val_accuracy' — confirm which key
# the installed Keras version reports.
filepath="/content/drive/My Drive/Colab Notebooks/log/resnet50:{epoch:03d}-val_acc:{val_acc:.3f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [None]:
# Restore weights from a specific checkpoint file on Drive.
# NOTE(review): hard-coded checkpoint name — verify the file exists and belongs to this model.
model.load_weights('/content/drive/My Drive/Colab Notebooks/log/resnet50:030-val_acc:0.629.hdf5')

In [None]:
# Fine-tune
# NOTE(review): `training_set` and `validation_set` are not defined in any cell
# visible in this notebook, and the step counts are float results of
# 528/32 and 105/32 — confirm the intended generators and sample counts.
history = model.fit_generator(training_set,
                    steps_per_epoch=528/32,
                    epochs = 100,
                    validation_data = validation_set,
                    validation_steps = 105/32,
                    callbacks=callbacks_list)

In [None]:
# Save the model locally, then upload the file to Drive.
# NOTE(review): `drive.CreateFile` looks like the PyDrive client created in a
# later cell (`drive = GoogleDrive(gauth)`); at this point `drive` is the
# google.colab module imported above — confirm the intended cell order.
model.save("model.h5")
uploaded = drive.CreateFile({'title': 'model.h5'})
uploaded.SetContentFile('model.h5')
uploaded.Upload()

In [None]:
import matplotlib.pyplot as plt

# List every series recorded in the history.
print(history.history.keys())
# Training plot - accuracy
# NOTE(review): reads the 'acc'/'val_acc' keys; the printed key list above
# shows which names the installed Keras version actually uses.
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# Training plot - loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
!pip install --upgrade --quiet PyDrive
# Connect to Google Drive through PyDrive.

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user and hand the default credentials to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Rebinds `drive`: from here on it is the PyDrive client, not the
# google.colab `drive` module imported in earlier cells.
drive = GoogleDrive(gauth)

In [None]:
# Download the test images from Google Drive.
# Each entry is (Drive share link, local file name); the Drive file id is the
# part of the link after '='.
test_image_links = [
    ('https://drive.google.com/open?id=1sbjcK__NABa7gfsPOt7JM8jAaXCLCfKW',
     'teste1_cytHigh-grade Squamous Intraepithelial Lesion - 14659.jpg'),
    ('https://drive.google.com/open?id=1vDBjgozlaLg0tcGQ50b9wpOBz_NYyx8o',
     'teste2_cyt14721.jpg'),
    ('https://drive.google.com/open?id=14mCco19UM0k83Irdz3xk2lQaOs7a1YuK',
     'teste3_cytoCandida - 7557.jpg'),
]

for link, local_name in test_image_links:
    # Named `file_id` rather than `id` so the builtin id() is not shadowed.
    file_id = link.split('=')[1]
    downloaded = drive.CreateFile({'id': file_id})
    downloaded.GetContentFile(local_name)

In [None]:
# Test the model on a single image.
# NOTE(review): the image is resized to 200x200, while the network built in
# this notebook takes 224x224 input, and no rescaling is applied here —
# confirm which model this cell is meant for.
test_image = image.load_img('teste1_cytHigh-grade Squamous Intraepithelial Lesion - 14659.jpg', target_size = (200, 200))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)
result = model.predict(test_image)

# Maps the first output value to a label by exact comparison with 0 and 1;
# any other value falls through to the last branch.
if result[0][0] == 0:
    diagnostico = 'Carcinoma'
elif result[0][0] == 1:
    diagnostico = 'Normal'
else:
    diagnostico = 'Outros problemas'
    
print ('Diagnóstico:', diagnostico)

In [None]:
# Save the model to Drive: upload the local model.h5 through the PyDrive client.

uploaded = drive.CreateFile({'title': 'model.h5'})
uploaded.SetContentFile('model.h5')
uploaded.Upload()

In [None]:
# Import the model from Drive: download the file with this id to model.h5.

link = 'https://drive.google.com/open?id=1b8QkLS1nix4K5g46SGFAR4Cnx1D6eoF1'
# The Drive file id is the part of the link after '='.
# (`id` shadows the Python builtin of the same name in this notebook.)
fluff, id = link.split('=')

downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('model.h5')

<hr></hr>

# Task 2: Font Size

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

import os
from pathlib import Path
import cv2
import math

### Function

In [None]:
def  twoCut(projection):
  """Locate the two strongest jumps in a projection profile.

  The profile is padded with a trailing 0 and the absolute differences between
  consecutive values are computed. The difference list is split into a first
  half (which gets the middle element when the length is odd) and a second
  half. The first cut is the earliest position of the largest difference in
  the first half; the second cut is the latest position of the largest
  difference in the second half.

  Unlike the earlier version, the caller's sequence is not modified, and a
  single-element profile yields both cuts at index 0 instead of mixing a
  difference value into the returned index.

  Args:
    projection: non-empty sequence of numbers (for example the count of
      non-white pixels per image row).

  Returns:
    (distance, first_cut, second_cut), where distance is
    abs(first_cut - second_cut) and both cuts are indices into `projection`.

  Raises:
    IndexError: if `projection` is empty.
  """
  if len(projection) == 0:
    raise IndexError("twoCut() needs a non-empty projection")

  # Work on a padded copy so the caller's list is left untouched.
  padded = list(projection) + [0]
  dif = [abs(cur - nxt) for cur, nxt in zip(padded, padded[1:])]

  # Same split as testing `i < n/2` for every index i.
  n = len(dif)
  split = (n + 1) // 2
  first_half = dif[:split]
  second_half = dif[split:]

  # Earliest occurrence of the largest jump in the first half.
  cut1 = first_half.index(max(first_half))

  if second_half:
    # Latest occurrence of the largest jump in the second half.
    peak = max(second_half)
    last_pos = len(second_half) - 1 - second_half[::-1].index(peak)
    cut2 = split + last_pos
  else:
    # Only one difference exists, so both cuts coincide.
    cut2 = cut1

  return abs(cut1 - cut2), cut1, cut2

In [None]:
def VPP_2(imgPath,printPic=False):
  """Measure the distance in rows between the two cuts found by twoCut().

  The image is read with OpenCV, padded with one white row at the top and one
  at the bottom, and its channel order is reversed. For every row the number
  of pixels whose first channel differs from 255 is counted, and that
  per-row profile is passed to twoCut().

  Args:
    imgPath: path of the image file to read.
    printPic: when True, show the padded image with both cut rows marked.

  Returns:
    The distance (in rows) between the two cuts reported by twoCut().

  Raises:
    FileNotFoundError: if OpenCV cannot read `imgPath` (cv2.imread returns
      None in that case).
  """
  img = cv2.imread(imgPath)
  if img is None:
    raise FileNotFoundError("cv2.imread could not read image: %s" % imgPath)

  # One white row above and one below the image (int32, as before).
  white_row = np.full((1, img.shape[1], 3), 255, dtype=np.int32)
  img = np.concatenate((white_row, img, white_row))
  # Reverse the channel order.
  img = img[:, :, ::-1]

  # Row profile: number of pixels per row whose first channel is not 255.
  pjt = (img[:, :, 0] != 255).sum(axis=1).tolist()

  sizePX, start, end = twoCut(pjt)

  # Mark both cut rows so they are visible when the picture is shown.
  img[start, :] = (200,100,123)
  img[end, :] = (200,100,123)
  if printPic:
    plt.imshow(img)
    plt.show()
  return sizePX

In [None]:
def find_pixel(df):
  """Compute the VPP_2 size and the image height for every path in `df`.

  Args:
    df: DataFrame with a 'path' column holding image file paths.

  Returns:
    (base_size, height): two lists aligned with the rows of `df`. An image
    that cannot be processed gets NaN in both lists and a message is printed.
    Previously a failure silently reused the values of the preceding image,
    or raised NameError when the very first image failed.
  """
  from tqdm import tqdm

  base_size = []
  height = []

  for img_path in tqdm(df.loc[:, 'path'].values):
    try:
      base = VPP_2(img_path, printPic=False)
      h = cv2.imread(img_path).shape[0]
    except Exception as err:
      # Best effort: keep going, but never carry stale values forward.
      print("find_pixel: could not process %s: %s" % (img_path, err))
      base, h = float('nan'), float('nan')
    base_size.append(base)
    height.append(h)
  return base_size, height

### Test data

In [None]:
# Download and quietly unpack the cropped test images.
!gdown --id 1L9d532KH68mmhyZZtipG0qio49aKNqWH
!unzip -q /content/test_crop_png.zip

# Five further archives, fetched by Drive file id.
# NOTE(review): presumably these are saved as crop0.zip ... crop4.zip — confirm.
!gdown --id 1hGOBgyZgNMZOu_YetV79PuH_ioIxEWSk # 0
!gdown --id 1SmJCGFZK_caM67TCcGvFUnVFj11_bU4l # 1
!gdown --id 1Hx0BA8B-CgRuLyr7Xjmm08Pw0Oml-OT1 # 2
!gdown --id 1xrtV-og2NZxV4TLp4Ai_fGBn0DEtjf-1 # 3
!gdown --id 12L6lA3PJpCzyoTf9pneDpBue5h3U4wo1 # 4

!unzip -q /content/crop0.zip
!unzip -q /content/crop1.zip
!unzip -q /content/crop2.zip
!unzip -q /content/crop3.zip
!unzip -q /content/crop4.zip

### Prepare test dataset to get into the model

In [None]:
def prepare_test_data(path_test_folder):
  """Build the font-size feature table for one folder of cropped test images.

  Args:
    path_test_folder: folder whose entries are the image files to measure.

  Returns:
    DataFrame with the columns 'parentId' (file name up to the first '.'),
    'vppPix' and 'height' (both taken from find_pixel), one row per entry
    of the folder.
  """
  names = os.listdir(path_test_folder)

  # Intermediate frame: file name, full path and the id derived from the name.
  df_test = pd.DataFrame(data=names, columns=['file_name'])
  df_test['path'] = [path_test_folder+"/"+name for name in names]
  df_test['parentId'] = [name.split('.')[0] for name in names]

  # Per-image measurements, aligned with the rows of df_test.
  base, height = find_pixel(df_test)

  # Attach the measurements and keep only the columns the model pipeline uses.
  return (
      df_test
      .assign(vppPix=base, height=height)
      .drop(columns=['file_name', 'path'])
  )
  # get dataframe of parentId

In [None]:
# Build the feature table for both test folders and cache it as CSV.
# NOTE(review): the archives above are named crop0..crop4 while the folders
# read here are crop_1 and crop_2 — confirm the extracted folder names.
df_test1 = prepare_test_data("/content/crop_1")
df_test2 = prepare_test_data("/content/crop_2")
df_test = pd.concat([df_test1, df_test2], axis=0)
# The index is written too; it comes back as the 'Unnamed: 0' column on reload.
df_test.to_csv("df_test.csv")

In [None]:
# Reload the cached feature table, discarding the index column that to_csv wrote.
df_test = (
    pd.read_csv('df_test.csv')
    .drop(columns=['Unnamed: 0'])
    .reset_index(drop=True)
)
df_test

In [None]:
# Font-family predictions, part 1.
# The earlier version let read_csv consume the first record as column names
# and then re-appended that record with hard-coded values, which shows the
# file has no header row. Reading with header=None keeps every record
# (including the first one) without hard-coding its contents. The only
# difference is that this record now stays at the top instead of being
# appended at the end.
df_font_1 = pd.read_csv("/content/file_submission_lastset.csv",
                        header=None, names=['parentId', 'font_family'])
# Change the format a bit: 'xxxx_file' -> 'xxxx', 'Font.ttf' -> 'Font'.
df_font_1['parentId'] = df_font_1['parentId'].apply(lambda x:x.split("_")[0])
df_font_1['font_family'] = df_font_1['font_family'].apply(lambda x:x.split(".")[0])

# Font-family predictions, part 2: drop the saved index column and use the
# same column names as part 1.
df_font_2 = pd.read_csv("/content/SubmissionTheLast.csv")
df_font_2 = df_font_2.drop(columns="Unnamed: 0")
df_font_2 = df_font_2.rename({"name":"parentId", "id":"font_family"}, axis=1)

# Stack both parts into a single lookup table.
df_font = pd.concat([df_font_1,df_font_2], axis=0)
df_font = df_font.reset_index(drop=True)
df_font


In [None]:
# Inspect the distinct font-family spellings (used to decide which names need correcting).
df_font['font_family'].unique()

In [None]:
def change_wrong_font(font_name):
  """Map a mis-spelled font-family name to its canonical spelling.

  Args:
    font_name: font-family name as found in the prediction files.

  Returns:
    The corrected name for the four known variants; any other value is
    returned unchanged.
  """
  corrections = {
      'DM_Shining Star_Regular': 'DM_Shining_Star_Regular',
      'ANGSA': 'Angsana_New',
      'CORDIA': 'Cordia_New',
      'THSarabun': 'TH_Sarabun',
  }
  return corrections.get(font_name, font_name)

# Normalise the font names, attach them to the size features and one-hot
# encode the family.
df_font['font_family'] = df_font['font_family'].apply(change_wrong_font)
# No `on=` is given, so pandas joins on the columns the two frames share.
# NOTE(review): presumably that is only 'parentId' — confirm.
df_test = df_test.merge(df_font)
df_test = pd.get_dummies(df_test, columns=['font_family'])
# NOTE(review): 'parentId' stays in df_test (the drop below is commented out),
# yet df_test is later passed to predict() as-is — confirm the feature columns.
# df_test.drop('parentId', axis=1,inplace=True)
df_test

### Model

We first tried PyCaret. 
The best result we obtained after training came from the Random Forest model. 
However, we could not find a way to load that model for later use. 
So we built the random forest model ourselves.

In [None]:
# Load the training table, drop the saved index column, and work on a copy.
df_train = pd.read_csv("/content/pycaret_v1.csv")
df_train.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
dataset = df_train.copy()
dataset

In [None]:
# Hold out 10% of the rows as unseen data; the fixed random_state makes the
# 90% sample reproducible.
data = dataset.sample(frac=0.9, random_state=786)
data_unseen = dataset.drop(data.index)

data.reset_index(drop=True, inplace=True)
data_unseen.reset_index(drop=True, inplace=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

In [None]:
# Remove the ratio column from both splits and keep only the target plus the
# two measured features for the PyCaret experiment.
# errors="ignore" and the non-inplace form make this cell safe to re-run; the
# in-place version raised KeyError the second time because the column was
# already gone.
data = data.drop(columns="ratio_cropped_to_height", errors="ignore")
data_unseen = data_unseen.drop(columns="ratio_cropped_to_height", errors="ignore")
data = data[['fontSize', 'height', 'vppPix']]
data

### PyCaret

In [None]:
# Set up a PyCaret classification experiment with 'fontSize' as the target and
# compare the candidate models using 12 folds.
# (The star import brings `setup` and `compare_models` into the namespace.)
from pycaret.classification import *
exp_mclf101 = setup(data = data, target = 'fontSize', session_id=123)
best = compare_models(fold=12)

### Random Forest

In [None]:
# Features are every column except the target and the ratio column.
# errors='ignore' keeps the cell re-runnable once the column has been dropped.
dataset = dataset.drop(columns='ratio_cropped_to_height', errors='ignore')
X = dataset.drop('fontSize', axis=1)
y = dataset['fontSize']

from sklearn.model_selection import train_test_split

# 70/30 shuffled split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, shuffle=True, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
# max_features='sqrt' is what 'auto' meant for classifiers; the 'auto' spelling
# was deprecated in scikit-learn 1.1 and removed in 1.3, where it raises an error.
forest = RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=8998, verbose=0,
                       warm_start=False)

forest.fit(X_train, y_train)

# Mean accuracy on the held-out 30%.
forest.score(X_test, y_test)

In [None]:
import pickle
# Persist the fitted forest; the context manager closes the file handle, which
# the bare open() call never did.
with open('randomForest.sav', 'wb') as model_file:
    pickle.dump(forest, model_file)

In [None]:
# Reload the pickled forest and confirm that it scores the same held-out split.
# The context manager closes the file handle, which the bare open() call never did.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
with open('randomForest.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.score(X_test, y_test)
print(result)

### try the model on test data

In [None]:
import pickle
# Load the pickled forest (closing the file handle afterwards) and predict the
# font size for the test table.
with open('randomForest.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# NOTE(review): df_test must hold exactly the feature columns the forest was
# trained on; in the cells above 'parentId' is still part of df_test — confirm.
font_size = loaded_model.predict(df_test)
font_size

In [None]:
# Reload the final test table and pair each parentId with its predicted size.
# NOTE(review): 'df_test_final.csv' is not written by any cell visible in this
# notebook; `font_size` must have one entry per row of it.
df_test = (
    pd.read_csv('df_test_final.csv')
    .drop(columns=['Unnamed: 0'])
    .reset_index(drop=True)
)

df_submit = df_test[['parentId']].copy()
df_submit['font_size'] = font_size
df_submit

# Task 3: Font Style

In [None]:
# Download the font-style training archive and the test images, and unpack
# them into /content/data/ and /content/testdata/.
# NOTE(review): `--qq` is not the usual spelling of unzip's quiet flag (the
# cells above use `-q`) — confirm it behaves as intended.
!gdown --id 1L843daxmFSZD_hyz_AMIQ6-dzTPDaoGJ
!unzip --qq "/content/font_fam-20220209T082332Z-001.zip" -d"/content/data/"
!gdown https://drive.google.com/uc?id=1wwAIAIlWwCVcH09uyvq5GU28too1cQZo
!unzip --qq "/content/test_png.zip" -d"/content/testdata/"

In [None]:
path_data = "/content/data/font_fam/"
from matplotlib.pyplot import imshow
import matplotlib.cm as cm
import matplotlib.pylab as plt
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import PIL
from PIL import ImageFilter
import cv2
import itertools
import random
import keras
import imutils
from imutils import paths
import os
from keras import optimizers
from keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras import callbacks
from keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D , UpSampling2D ,Conv2DTranspose
from keras import backend as K

from tqdm import tqdm

%matplotlib inline

In [None]:
def pil_image(img_path):
    """Open an image, convert it to mode 'L' and resize it to 105x105.

    Args:
        img_path: path of the image file to open.

    Returns:
        The converted, resized PIL image.
    """
    grayscale = PIL.Image.open(img_path).convert('L')
    return grayscale.resize((105, 105))

### Augmentation

In [None]:
def noise_image(pil_im):
    """Return a 105x105 copy of `pil_im` with additive Gaussian noise.

    The noise has mean 0.0 and standard deviation 5; the result is clipped to
    [0, 255] and converted to uint8 before being turned back into a PIL image.
    """
    pixels = np.asarray(pil_im)
    noise = np.random.normal(0.0, 5, pixels.shape)
    clipped = np.clip(pixels + noise, 0, 255)
    return PIL.Image.fromarray(np.uint8(clipped)).resize((105, 105))

In [None]:
def blur_image(pil_im):
    """Return a 105x105 copy of `pil_im` blurred with a Gaussian filter of radius 3."""
    blurred = pil_im.filter(ImageFilter.GaussianBlur(radius=3))
    return blurred.resize((105, 105))

In [None]:
def affine_rotation(img):
    """Warp a 2-D image array with a fixed affine transform and return a 105x105 PIL image.

    The transform is the one that maps the three source points onto the three
    destination points defined below; the warped array keeps the input size
    before the final resize.
    """
    height, width = img.shape

    # Three point correspondences define the affine map.
    src = np.float32([[10, 10], [30, 10], [10, 30]])
    dst = np.float32([[20, 15], [40, 10], [20, 40]])
    matrix = cv2.getAffineTransform(src, dst)

    warped = cv2.warpAffine(img, matrix, (width, height))
    return PIL.Image.fromarray(np.uint8(warped)).resize((105, 105))
   

In [None]:
def gradient_fill(image):
    """Return the Laplacian of `image` (computed as CV_64F) resized to 105x105."""
    edges = cv2.Laplacian(image, cv2.CV_64F)
    return cv2.resize(edges, (105, 105))

### Prepare dataset

In [None]:
# Collect every image path under the data folder and shuffle them with a
# fixed seed so the order is reproducible.
data_path = "/content/data/font_fam/"
# Accumulators that the loading loop in a later cell appends to.
data=[]
labels=[]
imagePaths = sorted(list(paths.list_images(data_path)))
random.seed(42)
random.shuffle(imagePaths)

In [None]:
def conv_label(label):
  """Convert a font file path/name into a style class id.

  The class is decided by whether the (case-sensitive) substrings "italic"
  and "bold" occur anywhere in `label`:

    0 - neither, 1 - italic only, 2 - bold only, 3 - italic and bold.

  The earlier version tested `find(...) > 0`, so a match at position 0 was
  treated as neither present nor absent and the function returned None.

  Args:
    label: string to inspect (typically the image path).

  Returns:
    int in {0, 1, 2, 3}.
  """
  has_italic = "italic" in label
  has_bold = "bold" in label
  return int(has_italic) + 2 * int(has_bold)

In [None]:
# Names of the augmentations defined above and their 4-element combinations.
# NOTE(review): neither `augument` nor `a` is used by any later cell visible here.
augument=["blur","noise","affine","gradient"]
a=itertools.combinations(augument, 4)

In [None]:
# Load up to the first 30000 shuffled images as 105x105 arrays (via pil_image)
# together with their style labels.
# This appends to the `data` / `labels` lists created in an earlier cell, so
# re-running only this cell appends the same samples again.
# NOTE(review): `counter` is never updated or read in this cell.
counter=0
for imagePath in tqdm(imagePaths[:30000]):
    #label = imagePath.split(os.path.sep)[-2]
    label = conv_label(imagePath)
    #print(imagePath)
    #print(label)
    #print("-----")
    pil_img = pil_image(imagePath)
    #imshow(pil_img)
    
    # Adding original image
    org_img = img_to_array(pil_img)
    #print(org_img.shape)
    data.append(org_img)
    labels.append(label)

In [None]:
# Scale pixel values by 1/255 and turn the label list into an array.
data = np.asarray(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,labels, test_size=0.25, random_state=42)

In [None]:
# One-hot encode the four style classes.
# This overwrites trainY/testY, so run the cell only once per split.
trainY = to_categorical(trainY, num_classes=4)
testY = to_categorical(testY, num_classes=4)

# NOTE(review): `aug` is created here but is not passed to model.fit in the
# training cell visible below — confirm whether augmentation was intended.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,horizontal_flip=True)
K.set_image_data_format('channels_last')

### Model

In [None]:
 def create_model():
  """Build the font-style classifier for 105x105 single-channel images.

  The layer stack is: two Conv2D + BatchNormalization + MaxPooling2D stages,
  two Conv2DTranspose + UpSampling2D stages, three 256-filter Conv2D layers,
  then Flatten, two 4096-unit Dense layers each followed by Dropout(0.5),
  a 2383-unit Dense layer and a 4-unit softmax output.

  Returns:
    An uncompiled keras Sequential model.
  """
  # Cu layers
  cu_layers = [
      Conv2D(64, kernel_size=(48, 48), activation='relu', input_shape=(105,105,1)),
      BatchNormalization(),
      MaxPooling2D(pool_size=(2, 2)),

      Conv2D(128, kernel_size=(24, 24), activation='relu'),
      BatchNormalization(),
      MaxPooling2D(pool_size=(2, 2)),

      Conv2DTranspose(128, (24,24), strides = (2,2), activation = 'relu', padding='same', kernel_initializer='uniform'),
      UpSampling2D(size=(2, 2)),

      Conv2DTranspose(64, (12,12), strides = (2,2), activation = 'relu', padding='same', kernel_initializer='uniform'),
      UpSampling2D(size=(2, 2)),
  ]

  # Cs layers
  cs_layers = [
      Conv2D(256, kernel_size=(12, 12), activation='relu'),
      Conv2D(256, kernel_size=(12, 12), activation='relu'),
      Conv2D(256, kernel_size=(12, 12), activation='relu'),
  ]

  # Classification head
  head_layers = [
      Flatten(),
      Dense(4096, activation='relu'),
      Dropout(0.5),
      Dense(4096,activation='relu'),
      Dropout(0.5),
      Dense(2383,activation='relu'),
      Dense(4, activation='softmax'),
  ]

  # Add the layers one by one, in the same order as listed above.
  net = Sequential()
  for layer in cu_layers + cs_layers + head_layers:
    net.add(layer)
  return net

In [None]:
import tensorflow as tf

# Training hyper-parameters for the font-style model.
batch_size = 128
epochs = 50
model= create_model()
# `learning_rate` is the supported keyword (already used for Adam earlier in
# this notebook); `lr` is a deprecated alias.
# NOTE(review): `decay` may be rejected by newer Keras optimizers — confirm
# against the installed TensorFlow version.
sgd = tf.keras.optimizers.SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [None]:
# Stop when validation loss has not improved for 10 epochs, and keep only the
# checkpoint with the lowest validation loss.
early_stopping=callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min')
# NOTE(review): `path_s` is not defined in any cell visible in this notebook;
# it must be set to the output folder before this cell runs.
filepath= path_s + "top_model_4class.h5"
checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

callbacks_list = [early_stopping,checkpoint]

In [None]:
# Train on the in-memory arrays; the held-out 25% split doubles as the
# validation data for early stopping and checkpointing.
model.fit(trainX, trainY,shuffle=True,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(testX, testY),callbacks=callbacks_list)

In [None]:
# Evaluate on the held-out split and report the first two returned values as
# loss and accuracy.
score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
def rev_conv_label(label):
    """Translate a style class id (0-3) back into its text label.

    Args:
        label: class id; compared with == against 0, 1, 2 and 3 in turn.

    Returns:
        'Normal Normal', 'Italic Normal', 'Normal Bold' or 'Italic Bold' for
        ids 0, 1, 2 and 3 respectively; None for any other value.
    """
    names = ('Normal Normal', 'Italic Normal', 'Normal Bold', 'Italic Bold')
    for class_id, name in enumerate(names):
        if label == class_id:
            return name
    return None

# Quick check: predict on a single image.
# `org_img` is whatever the loading loop in an earlier cell processed last.
# This also rebinds `data`, replacing the training array with this 1-image batch.
data=[]
data.append(org_img)
data = np.asarray(data, dtype="float") / 255.0

y = model.predict(data)
# Round the softmax outputs to 0/1 integers.
# NOTE(review): rounding can leave a row with no 1 at all; rev_conv_label is
# not called on the result here — confirm how the class id is meant to be derived.
y = np.round(y).astype(int)

# Make Submission File

### Font size

In [None]:
# Load the font-family and font-size predictions.
df_fontFam = pd.read_csv("fontSubmit.csv")
df_fontSize = pd.read_csv("/content/submissionFontsize.csv")
# Reshape the size predictions into submission format:
# Id = '<parentId>_size', Predicted = '<size>px'.
df_fontSize.drop("Unnamed: 0", axis=1, inplace=True)
df_fontSize.rename({"parentId":"Id", "font_size":"Predicted"}, axis=1, inplace=True)
df_fontSize['Predicted'] = df_fontSize['Predicted'].apply(lambda x:str(x)+"px")
df_fontSize['Id'] = df_fontSize['Id'].apply(lambda x:x+"_size")
df_fontSize

### Font Family

In [None]:
# Reshape the family predictions into submission format: Id = '<parentId>_name'.
# The frame is modified in place, so run this cell only once per load of df_fontFam.
df_fontFam.drop('Unnamed: 0', axis=1, inplace=True)
df_fontFam['parentId'] = df_fontFam['parentId'].apply(lambda x:x+"_name")
df_fontFam.rename({"parentId":"Id", "font_family":"Predicted"}, axis=1, inplace=True)
df_fontFam

### .TTF

In [None]:
# .ttf rows: start from a copy of the formatted family predictions
# (columns 'Id' and 'Predicted').
df_fontTTF = df_fontFam.copy()

def ttf(font_name):
  """Return the .ttf file name that belongs to a canonical font-family name.

  Args:
    font_name: canonical family name such as 'Angsana_New'.

  Returns:
    The matching .ttf file name, or None when the family is not one of the
    ten known names.
  """
  ttf_files = {
      'Angsana_New': 'ANGSA.ttf',
      'Cordia_New': 'CORDIA.ttf',
      'DM_Shining_Star_Regular': 'DM Shining Star Regular.ttf',
      'FC_Knomphing_Regular': 'FC Knomphing Regular.ttf',
      'fonttintin': 'fonttintin.ttf',
      'Kunlasatri': 'Kunlasatri.ttf',
      'TH_Chakra_Petch': 'TH Chakra Petch.ttf',
      'TH_Charm_of_AU': 'TH Charm of AU.ttf',
      'TH_Mali_Grade6': 'TH Mali Grade6.ttf',
      'TH_Sarabun': 'THSarabun.ttf',
  }
  return ttf_files.get(font_name)

# Replace each family name by its .ttf file name and turn the Id into
# '<text before the first underscore>_file'.
# A family that ttf() does not know becomes None in 'Predicted'.
df_fontTTF['Predicted'] = df_fontTTF['Predicted'].apply(ttf)
df_fontTTF['Id'] = df_fontTTF['Id'].apply(lambda x:x.split("_")[0] + "_file")
df_fontTTF

### Font style

In [None]:
# Combine the two font-style prediction files and turn each Id into
# '<Id>_style-weight'.
df_s1 = pd.read_csv("/content/task_predcit_italic_bold_test1.csv")
df_s2 = pd.read_csv("/content/task_predcit_fontStyle_test2.csv")
df_style = pd.concat([df_s1, df_s2], axis=0)
df_style.reset_index(drop=True, inplace=True)
df_style['Id'] = df_style['Id'].apply(lambda x:x+"_style-weight")
df_style

### Merge all

In [None]:
# Stack the four partial submissions (style, family name, size, .ttf file).
# 'Id' is kept as a regular column: the merge below joins on it. The earlier
# version moved 'Id' into the index first, which left the two frames without a
# common column to merge on.
df_submission = pd.concat([df_style,df_fontFam,df_fontSize,df_fontTTF], axis=0)
df_submission = df_submission.reset_index(drop=True)

# Align with the sample submission so that every expected Id is present;
# Ids without a prediction come out of the right join as NaN.
sample = pd.read_csv('/content/sample_submission.csv')
sample = sample.rename({'Predicted': 'P'}, axis=1)
df = pd.merge(df_submission, sample, how='right', on='Id')

# Work on an explicit copy so the fill below modifies this frame rather than a
# temporary slice of `df`.
df_real = df[['Id','Predicted']].copy()

# Fill every missing prediction with the same default value, then confirm
# that no nulls remain (prints 0 on success).
df_real['Predicted'] = df_real['Predicted'].fillna('THSarabun.ttf')
print(len(df_real[df_real['Predicted'].isnull() == True]))

df_real = df_real.set_index("Id")
df_real.to_csv("real_submission.csv")

# Thank you

<hr></hr>