<a href="https://colab.research.google.com/github/IT21379406/Manuscript_Language_Classification/blob/main/model_tra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

In [2]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the dataset paths used by the
# following cells live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Root folder of the manuscript image set on the mounted Drive. Change this
# single value to point the notebook at a different copy of the dataset.
data_root_path = '/content/drive/MyDrive/Research/manuscript_image_set'

# One sub-folder per split; each is passed to image_dataset_from_directory.
data_train_path = f'{data_root_path}/training_dataset'
data_validation_path = f'{data_root_path}/validation_dataset'
data_test_path = f'{data_root_path}/testing_dataset'

In [None]:
# Side lengths (in pixels) every image is resized to when the datasets are loaded.
img_width, img_height = 180, 180

In [None]:
# Training split: batched dataset built from the images under data_train_path.
data_train = tf.keras.utils.image_dataset_from_directory(
    data_train_path,
    shuffle=True,
    # Keras documents image_size as (height, width). The previous
    # (img_width, img_height) order only worked because both values are equal.
    image_size=(img_height, img_width),
    batch_size=32,
    # validation_split expects a float in (0, 1) or None; the boolean False was
    # dropped in favour of the default (None = no split).
)

In [None]:
# Class names exposed by the training dataset. Later cells use this list to
# title the sample images, size the model's output layer and label the
# confusion matrix.
data_cat = data_train.class_names

In [None]:
# Validation split: batched dataset built from the images under
# data_validation_path, read in a fixed order (no shuffling).
data_val = tf.keras.utils.image_dataset_from_directory(
    data_validation_path,
    # Keras documents image_size as (height, width). The previous
    # (img_width, img_height) order only worked because both values are equal.
    image_size=(img_height, img_width),
    batch_size=32,
    shuffle=False,
    # validation_split expects a float in (0, 1) or None; the boolean False was
    # dropped in favour of the default (None = no split).
)

In [None]:
# Test split: batched dataset built from the images under data_test_path.
data_test = tf.keras.utils.image_dataset_from_directory(
    data_test_path,
    # Keras documents image_size as (height, width). The previous
    # (img_width, img_height) order only worked because both values are equal.
    image_size=(img_height, img_width),
    batch_size=32,
    # Must stay unshuffled: the evaluation cell iterates this dataset once for
    # the labels and once via model.predict, and relies on both passes
    # yielding the samples in the same order.
    shuffle=False,
    # validation_split expects a float in (0, 1) or None; the boolean False was
    # dropped in favour of the default (None = no split).
)

In [None]:
# Preview the first nine images of one training batch in a 3x3 grid, each
# titled with its class name.
sample_fig, sample_axes = plt.subplots(3, 3, figsize=(10, 10))
for image, labels in data_train.take(1):
    for i, ax in enumerate(sample_axes.flat):
        # Pixel values are cast to uint8 so imshow renders them as 0-255 colours.
        ax.imshow(image[i].numpy().astype('uint8'))
        ax.set_title(data_cat[labels[i]])
        ax.axis('off')

In [None]:
from tensorflow.keras import Sequential, layers

In [None]:
# Random augmentation applied as the first stage of the model:
# horizontal flip, rotation and zoom, each with factor 0.2 where applicable.
# NOTE(review): mirroring and strong rotation may distort script shapes in
# manuscript images - confirm these augmentations suit the task.
augmentation_layers = [
    layers.RandomFlip(mode='horizontal'),
    layers.RandomRotation(factor=0.2),
    layers.RandomZoom(height_factor=0.2),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
# Small CNN classifier:
#   input -> augmentation -> rescale to [0, 1] -> 3 x (Conv2D + MaxPooling2D)
#   -> Flatten -> Dropout -> Dense(128) -> Dense(number of classes, softmax)
model = Sequential([
    # Declare the input shape on the model itself, derived from the same
    # img_height / img_width used to load the datasets. Previously a literal
    # (180, 180, 3) was attached to the second layer via input_shape.
    tf.keras.Input(shape=(img_height, img_width, 3)),
    data_augmentation,
    layers.Rescaling(1./255),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dropout(0.4),
    layers.Dense(128, activation='relu'),
    # One unit per class; softmax makes the model emit class probabilities.
    layers.Dense(len(data_cat), activation='softmax')
])

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy on
# integer class labels, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    # The model's final Dense layer already applies softmax, so the loss
    # receives probabilities, not logits. from_logits must therefore be False;
    # with True the loss would apply softmax a second time.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [None]:
# Number of training epochs; also read by the plotting cell below.
epoches_size = 5

# Train on the training split and evaluate on the validation split after each
# epoch. The returned History object feeds the accuracy/loss curves.
history = model.fit(
    x=data_train,
    epochs=epoches_size,
    validation_data=data_val,
)

In [None]:
# Plot training vs. validation accuracy and loss per epoch, side by side.
training_history = history.history

# Derive the x-axis from what was actually recorded, so it always matches the
# curve lengths (e.g. if training stopped early or epoches_size was changed
# after fitting).
epochs_range = range(len(training_history['loss']))

# Wide figure: two panels do not fit legibly into a 4x4 inch canvas.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))

ax_accuracy.plot(epochs_range, training_history['accuracy'], label='Training Accuracy')
ax_accuracy.plot(epochs_range, training_history['val_accuracy'], label='Validation Accuracy')
ax_accuracy.set_title('Accuracy')
ax_accuracy.set_xlabel('Epoch')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.legend()

ax_loss.plot(epochs_range, training_history['loss'], label='Training Loss')
ax_loss.plot(epochs_range, training_history['val_loss'], label='Validation Loss')
ax_loss.set_title('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score

In [None]:
# Collect the ground-truth labels batch by batch from the test dataset and
# join them into one flat array.
test_label_batches = []
for batch_images, batch_labels in data_test:
    test_label_batches.append(batch_labels.numpy())
true_labels = np.concatenate(test_label_batches)

# Per-class scores for every test image; the predicted class is the index of
# the highest score in each row.
predictions = model.predict(data_test)
predicted_labels = predictions.argmax(axis=1)

In [None]:
# Fraction of test images whose predicted class matches the true class,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)
print(f"Testing Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Confusion matrix of true vs. predicted classes on the test split.
class_names = data_cat

# Pass the full list of class indices explicitly. Without `labels`,
# confusion_matrix only includes classes that occur in true_labels or
# predicted_labels, so a class missing from both would shrink the matrix and
# no longer line up with display_labels.
cm = confusion_matrix(
    true_labels,
    predicted_labels,
    labels=list(range(len(class_names))),
)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Persist the trained model to a file in the current working directory.
model_output_path = './language_identifier.h5'
model.save(model_output_path)