### Import Libraries and Load Datasets

In [1]:
import numpy as np
import os 
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder

def load_images_with_labels(data_folder, image_size=(80, 80)):
    """Load a labelled image dataset stored as one sub-folder per class.

    Every ``.png`` / ``.jpg`` / ``.jpeg`` file located directly inside a
    sub-folder of ``data_folder`` is opened, converted to grayscale (PIL mode
    ``'L'``) and resized to ``image_size``. The name of the sub-folder is used
    as the label of each image it contains. Entries of ``data_folder`` that
    are not directories, and files with other extensions, are ignored.

    Args:
        data_folder: Directory whose immediate sub-directories are the classes.
        image_size: ``(width, height)`` handed to ``Image.resize``. Defaults to
            ``(80, 80)``, the size the CNN in this notebook is built for.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per loaded
        image, in matching order. Both arrays are empty when nothing is found.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible between runs and machines.
    for label in sorted(os.listdir(data_folder)):
        folder_path = os.path.join(data_folder, label)
        if not os.path.isdir(folder_path):
            continue
        for filename in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, filename)
            is_image = os.path.isfile(img_path) and img_path.lower().endswith(('.png', '.jpg', '.jpeg'))
            if not is_image:
                continue
            # The context manager releases the underlying file handle as soon
            # as the pixel data has been converted.
            with Image.open(img_path) as img:
                gray = img.convert('L').resize(image_size)
            images.append(np.array(gray))
            labels.append(label)
    return np.array(images), np.array(labels)

# Root folder of the dataset. Set the SINHALA_DATASET_DIR environment variable
# to run the notebook on another machine; when it is unset the original
# location is used, so existing setups keep working unchanged.
DATASET_DIR = os.environ.get(
    'SINHALA_DATASET_DIR',
    'C:/Users/cmsmc/Desktop/Projects/SinhalaTranslator/Work/Dataset454',
)

# One sub-folder per split. Joined with '/' so the default paths are exactly
# the strings that were previously hard-coded.
train_data_folder = f'{DATASET_DIR}/train'
validation_data_folder = f'{DATASET_DIR}/valid'
test_data_folder = f'{DATASET_DIR}/test'

# Load the data: each call returns (grayscale image array, folder-name labels).
training_images, training_labels = load_images_with_labels(train_data_folder)
validation_images, validation_labels = load_images_with_labels(validation_data_folder)
test_images, test_labels = load_images_with_labels(test_data_folder)



### Defining the CNN Architecture 

In [4]:
# Defining the CNN architecture.
def create_sinhala_letter_id_cnn(num_classes=245, input_shape=(80, 80, 1)):
    """Build the (uncompiled) CNN used to classify Sinhala letter images.

    The network is three 3x3 convolutions (32, 64 and 64 filters, ReLU) with
    2x2 max pooling after the first two, followed by a 64-unit ReLU dense
    layer and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            245, the value that was previously hard-coded.
            NOTE(review): the earlier comment said 454 labels and the saved
            model file name says 244 -- confirm the real class count and pass
            it explicitly (e.g. ``len(label_encoder.classes_)``).
        input_shape: Shape of one input image as ``(height, width, channels)``.
            Defaults to ``(80, 80, 1)``.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = models.Sequential([
        layers.Input(shape=input_shape),  # Input layer with specified shape
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')  # One probability per class
    ])
    return model

### Training

In [8]:

# Normalize image data (assumes 8-bit pixel values -- TODO confirm).
# NOTE: this overwrites the arrays produced by the loading cell, so re-run the
# loading cell before re-running this one, otherwise the data is scaled twice.
training_images = training_images / 255.0
validation_images = validation_images / 255.0

# Add the trailing channel axis expected by the CNN. Only rank-3 arrays
# (samples, height, width) are expanded, so the step is safe to repeat and,
# unlike a blanket np.squeeze, never drops the batch axis of a one-image split.
if training_images.ndim == 3:
    training_images = np.expand_dims(training_images, axis=-1)
if validation_images.ndim == 3:
    validation_images = np.expand_dims(validation_images, axis=-1)

# Encode labels as integers. The encoder is fitted on the training labels only;
# transform() raises if the validation split contains a label it has not seen.
label_encoder = LabelEncoder()
encoded_training_labels = label_encoder.fit_transform(training_labels)
encoded_validation_labels = label_encoder.transform(validation_labels)

# Check for NumPy version compatibility: restore the `np.complex_` alias when
# the installed NumPy no longer provides it.
if not hasattr(np, 'complex_'):
    np.complex_ = np.complex128

# Create an instance of the CNN.
sinhala_letter_cnn = create_sinhala_letter_id_cnn()

# Fail early with a readable message if the dataset has more classes than the
# softmax layer has units; sparse categorical cross-entropy cannot represent
# labels beyond the last output index.
num_classes = len(label_encoder.classes_)
num_outputs = sinhala_letter_cnn.output_shape[-1]
if num_classes > num_outputs:
    raise ValueError(
        f'Dataset has {num_classes} classes but the model only has '
        f'{num_outputs} output units.'
    )

# Compile the model.
sinhala_letter_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary.
sinhala_letter_cnn.summary()

# Train the model.
history = sinhala_letter_cnn.fit(training_images, encoded_training_labels, epochs=40, validation_data=(validation_images, encoded_validation_labels))

# Make sure the output folder exists before writing into it.
os.makedirs('./TrainedModels', exist_ok=True)

# Save the trained model
sinhala_letter_cnn.save('./TrainedModels/CNN_244_trial_1.keras')

# Save the classes of the encoder that was actually used for training, so the
# integer predictions can be mapped back to folder names at test time.
np.save('./TrainedModels/CNN_244_trial_1_classes.npy', label_encoder.classes_)


Epoch 1/40
  59/1436 ━━━━━━━━━━━━━━━━━━━━ 1:48 79ms/step - accuracy: 0.0045 - loss: 5.5013

KeyboardInterrupt: 

### Plotting performance

In [None]:
import matplotlib.pyplot as plt

# function to plot training and validation metrics
def plot_metrics(history):
    """Draw training/validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing the
            per-epoch lists ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
            ``'val_loss'`` (the value returned by the model's ``fit`` call).
    """
    recorded = history.history

    # One entry per panel:
    # (training values, validation values, training legend, validation legend,
    #  panel title, y-axis label)
    panels = [
        (
            recorded['accuracy'],
            recorded['val_accuracy'],
            'Training accuracy',
            'Validation accuracy',
            'Training and validation accuracy',
            'Accuracy',
        ),
        (
            recorded['loss'],
            recorded['val_loss'],
            'Training loss',
            'Validation loss',
            'Training and validation loss',
            'Loss',
        ),
    ]

    # The x axis is 1-based and as long as the accuracy series.
    epochs = range(1, len(panels[0][0]) + 1)

    plt.figure(figsize=(12, 6))
    for position, panel in enumerate(panels, start=1):
        train_values, val_values, train_legend, val_legend, title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_values, 'bo-', label=train_legend)
        plt.plot(epochs, val_values, 'ro-', label=val_legend)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot the learning curves. This needs `history`, which the training cell only
# assigns once `fit` has returned (i.e. training was not interrupted).
plot_metrics(history)

### Testing

In [None]:
from tensorflow.keras.models import load_model
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import os

def load_images_with_labels(folder_path, image_size=(80, 80)):
    """Load the grayscale test images stored as one sub-folder per class.

    Every file inside a sub-folder of ``folder_path`` that OpenCV can decode
    is read as a single-channel image and, if necessary, resized to
    ``image_size`` so that all samples share the shape used for training.
    The sub-folder name is the label. Entries of ``folder_path`` that are not
    directories, and files OpenCV cannot read, are skipped.

    Args:
        folder_path: Directory whose immediate sub-directories are the classes.
        image_size: Target ``(width, height)``. Defaults to ``(80, 80)``, the
            size the training loader in this notebook resizes to.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per loaded
        image, in matching order. Both arrays are empty when nothing is found.
    """
    images = []
    labels = []
    width, height = image_size
    # Sorted for a reproducible sample order (os.listdir order is arbitrary).
    for label in sorted(os.listdir(folder_path)):
        label_folder = os.path.join(folder_path, label)
        # A stray file next to the class folders would make os.listdir raise.
        if not os.path.isdir(label_folder):
            continue
        for image_name in sorted(os.listdir(label_folder)):
            image_path = os.path.join(label_folder, image_name)
            if not os.path.isfile(image_path):
                continue
            # NOTE(review): cv2.imread returns None instead of raising when it
            # cannot read a file; verify it copes with non-ASCII folder names
            # on the target platform, otherwise whole classes vanish silently.
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                continue
            if image.shape[:2] != (height, width):
                # NOTE(review): the training loader resizes with PIL, so pixel
                # values of resized images may differ slightly -- confirm this
                # is acceptable or share a single loader for both splits.
                shrinking = image.shape[0] > height or image.shape[1] > width
                interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
                image = cv2.resize(image, (width, height), interpolation=interpolation)
            images.append(image)
            labels.append(label)
    return np.array(images), np.array(labels)

# Path to the test data folder. The dataset root can be overridden with the
# SINHALA_DATASET_DIR environment variable; the default is the original path.
DATASET_DIR = os.environ.get(
    'SINHALA_DATASET_DIR',
    'C:/Users/cmsmc/Desktop/Projects/SinhalaTranslator/Work/Dataset454',
)
test_data_folder = f'{DATASET_DIR}/test'

# Load the test images and labels
test_images, test_labels = load_images_with_labels(test_data_folder)
test_images = test_images / 255.0  # Normalize the images
test_images = np.expand_dims(test_images, axis=-1)  # Add the channel dimension

# Rebuild the label encoder from the classes saved by the training cell, so the
# integer codes mean the same thing as during training.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load('./TrainedModels/CNN_244_trial_1_classes.npy')
encoded_test_labels = label_encoder.transform(test_labels)

# Load the trained CNN model
model = load_model('./TrainedModels/CNN_244_trial_1.keras')

# Fail with a readable message if the loaded images do not have the shape the
# model was built for (e.g. the test images were not resized).
expected_shape = tuple(model.input_shape[1:])
if tuple(test_images.shape[1:]) != expected_shape:
    raise ValueError(
        f'Test images have shape {tuple(test_images.shape[1:])} per sample, '
        f'but the model expects {expected_shape}.'
    )

# Perform evaluation: the predicted class is the index of the largest softmax output.
predictions = model.predict(test_images)
predicted_labels = np.argmax(predictions, axis=1)

# Calculate accuracy
accuracy = accuracy_score(encoded_test_labels, predicted_labels)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Print classification report. `labels` pins the report to the saved classes so
# that `target_names` always lines up, even when some class is missing from the
# test set or the model predicts an index without a saved class name;
# `zero_division=0` reports 0 instead of warning for classes without samples.
class_indices = np.arange(len(label_encoder.classes_))
print(classification_report(
    encoded_test_labels,
    predicted_labels,
    labels=class_indices,
    target_names=label_encoder.classes_,
    zero_division=0,
))
