# Import Libraries

In [1]:
# Libraries

import cv2
import os
import splitfolders

from pathlib import Path
from collections import Counter
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# Exploratory Data
The dataset is a collection of images of the alphabet signed in BISINDO (Bahasa Isyarat Indonesia, Indonesian Sign Language).

### Import Data

In [None]:
# Root folder of the raw dataset
imageDataset = 'DataFix'

# The '*.*' pattern matches every file name containing a dot, so all image
# formats are picked up; only the last two path components
# (parent folder, file name) are kept for each file.
paths = [entry.parts[-2:] for entry in Path(imageDataset).rglob('*.*')]

# One row per file: parent folder -> 'Class', file name -> 'Images',
# ordered by class name with a fresh 0..n-1 index.
df = (
    pd.DataFrame(paths, columns=['Class', 'Images'])
    .sort_values('Class')
    .reset_index(drop=True)
)
df

### View the total number of classes and the number of images in each class

In [None]:
# Dataset totals: one row per image, one distinct 'Class' value per class
totalImage = len(df['Images'])
totalClass = df['Class'].nunique()

print(f"Total Image : {totalImage}")
print(f"Total Class : {totalClass}")

# Per-class image counts, listed in class-name order
print('Number of images in each class : ')
print(df['Class'].value_counts().sort_index())

### Get the size of each image from the dataset

In [None]:
def storeImageSize(dataset_path, extensions=('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
    """Collect the size of every image stored one folder level below *dataset_path*.

    Args:
        dataset_path: Directory whose immediate sub-folders contain the images.
        extensions: File suffixes (matched case-insensitively) that count as
            images. The default matches the formats accepted by the other
            helpers in this notebook, so no image is left out of the
            statistics just because it is not a ``.jpg``.

    Returns:
        A list with one ``img.size`` tuple (width, height) per image that
        could be opened. Files that fail to open are reported on stdout and
        skipped.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    image_sizes = []

    # Sorted so the result order does not depend on the file system
    for alphabet_folder in sorted(os.listdir(dataset_path)):
        alphabet_folder_path = os.path.join(dataset_path, alphabet_folder)

        # Files lying directly in the dataset root are ignored
        if not os.path.isdir(alphabet_folder_path):
            continue

        for filename in sorted(os.listdir(alphabet_folder_path)):
            if not filename.lower().endswith(suffixes):
                continue

            file_path = os.path.join(alphabet_folder_path, filename)
            try:
                with Image.open(file_path) as img:
                    image_sizes.append(img.size)
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not
                # abort the scan of the whole dataset.
                print(f"Could not open image {filename}: {e}")

    return image_sizes

def countsImageSize(image_sizes):
    """Print a table showing how many images share each image size.

    Args:
        image_sizes: Iterable of size tuples whose first two items are the
            width and the height.

    The table is printed to stdout, ordered from the most frequent size to
    the least frequent one. Nothing is returned.
    """
    tally = Counter(image_sizes)

    # Sizes are rendered as "<width>x<height>" so they read well in the table
    table = pd.DataFrame({
        'Image Size (width x height)': [
            "{}x{}".format(dims[0], dims[1]) for dims in tally
        ],
        'Number of Images': list(tally.values()),
    })

    print(table.sort_values(by='Number of Images', ascending=False))


# Collect the size of every image found in the dataset's class folders
image_sizes = storeImageSize(imageDataset)

# Print a table of how many images have each size (no plot is drawn)
countsImageSize(image_sizes)


### Visualize sample images from each class

In [None]:
def displaySampleImage(root_folder, cols=4):
    """Show one sample image per class folder in a grid of subplots.

    For every immediate sub-folder of *root_folder* (visited in sorted
    order) the first image by sorted file name is displayed, titled with the
    folder name.

    Args:
        root_folder: Directory whose immediate sub-folders hold the images.
        cols: Number of columns in the grid.

    If no sub-folder contains a supported image, a message is printed and no
    figure is created.
    """
    supported_formats = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # (folder name, path of its first image) for every non-empty class folder
    samples = []
    for letter_folder in sorted(os.listdir(root_folder)):
        letter_path = os.path.join(root_folder, letter_folder)
        if not os.path.isdir(letter_path):
            continue

        image_files = sorted(
            f for f in os.listdir(letter_path)
            if f.lower().endswith(supported_formats)
        )
        if image_files:
            samples.append((letter_folder, os.path.join(letter_path, image_files[0])))

    # A grid with zero rows cannot be created, so stop before plotting
    if not samples:
        print(f"No images found in {root_folder}")
        return

    # Ceiling division: enough rows to hold every sample
    rows = -(-len(samples) // cols)

    # squeeze=False always yields a 2-D array of axes, whatever rows/cols are
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    for ax, (letter, img_path) in zip(axes, samples):
        # The context manager closes the file handle once the image is drawn
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.axis('off')
        ax.set_title(letter)

    # Remove the unused cells of the last row
    for ax in axes[len(samples):]:
        fig.delaxes(ax)

    plt.tight_layout()
    plt.show()


# Display the first image of every class folder in the raw dataset
displaySampleImage(imageDataset)

# Preprocessing Data

### Resize Image to 128x128 Pixel

In [None]:
def resizeImage(root_folder, target_folder, size=(128, 128)):
    """Resize every image under *root_folder* and save it under *target_folder*.

    The folder structure below *root_folder* is reproduced below
    *target_folder*, and each image keeps its original file name. Target
    sub-folders are only created for source folders that contain at least
    one supported image.

    Args:
        root_folder: Directory that is walked recursively for images.
        target_folder: Directory that receives the resized copies.
        size: Output size passed to ``Image.resize`` as (width, height).
    """
    supported_formats = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    for root, _dirs, files in os.walk(root_folder):
        image_files = [f for f in files if f.lower().endswith(supported_formats)]
        if not image_files:
            continue

        # Mirror the position of this folder relative to the root folder
        target_subfolder = os.path.join(
            target_folder, os.path.relpath(root, root_folder)
        )
        os.makedirs(target_subfolder, exist_ok=True)

        for file in image_files:
            with Image.open(os.path.join(root, file)) as img:
                # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is
                # the same filter under its current name.
                resized_img = img.resize(size, Image.LANCZOS)
                resized_img.save(os.path.join(target_subfolder, file))

# Path to the root folder containing the original images
# (the same 'DataFix' directory that imageDataset points to)
rootFolder = 'DataFix'

# Path to the target folder that receives the resized images
targetFolder = 'ResizedData'

# Resize images to 128x128 and save them with the same folder structure
resizeImage(rootFolder, targetFolder, size=(128, 128))


### Convert resized images to grayscale

In [None]:
def processImage(input_path, output_path):
    """Convert one image file to grayscale and write the result to *output_path*.

    Args:
        input_path: Path of the image to read.
        output_path: Path the grayscale image is written to.

    Raises:
        OSError: If the input image cannot be read or the output image
            cannot be written.
    """
    img = cv2.imread(input_path)

    # cv2.imread signals a missing/undecodable file by returning None
    if img is None:
        raise OSError(f"Could not read image: {input_path}")

    # cv2.imread loads colour images in BGR channel order
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(output_path, gray):
        raise OSError(f"Could not write image: {output_path}")

def processFolder(input_folder, output_folder, extensions=('.png', '.jpg', '.jpeg')):
    """Convert the images of every sub-folder of *input_folder* to grayscale.

    Each immediate sub-folder of *input_folder* gets a sub-folder of the
    same name in *output_folder* (created if missing), and every matching
    image is converted with ``processImage`` and saved under its original
    file name.

    Args:
        input_folder: Directory whose immediate sub-folders hold the images.
        output_folder: Directory that receives the grayscale copies.
        extensions: File suffixes to convert. They are matched
            case-insensitively, like in the resizing step, so a file such as
            ``X.JPG`` is not silently dropped from the dataset.
    """
    suffixes = tuple(ext.lower() for ext in extensions)

    for folder_name in sorted(os.listdir(input_folder)):
        folder_path = os.path.join(input_folder, folder_name)

        # Only sub-folders are processed; loose files in the root are skipped
        if not os.path.isdir(folder_path):
            continue

        output_subfolder = os.path.join(output_folder, folder_name)
        os.makedirs(output_subfolder, exist_ok=True)

        for filename in sorted(os.listdir(folder_path)):
            if filename.lower().endswith(suffixes):
                processImage(
                    os.path.join(folder_path, filename),
                    os.path.join(output_subfolder, filename),
                )

# Input: the resized images; output: where their grayscale copies are written
inputFolder = 'ResizedData'
outputFolder = 'GrayScale'

# Convert every class sub-folder of inputFolder into outputFolder
processFolder(inputFolder, outputFolder)


### Split Dataset into 3 Directories for Train, Validation, and Test

In [2]:
# Split 'GrayScale' into 'SplitData' with a 0.8 / 0.1 / 0.1 ratio; the fixed seed keeps the split reproducible
splitfolders.ratio('GrayScale',output='SplitData', seed=1333, ratio=(0.8,0.1,0.1))

Copying files: 5980 files [00:18, 316.34 files/s]


# Data Preparation with ImageDataGenerator

In [5]:
# ImageDataGenerator
traindir = 'SplitData/train'
testdir = 'SplitData/test'
valdir = 'SplitData/val'

# Only the training images are augmented (shift, shear, zoom);
# every split is rescaled from [0, 255] to [0, 1].
training_datagen = ImageDataGenerator(
    rescale=1. / 255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
)

val_datagen = ImageDataGenerator(rescale=1. / 255)

test_datagen = ImageDataGenerator(rescale=1. / 255)

# color_mode='grayscale' loads a single channel. The images were converted to
# grayscale and the model expects (128, 128, 1) inputs, whereas the default
# color_mode ('rgb') would yield 3-channel batches.
train_generator = training_datagen.flow_from_directory(
    traindir,
    target_size=(128, 128),
    color_mode='grayscale',
    class_mode='categorical',
    batch_size=128,
)

val_generator = val_datagen.flow_from_directory(
    directory=valdir,
    target_size=(128, 128),
    color_mode='grayscale',
    class_mode='categorical',
    batch_size=128,
)

# shuffle=False keeps the test images in a fixed order so predictions can be
# compared with the generator's labels.
test_generator = test_datagen.flow_from_directory(
    testdir,
    target_size=(128, 128),
    color_mode='grayscale',
    class_mode='categorical',
    batch_size=128,
    shuffle=False,
)


Found 4784 images belonging to 26 classes.
Found 598 images belonging to 26 classes.
Found 598 images belonging to 26 classes.


# Deep Learning Model

### Model Layers

In [None]:
# CNN classifier: three pairs of 3x3 convolutions (32, 64, 128 filters), each
# convolution followed by 2x2 max pooling, then a dense head with dropout.
# 'same' padding keeps the spatial size in the convolutions, so only the six
# pooling layers shrink it: 128 -> 64 -> 32 -> 16 -> 8 -> 4 -> 2.
model = tf.keras.models.Sequential([
    # Input: 128x128 images with a single (grayscale) channel
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', padding='same', input_shape=(128, 128, 1)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(128, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(2,2),

    # 2 x 2 x 128 feature map -> vector of 512 values
    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    
    # One softmax output per class (26 outputs)
    tf.keras.layers.Dense(26, activation='softmax')
])

# Print the model summary
model.summary()

### Compile and Fit the Model

In [None]:
# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical'; accuracy is tracked during training.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs, evaluating on the validation generator after each one
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    verbose=1,
)

### Plot the Results

In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: accuracy curves
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend(loc=0)

# Figure 2: loss curves (previously computed but never plotted, leaving an
# empty second figure)
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend(loc=0)

plt.show()

### Predictions

In [None]:
# Class probabilities for every test image (the test generator does not
# shuffle, so rows follow the generator's file order)
result = model.predict(test_generator, verbose=0)

# Predicted class = index of the highest probability in each row
y_pred = result.argmax(axis=1)

# Ground-truth class indices exposed by the generator
y_true = test_generator.labels

# Evaluate loss and accuracy on the test set
loss, acc = model.evaluate(test_generator, verbose=0)

print(f'The accuracy of the model for testing data is: {acc * 100}')
print(f'The Loss of the model for testing data is: {loss}')

In [None]:
# Short aliases for predicted and true class indices
p, y = y_pred, y_true

# Positions where the prediction matches / differs from the true class
correct = np.where(p == y)[0]
incorrect = np.where(p != y)[0]

print(f"Correct predicted classes: {len(correct)}")
print(f"Incorrect predicted classes: {len(incorrect)}")

In [None]:
from sklearn.metrics import classification_report

# y_pred (arg-max class index per test image) is the prediction vector; the
# name `predictions` used here before was never defined.
# class_indices maps folder name -> class index, so it supplies both the
# label ids and readable row names for the report.
print(classification_report(
    test_generator.classes,
    y_pred,
    labels=list(test_generator.class_indices.values()),
    target_names=list(test_generator.class_indices.keys()),
))

In [None]:
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# y_pred (arg-max class index per test image) is the prediction vector; the
# name `predictions` used here before was never defined.
# The score is printed because a bare expression in the middle of a cell is
# silently discarded.
print('Accuracy score:', accuracy_score(test_generator.classes, y_pred))

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(test_generator.classes, y_pred)

plt.figure(figsize=(12, 10))
# fmt='d' prints the counts as plain integers
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.show()

# Saving the Model

In [None]:
# Persist the trained model to a single file; the '.h5' suffix selects Keras' HDF5 format
model.save("modelFix.h5")