# Data Preparation

In [1]:
import skimage as ski
import numpy as np

In [2]:
import sys

# Make the project's helper modules under ./Scripts importable from this
# notebook. The membership check keeps the cell idempotent: re-running it
# does not keep appending duplicate entries to sys.path.
if './Scripts' not in sys.path:
    sys.path.append('./Scripts')

import pr_data_preparation
import pr_model_training
import pr_save_load_models
import pr_feature_extraction

In [None]:
from pr_data_preparation import preprocess_and_save_images

# Run preprocessing for the training batch (start index 0, batch size 200).
# The helper saves its results itself; nothing is assigned in the notebook.
preprocess_and_save_images(
    start_index=0,
    batch_size=200,
)

In [3]:
from pr_data_preparation import load_images

# Read the preprocessed training batch back in, using the same
# start index / batch size that the preprocessing cell was run with.
(x_data, y_data) = load_images(
    start_index=0,
    batch_size=200,
)

Loading fonts: 100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


# Feature Extraction

In [4]:
from pr_feature_extraction import compute_sift_descriptors

# compute_sift_descriptors returns two values: the first is used below to fit
# KMeans, the second is turned into the histograms the SVM is trained on.
(
    sift_descriptors_to_train_Kmeans,
    sift_descriptors_to_train_SVM,
) = compute_sift_descriptors(x_data)

Computing SIFT descriptors: 100%|██████████| 800/800 [03:45<00:00,  3.55it/s]


In [5]:
from pr_feature_extraction import train_kmeans

# Fit the KMeans model on the first descriptor set returned by
# compute_sift_descriptors. The resulting `kmeans` object is what later cells
# save with save_Kmeans_model and pass to compute_histograms.
kmeans = train_kmeans(sift_descriptors_to_train_Kmeans)

#### Save KMeans Model
After training the KMeans model, we save it for future use.

In [6]:
from pr_save_load_models import save_Kmeans_model

# Save the fitted KMeans model so it can be restored later with
# load_Kmeans_model() instead of being retrained.
# NOTE(review): the destination is chosen inside save_Kmeans_model and is not
# visible from this notebook — confirm it does not overwrite a model you need.
save_Kmeans_model(kmeans)

In [7]:
from pr_feature_extraction import compute_histograms

# Convert the second SIFT descriptor set into one feature vector per image,
# using the fitted KMeans model.
# Presumably a histogram of cluster assignments (bag of visual words) —
# confirm against pr_feature_extraction.
feature_vectors = compute_histograms(kmeans, sift_descriptors_to_train_SVM)

Computing histograms: 100%|██████████| 800/800 [00:17<00:00, 46.30it/s]


# Model Training

#### Prepare Data for SVM
We then scale our feature vectors using the StandardScaler from sklearn. This is good practice before training an SVM, because kernels such as RBF are sensitive to the relative scale of the features.

In [8]:
from pr_model_training import prepare_data_for_svm

# Scale the histogram feature vectors before SVM training.
# NOTE(review): only the scaled array comes back here. If a scaler is fitted
# inside prepare_data_for_svm it is not exposed to the notebook, so the same
# transform cannot be re-applied to test features from this cell's output —
# confirm how the test path handles scaling.
scaled_feature_vectors = prepare_data_for_svm(feature_vectors)

#### Train SVM
We then train our SVM classifier using the scaled feature vectors. We're using a Radial Basis Function (RBF) kernel.

In [9]:
from pr_model_training import train_svm

# Train the SVM classifier on the scaled training features, with y_data (as
# returned by load_images) as the targets. The fitted classifier is kept in
# `clf` for the save and test cells below.
clf = train_svm(scaled_feature_vectors, y_data)

#### Save SVM Model
After training the SVM model, we save it for future use.

In [10]:
from pr_save_load_models import save_SVM_model

# Save the trained SVM classifier so it can be restored later with
# load_SVM_model() instead of being retrained.
# NOTE(review): the destination is chosen inside save_SVM_model and is not
# visible from this notebook.
save_SVM_model(clf)

#### Test SVM
Finally, we test our trained SVM model on the test data and print the score.

In [13]:
# Import every helper this cell calls, so the cell does not depend on names
# that happen to have been imported by earlier cells.
from pr_data_preparation import load_images, preprocess_and_save_images
from pr_feature_extraction import compute_histograms, compute_sift_descriptors
from pr_model_training import test_svm

# Evaluation batch. The same two values must be used for preprocessing and for
# loading, so they are defined once here. Training used start_index=0 and
# batch_size=200; this range is presumably disjoint from it (assumes the batch
# covers [start_index, start_index + batch_size) — TODO confirm).
TEST_START_INDEX = 750
TEST_BATCH_SIZE = 50

preprocess_and_save_images(start_index=TEST_START_INDEX, batch_size=TEST_BATCH_SIZE)

# Load preprocessed images
x_test, y_test = load_images(start_index=TEST_START_INDEX, batch_size=TEST_BATCH_SIZE)

# Same feature pipeline as training: SIFT descriptors, then histograms built
# with the already-fitted `kmeans` model. Only the second descriptor set is
# needed here; the first (used for fitting KMeans during training) is unused.
sift_descriptors_Kmeans_test, sift_descriptors_SVM_test = compute_sift_descriptors(x_test)

feature_vectors_test = compute_histograms(kmeans, sift_descriptors_SVM_test)

# Test the SVM
# NOTE(review): `clf` was trained on scaled features (prepare_data_for_svm),
# but the test features are passed here unscaled. Unless test_svm applies the
# training-time scaling internally, train and test inputs are on different
# scales — confirm in pr_model_training before trusting the score.
score = test_svm(clf, feature_vectors_test, y_test)

print("Score: ", score)

Processing IBM Plex Sans Arabic:   0%|          | 0/50 [00:00<?, ?it/s]

Processing IBM Plex Sans Arabic: 100%|██████████| 50/50 [00:26<00:00,  1.87it/s]
Processing Lemonada: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s]
Processing Marhey: 100%|██████████| 50/50 [00:34<00:00,  1.47it/s]
Processing Scheherazade New: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s]


Image preprocessing and saving completed.


Loading fonts: 100%|██████████| 4/4 [00:03<00:00,  1.20it/s]
Computing SIFT descriptors: 100%|██████████| 200/200 [01:26<00:00,  2.32it/s]
Computing histograms: 100%|██████████| 200/200 [00:05<00:00, 33.81it/s]


Score:  0.975


# Load Model

#### Load KMeans Model
We can load the saved KMeans model when we need it.

In [None]:
from pr_save_load_models import load_Kmeans_model

# Restore the previously saved KMeans model. This rebinds `kmeans`, replacing
# any model trained earlier in the session.
# NOTE(review): if load_Kmeans_model unpickles a file, only load model files
# from a trusted source — confirm the storage format.
kmeans = load_Kmeans_model()

#### Load SVM Model
We can load the saved SVM model when we need it.

In [None]:
from pr_save_load_models import load_SVM_model

# Restore the previously saved SVM classifier. This rebinds `clf`, replacing
# any classifier trained earlier in the session.
# NOTE(review): if load_SVM_model unpickles a file, only load model files
# from a trusted source — confirm the storage format.
clf = load_SVM_model()