In [2]:
from image_util import load_digits_images, to_grayscale, to_histgram, to_pca, to_norm, pipe_from_folder_to_norm
import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from logistic_regression import cross_validate_model, LogisticRegression

# Correct paths to your dataset
base_folder = r"./dataset/digits"

start_time = datetime.datetime.now()
print(f"Start Time {start_time}")

# load and normalize images
images, labels = load_digits_images(base_folder)
normalize_time = datetime.datetime.now()
print(f"loading time: {normalize_time - start_time}")

# Step 1 - GRAYSCALE
grayscale_images = to_grayscale(images)
grayscale_time = datetime.datetime.now()
print(f"grayscale time: {grayscale_time - normalize_time}")

# Step 2 - HISTGRAM
histgram_images = to_histgram(grayscale_images)
histgram_time = datetime.datetime.now()
print(f"histogram time: {histgram_time - grayscale_time}")

# Step 3 - PCA
pca_images = to_pca(histgram_images)
pca_time = datetime.datetime.now()
print(f"pca time: {pca_time - histgram_time}")

# Step 4 - Normalization
norm_images = to_norm(pca_images)
norm_time = datetime.datetime.now()
print(f"norm time: {norm_time - pca_time}")

# Step 5 - Encode labels to numerical values
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)
# One-hot encode the labels for softmax regression
labels_one_hot = np.eye(len(np.unique(labels_encoded)))[labels_encoded]
one_hot_time = datetime.datetime.now()
print(f"label Encoding time: {one_hot_time - one_hot_time}")

# Step 6 - # Split into training, validation, and test sets
images_train_full, images_test, labels_train_full, labels_test = train_test_split(norm_images, labels_one_hot,
                                                                                  test_size=0.2, random_state=42)

split_time = datetime.datetime.now()
print(f"split dataset time: {split_time - one_hot_time}")

# Step 7 - Cross validate the model
cross_validation_accuracy = cross_validate_model(norm_images, labels_one_hot, num_folds=5)
cross_validation_time = datetime.datetime.now()
print(f"cross validation time: {cross_validation_time - split_time}")

# Step 8 - Train the model
final_model = LogisticRegression(
    input_size=images_train_full.shape[1],
    num_classes=labels_train_full.shape[1],
    learning_rate=0.1,
    regularization=0.001,
)
final_model.train(images_train_full, labels_train_full, epochs=1000)

# save the model
print(f"saving the model state...")
final_model.save_model("./digits_model")

train_time = datetime.datetime.now()
print(f"train time: {train_time - cross_validation_time}")

# Step 9 Test the final model
labels_test_pred = final_model.predict(images_test)
final_accuracy = np.mean(np.argmax(labels_test, axis=1) == labels_test_pred)
print(f"Final Test Accuracy: {final_accuracy}")
testing_time = datetime.datetime.now()
print(f"testing time: {testing_time - train_time}")

end_time = datetime.datetime.now()
print(f"End Time: {end_time}")
print(f"Total Duration: {end_time - start_time}")


Start Time 2024-11-17 16:04:35.344960
Opening folder ./dataset/digits/0 ...
Opening folder ./dataset/digits/1 ...
Opening folder ./dataset/digits/2 ...
Opening folder ./dataset/digits/3 ...
Opening folder ./dataset/digits/4 ...
Opening folder ./dataset/digits/5 ...
Opening folder ./dataset/digits/6 ...
Opening folder ./dataset/digits/7 ...
Opening folder ./dataset/digits/8 ...
Opening folder ./dataset/digits/9 ...
loading time: 0:00:49.950902
grayscale time: 0:00:19.869458
histogram time: 0:01:40.573673
pca time: 0:00:07.907141
norm time: 0:00:00.042501
label Encoding time: 0:00:00
split dataset time: 0:00:00.038460
Epoch 0, Loss: 2.3036667042858148
Epoch 100, Loss: 0.8614715363092479
Epoch 200, Loss: 0.6855753297770683
Epoch 300, Loss: 0.6206690063616267
Epoch 400, Loss: 0.5865979896855918
Epoch 500, Loss: 0.5656006378461865
Epoch 600, Loss: 0.551414405676085
Epoch 700, Loss: 0.5412415765517858
Epoch 800, Loss: 0.5336356723422283
Epoch 900, Loss: 0.5277703511344882
Validation Accuracy

In [None]:
import datetime
from logistic_regression import LogisticRegression
from image_util import pipe_from_folder_to_norm
import numpy as np

model_filename = "./digits_model"
base_folder = r"./dataset/digits"

start_time = datetime.datetime.now()
print(f"load images and normalize")
images_test = pipe_from_folder_to_norm(base_folder)

print(f"loading digits model file: {model_filename}")
model = LogisticRegression.load_model(model_filename)

print(f"predict the images")
labels_test_pred = model.predict(images_test)

print(f"calc accuracy")
final_accuracy = np.mean(np.argmax(labels_test, axis=1) == labels_test_pred)
print(f"Final Test Accuracy: {final_accuracy}")

end_time = datetime.datetime.now()
print(f"End Time: {end_time}")
print(f"Total Duration: {end_time - start_time}")

load images and normalize
Opening folder ./dataset/digits/0 ...
Opening folder ./dataset/digits/1 ...
Opening folder ./dataset/digits/2 ...
Opening folder ./dataset/digits/3 ...
Opening folder ./dataset/digits/4 ...
Opening folder ./dataset/digits/5 ...
Opening folder ./dataset/digits/6 ...
Opening folder ./dataset/digits/7 ...
Opening folder ./dataset/digits/8 ...
Opening folder ./dataset/digits/9 ...
