In [257]:
from elpv_dataset.utils.elpv_reader import load_dataset
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold


In [291]:
#OG  (presumably "original" baseline run -- TODO confirm)
# load_dataset is imported from elpv_dataset.utils.elpv_reader. It yields three
# values: the images, a per-image value (`proba`) that is converted to a class id
# further down in this cell, and `types`, which the cells shown here do not use.

images, proba, types = load_dataset()

def map_prob_to_class(probability):
    """Translate a probability value into one of four integer class ids.

    Returns 0 when the value is exactly zero, 1 when it lies strictly between
    0 and 0.34, 2 when it lies strictly between 0.64 and 0.68, and 3 for every
    other value (the 0.34-0.64 gap, anything from 0.68 up, negatives and NaN).
    """
    if probability == 0:
        return 0

    # (exclusive lower bound, exclusive upper bound, class id)
    bands = ((0, 0.34, 1), (0.64, 0.68, 2))
    for lower, upper, class_id in bands:
        if lower < probability < upper:
            return class_id

    # Anything that matched no band falls through to the last class.
    return 3
    
from imblearn.over_sampling import RandomOverSampler

# Turn every raw probability into its integer class id.
classes = np.array(list(map(lambda probability: map_prob_to_class(float(probability)), proba)))

# Hold out 25% of the samples for testing; the seed keeps the split repeatable.
(training_imgs, testing_imgs,
 training_labels, testing_labels) = train_test_split(
    images,
    classes,
    test_size=0.25,
    random_state=41,
)

# Flatten each training image to a single row before handing it to the oversampler.
n_samples, width, height = training_imgs.shape
training_imgs_reshaped = training_imgs.reshape((n_samples, width * height))

# Balance the training split only; the test split is left untouched.
oversampler = RandomOverSampler(random_state=42)
training_imgs, training_labels = oversampler.fit_resample(training_imgs_reshaped, training_labels)

# Restore the per-image 2-D layout for the image-processing cells that follow.
training_imgs = training_imgs.reshape(-1, width, height)


In [292]:
# Use SIFT to count the keypoints detected in each training image.
sift = cv2.SIFT_create()

# Only the number of keypoints per image is kept; the descriptors returned by
# detectAndCompute are discarded, so they are no longer bound to a name.
# This is the same expression the test-split cell uses for keypoints_test.
# NOTE(review): `keypoints` is not fed into the model in the cells shown here.
keypoints = [len(sift.detectAndCompute(img, None)[0]) for img in training_imgs]

In [293]:
from skimage.filters import rank
from skimage.morphology import disk
import concurrent.futures

def normalizeImages(images):
    """Standardise each image and rescale it linearly to the range [-1, 1].

    Parameters
    ----------
    images : iterable of array-like
        Images to normalise. Each image is processed independently of the
        others, using its own mean, standard deviation, minimum and maximum.

    Returns
    -------
    list of numpy.ndarray
        One array per input image. The smallest pixel of an image maps to -1
        and the largest to 1. An image without contrast (zero standard
        deviation) or without pixels is returned as all zeros, because the
        rescaling is undefined there and would otherwise produce NaNs.
    """
    normalized_images = []
    for image in images:
        pixels = np.asarray(image)

        # Guard the degenerate cases first: dividing by a zero standard
        # deviation (constant image) yields 0/0 = NaN for every pixel, and
        # np.min/np.max raise on an empty array.
        if pixels.size == 0 or np.std(pixels) == 0:
            normalized_images.append(np.zeros(pixels.shape, dtype=float))
            continue

        # Zero-mean / unit-variance standardisation.
        standardized = (pixels - np.mean(pixels)) / np.std(pixels)

        # Min-max rescale to [0, 1], then stretch to [-1, 1].
        lowest = np.min(standardized)
        value_range = np.max(standardized) - lowest
        unit_scaled = (standardized - lowest) / value_range
        normalized_images.append(unit_scaled * 2 - 1)
    return normalized_images

def applyAnisotropicDiffusion(images, radius=20, s0=10, s1=10):
    """Smooth every image with skimage's bilateral mean rank filter, in parallel.

    Despite the function name, the filter applied is ``rank.mean_bilateral``
    over a disk-shaped footprint; the name is kept so existing callers work.

    Parameters
    ----------
    images : iterable
        Images to filter; each is passed unchanged to ``rank.mean_bilateral``.
    radius : int, optional
        Radius handed to ``disk`` to build the footprint (default 20).
    s0, s1 : int, optional
        Forwarded as the ``s0`` / ``s1`` arguments of ``rank.mean_bilateral``
        (default 10 each).

    Returns
    -------
    list
        The filtered images, in the same order as the input.
    """
    # The footprint depends only on the radius, so build it once instead of
    # once per image inside the worker.
    footprint = disk(radius)

    def smooth(img):
        return rank.mean_bilateral(img, footprint, s0=s0, s1=s1)

    # executor.map preserves input order; list() drains it before the pool closes.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return list(executor.map(smooth, images))

from skimage import filters

def applyThresholding(images):
    """Binarise each image with a fixed OpenCV threshold.

    Every image is passed to ``cv2.threshold`` with a threshold of 122, a
    maximum value of 255 and the ``cv2.THRESH_BINARY`` mode. Only the second
    element of the returned pair (the thresholded image) is kept.

    Returns a list holding one thresholded image per input image, in order.
    """
    return [
        cv2.threshold(image, 122, 255, cv2.THRESH_BINARY)[1]
        for image in images
    ]


In [294]:
# Thresholding
# Each training image is first smoothed by applyAnisotropicDiffusion and the
# smoothed result is then binarised by applyThresholding (fixed cut-off of 122).

thresholding = applyThresholding(applyAnisotropicDiffusion(training_imgs))


# Disabled earlier variant, kept for reference: thresholds the raw training
# images directly, without the smoothing step.
# thresholding = []
# for img in training_imgs:
#     ret, new_image = cv2.threshold(img, 122, 255, cv2.THRESH_BINARY)
#     thresholding.append(new_image)
    

In [295]:


# Count the zero-valued (black) pixels in every thresholded training image.
bl_pixels = [(img == 0).sum() for img in thresholding]

# Mean and spread of those counts across the training images; the test-split
# cell reuses both values to build its features.
average_black_pixels, std_black_pixels = np.mean(bl_pixels), np.std(bl_pixels)

# Feature 1: signed offset of each count from the training mean.
diff_from_mean = [count - average_black_pixels for count in bl_pixels]
# Feature 2: the same offset expressed in training standard deviations.
std_deviations_from_mean = [offset / std_black_pixels for offset in diff_from_mean]

print(average_black_pixels)


20059.38123885918


In [296]:
# LOGISTIC REGRESSION

# Feature matrix: one row per training image, with two columns --
# the offset from the mean black-pixel count and that offset in standard deviations.
X_train = np.column_stack((diff_from_mean, std_deviations_from_mean))

# Create and train the logistic regression model.
# The liblinear solver uses a random number generator while fitting, so the
# seed is pinned to make the fitted coefficients repeatable across runs.
model = LogisticRegression(
    solver='liblinear',
    max_iter=200,
    penalty='l1',
    class_weight='balanced',
    random_state=42,
)

model.fit(X_train, training_labels)

In [297]:
# ---- Features for the held-out images ----
# NOTE(review): keypoints_test is computed but is not part of X_test below.
keypoints_test = [len(sift.detectAndCompute(img, None)[0]) for img in testing_imgs]
thresholding_test = applyThresholding(applyAnisotropicDiffusion(testing_imgs))
bl_pixels_test = [np.sum(img == 0) for img in thresholding_test]

# Centre and scale with the statistics computed on the TRAINING images, so the
# test features live on the same scale the model was fitted on.
diff_from_mean_test = [(count - average_black_pixels) for count in bl_pixels_test]
std_deviations_from_mean_test = [(count - average_black_pixels) / std_black_pixels for count in bl_pixels_test]

# Same two black-pixel columns, in the same order, as X_train.
X_test = np.column_stack((diff_from_mean_test, std_deviations_from_mean_test))

# ---- Predict and score ----
y_pred_test = model.predict(X_test)

# Sorted union of the labels seen in the ground truth and in the predictions.
# Passing it to confusion_matrix guarantees that row/column i of the matrix is
# class_labels[i]; a plain set() has no guaranteed order and would miss a class
# that only appears among the predictions.
class_labels = sorted(set(testing_labels) | set(y_pred_test))

accuracy = accuracy_score(testing_labels, y_pred_test)
precision = precision_score(testing_labels, y_pred_test, average='weighted')
recall = recall_score(testing_labels, y_pred_test, average='weighted')
f1 = f1_score(testing_labels, y_pred_test, average='weighted')
conf_matrix = confusion_matrix(testing_labels, y_pred_test, labels=class_labels)
class_report_dict = classification_report(testing_labels, y_pred_test, output_dict=True)

# ---- Flatten the classification report into table rows ----
# Fixed column list instead of reading the keys off whichever entry the loop
# happened to visit last.
report_columns = ["precision", "recall", "f1-score", "support"]
class_report_data = []
for label, metrics in class_report_dict.items():
    if isinstance(metrics, dict):
        row = [label] + [metrics[column] for column in report_columns]
    else:
        # Scalar entry (overall accuracy): show it under f1-score with the total
        # sample count as support, leaving precision and recall blank.
        row = [label, None, None, metrics, len(testing_labels)]
    class_report_data.append(row)

from tabulate import tabulate

# ---- Print out using tabulate ----
table = [
    ["Accuracy", accuracy],
    ["Precision", precision],
    ["Recall", recall],
    ["F1 Score", f1],
]

print("\nConfusion Matrix:")
conf_matrix_table = [["Actual/Predicted"] + class_labels] + \
                [[actual] + counts.tolist() for actual, counts in zip(class_labels, conf_matrix)]
print(tabulate(conf_matrix_table, headers="firstrow", tablefmt="grid"))


print(tabulate(table, headers=["Metric", "Value"], tablefmt="grid"))

print("\nClassification Report:")
print(tabulate(class_report_data, headers=["Class"] + report_columns, tablefmt="grid"))




Confusion Matrix:
+--------------------+-----+-----+-----+-----+
|   Actual/Predicted |   0 |   1 |   2 |   3 |
|                  0 | 307 |  11 |  21 |  47 |
+--------------------+-----+-----+-----+-----+
|                  1 |  50 |   6 |   6 |  11 |
+--------------------+-----+-----+-----+-----+
|                  2 |  15 |   1 |   1 |   3 |
+--------------------+-----+-----+-----+-----+
|                  3 |  57 |   7 |  11 | 102 |
+--------------------+-----+-----+-----+-----+
+-----------+----------+
| Metric    |    Value |
| Accuracy  | 0.634146 |
+-----------+----------+
| Precision | 0.617412 |
+-----------+----------+
| Recall    | 0.634146 |
+-----------+----------+
| F1 Score  | 0.619846 |
+-----------+----------+

Classification Report:
+--------------+-------------+-----------+------------+-----------+
| Class        |   precision |    recall |   f1-score |   support |
| 0            |    0.715618 | 0.795337  |  0.753374  |       386 |
+--------------+-------------+---

In [298]:
# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# X_combined = X_train + X_test
# y_combined = y_train + y_test

# fold_accuracies = []
# fold_precisions = []
# fold_recalls = []
# fold_f1_scores = []
# fold_conf_matrix = []

# for train_index, test_index in kf.split(X_combined):
#     X_train_fold, X_test_fold = np.array(X_combined)[train_index], np.array(X_combined)[test_index]
#     y_train_fold, y_test_fold = np.array(y_combined)[train_index], np.array(y_combined)[test_index]

#     # Train the model
#     # model.fit(X_train_fold, y_train_fold)

#     # Make predictions on the test set for this fold
#     y_pred_fold = model.predict(X_test_fold)

#     # Evaluate the model for this fold
#     accuracy_fold = accuracy_score(y_test_fold, y_pred_fold)
#     precision_fold = precision_score(y_test_fold, y_pred_fold, average='weighted')
#     recall_fold = recall_score(y_test_fold, y_pred_fold, average='weighted')
#     f1_score_fold = f1_score(y_test_fold, y_pred_fold, average='weighted')
#     conf_matrix = confusion_matrix(y_test_fold, y_pred_fold)


#     # Append metrics to the lists
#     fold_accuracies.append(accuracy_fold)   
#     fold_precisions.append(precision_fold)
#     fold_recalls.append(recall_fold)
#     fold_f1_scores.append(f1_score_fold)
#     fold_conf_matrix.append(conf_matrix)

# # Print metrics for each fold
# for i in range(5):
#     print(f'Fold {i + 1}:')
#     print(f'  Accuracy: {fold_accuracies[i]:.2f}')
#     print(f'  Precision: {fold_precisions[i]:.2f}')
#     print(f'  Recall: {fold_recalls[i]:.2f}')
#     print(f'  F1 Score: {fold_f1_scores[i]:.2f}')
#     print(f'Confusion Matrix: \n {fold_conf_matrix[i]}')
#     print()
    
# # Print average metrics across all folds
# print('Average Metrics Across Folds:')
# print(f'  Average Accuracy: {np.mean(fold_accuracies):.2f}')
# print(f'  Average Precision: {np.mean(fold_precisions):.2f}')
# print(f'  Average Recall: {np.mean(fold_recalls):.2f}')
# print(f'  Average F1 Score: {np.mean(fold_f1_scores):.2f}')