# Project in Machine Learning (2023-2024, Winter Semester)
Classification of objects - Pictures of Cats, Dogs and Wild Animals

In this project, we use a training set of 14630 pictures (512x512, jpg) of cats, dogs and wild animals to build models that can recognize the animal in a new image. We then evaluate these models on a validation set of 1500 pictures.

Source of the dataset: https://www.kaggle.com/datasets/andrewmvd/animal-faces/ \
Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition \
Yunjey Choi and Youngjung Uh and Jaejun Yoo and Jung-Woo Ha, 2020 \
StarGAN v2: Diverse Image Synthesis for Multiple Domains

### 1. Principal Component Analysis

Ξεκινήσαμε συλλέγοντας όλα τα δεδομένα και μετατρέποντάς τα σε ασπρόμαυρα.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time

# Start timer: remember the wall-clock time (seconds since the epoch) at which the run began
start_time = time.time()
# Echo the raw start timestamp
print(start_time)

# Load images from a folder and resize them (color is kept; grayscale conversion is done by the caller)
def load_images(folder_path, size=(64, 64)):
    """Read every decodable image in ``folder_path`` and resize it to ``size``.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A list of resized images, in sorted file-name order. Entries that
        ``cv2.imread`` cannot decode are skipped.
    """
    images = []
    # os.listdir returns names in arbitrary order; sort so runs are reproducible.
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is None:
            continue  # not a readable image
        images.append(cv2.resize(img, size))
    return images

# Turn every image into a 1-D vector and stack the vectors into one array
def flatten_images(images):
    """Return a NumPy array built from the flattened copy of each image in ``images``."""
    return np.array([picture.flatten() for picture in images])

# Load the images of the cat training folder
train_folder_path = r"KostasEdition/afhq/train/cat"
images = load_images(train_folder_path)

# Grayscale version of every loaded image
grayscale_images = list(map(lambda img: cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), images))

# One float64 row vector per grayscale image
flattened_resized_images = flatten_images(grayscale_images).astype('float64')

Έπειτα, ορίσαμε την κλάση PCA, η οποία περιλαμβάνει μεθόδους για τον υπολογισμό της ιδιοαποσύνθεσης του πίνακα συμμεταβλητότητας και την έξοδο του PCA (ο τελικός πίνακας y). \
Η υπερπαράμετρος k είναι η self.n_components.

In [None]:
# Implement PCA (with whitening)
class PCA:
    """Principal Component Analysis with whitening.

    ``fit`` eigendecomposes the covariance matrix of the data and keeps the
    ``n_components`` directions with the largest eigenvalues. Each direction
    is divided by the square root of its eigenvalue, so the projected data
    has unit variance along every kept component.

    Attributes:
        n_components: Number of components to keep (the hyperparameter k).
        components: ``(k, d)`` array whose rows are the scaled eigenvectors;
            ``None`` until ``fit`` is called.
        mean: Per-feature mean of the data passed to ``fit``.
    """

    def __init__(self, n_components):
        self.n_components = n_components
        self.components = None  # Scaled Q^T: one whitened principal axis per row
        self.mean = None

    def fit(self, X):
        """Estimate the mean and the whitened principal axes of ``X``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``. It is not modified.

        Raises:
            ValueError: If ``X`` is not 2-D or has fewer than two samples.
        """
        X = np.asarray(X, dtype='float64')
        if X.ndim != 2 or X.shape[0] < 2:
            raise ValueError("X must be a 2-D array with at least two samples")

        # Mean centering (on a new array, so the caller's data is untouched)
        self.mean = np.mean(X, axis=0)
        centered = X - self.mean

        # Covariance matrix of the features; atleast_2d covers the single-feature case
        cov_matrix = np.atleast_2d(np.cov(centered, rowvar=False))

        # The covariance matrix is symmetric, so eigh applies and returns real values
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Keep the eigenvectors (columns) with the largest eigenvalues, one per row
        order = np.argsort(eigenvalues)[::-1][:self.n_components]
        top_values = eigenvalues[order]
        top_vectors = eigenvectors[:, order].T

        # Whitening factor 1/sqrt(eigenvalue). Directions whose variance is
        # numerically zero are left unscaled instead of dividing by ~0.
        floor = np.finfo(np.float64).eps * len(eigenvalues) * max(eigenvalues.max(), 0.0)
        scale = np.ones_like(top_values)
        usable = top_values > floor
        scale[usable] = 1.0 / np.sqrt(top_values[usable])

        self.components = scale[:, np.newaxis] * top_vectors

    def transform(self, X):
        """Project ``X`` onto the fitted components; ``X`` itself is not modified."""
        # Mean centering
        centered = np.asarray(X, dtype='float64') - self.mean

        # Project data onto the components
        return np.dot(centered, self.components.T)

    def fit_transform(self, X):
        """Fit on ``X`` and return its projection."""
        self.fit(X)
        return self.transform(X)

Τέλος, εφαρμόζουμε τις μεθόδους της κλάσης στις εικόνες και τυπώνουμε τα αποτελέσματα.

In [None]:
# The number of components
n_components = 100

# Apply PCA to images
pca = PCA(n_components=n_components)
transformed_images = pca.fit_transform(flattened_resized_images)
print("Applied PCA to images")

# Reconstruct images using the transformed data.
# The PCA class scales its components by the inverse square root of the
# eigenvalues, so their rows are not unit length; mapping back therefore needs
# the pseudo-inverse of the component matrix rather than the matrix itself.
reconstructed_images = np.dot(transformed_images, np.linalg.pinv(pca.components).T) + pca.mean
print("Reconstructed images using the transformed data")

# Reshape reconstructed images to their original shapes
reconstructed_images = reconstructed_images.reshape(len(images), *grayscale_images[0].shape)
print("Reshaped reconstructed images to their original shapes")

# Ensure reconstructed images are of appropriate data type (e.g., convert to uint8).
# Clip first: values outside [0, 255] would otherwise wrap around in the uint8 cast.
reconstructed_images = np.clip(np.real(reconstructed_images), 0, 255).astype('uint8')
print("Ensure reconstructed images are of appropriate data type (e.g., convert to uint8)")

# Plot original and reconstructed images (at most 5 pairs)
n_shown = min(5, len(grayscale_images))
fig, axes = plt.subplots(2, n_shown, figsize=(10, 6), squeeze=False)
for i in range(n_shown):
    axes[0, i].imshow(grayscale_images[i], cmap='gray')
    axes[0, i].axis('off')
    axes[0, i].set_title('Original')

    axes[1, i].imshow(reconstructed_images[i], cmap='gray')
    axes[1, i].axis('off')
    axes[1, i].set_title('Reconstructed')

plt.tight_layout()
plt.show()

### 2. Least Squares Regression

Ξεκινάμε ξανά φορτώνοντας τις απαραίτητες βιβλιοθήκες και το dataset.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random

def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Label recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list that
        holds ``class_name`` once per image.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class


train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"

# Load the three classes; class ids are 0 = cat, 1 = dog, 2 = wild.
# image_class is kept because the training cell builds the targets from it.
images = []
image_class = []
for folder_path, class_id in (
    (train_folder_path_cats, 0),
    (train_folder_path_dogs, 1),
    (train_folder_path_wild, 2),
):
    class_images, class_labels = load_images(folder_path, class_id)
    images.extend(class_images)
    image_class.extend(class_labels)

# Downscale to 32x32 and convert to grayscale, so that every image flattens to
# 32*32 = 1024 values (the number of weights LeastSquaresModel starts with)
images = [cv2.cvtColor(cv2.resize(img, (32, 32)), cv2.COLOR_BGR2GRAY) for img in images]

Ορίσαμε την κλάση LeastSquaresModel για την διενέργεια του αλγορίθμου. \
Η μέθοδος calculate παράγει το αποτέλεσμα της w*x + b. \
Η μέθοδος error παράγει το σφάλμα μεταξύ των στόχων και των παραχθέντων αποτελεσμάτων \
Η μέθοδος fix υπολογίζει τα νέα weights.

In [None]:
class LeastSquaresModel:
    """Linear model ``y = x . w + b`` fitted by ordinary least squares.

    Attributes:
        weights: Weight vector (or ``(d, m)`` matrix when the targets have
            ``m`` columns). Starts as a random permutation of 0..1023.
        bias: Intercept (scalar, or one value per target column after ``fix``).
    """

    def __init__(self):
        # Placeholder parameters; both are replaced by fix()
        self.weights = random.sample(range(0, 1024), 1024)
        self.bias = 1

    def calculate(self, x):  # Function y = w*x + b
        """Return the model output for the feature vector ``x``."""
        return np.dot(np.asarray(x, dtype='float64'), self.weights) + self.bias

    def error(self, x, t):  # Function error -> 1/2 * (t - y_predicted)^2
        """Return half the squared difference between target ``t`` and the prediction for ``x``."""
        return 1/2 * (np.asarray(t, dtype='float64') - self.calculate(x))**2

    def fix(self, X, T):  # Function to fix the error
        """Fit weights and bias to minimise the squared error on ``(X, T)``.

        Args:
            X: ``(n_samples, n_features)`` array-like of inputs.
            T: ``(n_samples,)`` or ``(n_samples, m)`` array-like of targets.
        """
        X = np.asarray(X, dtype='float64')
        T = np.asarray(T, dtype='float64')
        # A trailing column of ones lets the bias be estimated with the weights
        design = np.hstack([X, np.ones((X.shape[0], 1))])
        # lstsq solves the normal equations without forming an explicit
        # inverse and also copes with a singular X^T X
        solution, *_ = np.linalg.lstsq(design, T, rcond=None)
        self.weights = solution[:-1]
        self.bias = solution[-1]


Ορίσαμε και τις εξής υποστηρικτικές μεθόδους:

In [None]:
def get_array_X(images):
    """Return a list holding the flattened NumPy version of every image."""
    return [np.array(picture).flatten() for picture in images]

def get_array_T(image_class):
    """Convert class ids (0, 1 or 2) into one-hot target rows.

    Args:
        image_class: Iterable of class ids.

    Returns:
        A list with one fresh ``[1, 0, 0]`` / ``[0, 1, 0]`` / ``[0, 0, 1]``
        list per input id.

    Raises:
        ValueError: If an id is not 0, 1 or 2. Reusing the previous row for
            such an id would silently attach a wrong target to the image.
    """
    T = []
    for ic in image_class:
        t = [1 if ic == class_id else 0 for class_id in range(3)]
        if sum(t) != 1:
            raise ValueError("Unknown class id: %r" % (ic,))
        T.append(t)
    return T

Τέλος, χρησιμοποιούμε την κλάση LeastSquaresModel για να δημιουργήσουμε ένα μοντέλο και να το εκπαιδεύσουμε.

In [None]:
#Initializing the model
model = LeastSquaresModel()

#Train: fit the model on the flattened images and the one-hot targets built from image_class
model.fix(get_array_X(images), get_array_T(image_class))

### 3. Logistic Regression

Όμοια με πριν, φορτώνουμε αρχικά τα data.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random

def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Target recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list of targets.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class  # targets


#Loading images (the function above has to exist before it is called)
train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"
images, targets = load_images(train_folder_path_cats, [1,0,0])
for folder_path, one_hot in (
    (train_folder_path_dogs, [0,1,0]),
    (train_folder_path_wild, [0,0,1]),
):
    res = load_images(folder_path, one_hot)
    images.extend(res[0])
    targets.extend(res[1])

#Reducing image size and converting to grayscale, so that every image holds
#32*32 values, matching the length of the per-class weight vectors
images = [cv2.cvtColor(cv2.resize(img, (32, 32)), cv2.COLOR_BGR2GRAY) for img in images]

Στην συνέχεια, παράγουμε softmax πιθανότητες για κάθε εικόνα.

In [None]:
#Initializing b vectors for each class (one weight per pixel of a 32x32 image).
#Small random values keep the initial softmax close to uniform.
b_class_cats = np.random.uniform(-1e-4, 1e-4, 32*32)
b_class_dogs = np.random.uniform(-1e-4, 1e-4, 32*32)
b_class_wild = np.random.uniform(-1e-4, 1e-4, 32*32)

def getClass_Probabilities(image, b_class_cats, b_class_dogs, b_class_wild):
    """Return the predicted class and the softmax probabilities of one image.

    Args:
        image: Image array; it is flattened before use.
        b_class_cats, b_class_dogs, b_class_wild: Weight vectors with one
            entry per pixel of the flattened image.

    Returns:
        ``(one_hot, probabilities)``: a length-3 one-hot array marking the
        most probable class (cat, dog, wild) and the length-3 probability
        array. On a tie the first class with the highest probability wins.
    """
    pixels = np.ravel(image).astype('float64')
    logits = np.array([
        np.dot(pixels, b_class_cats),
        np.dot(pixels, b_class_dogs),
        np.dot(pixels, b_class_wild),
    ])
    # Subtracting the largest logit leaves the softmax unchanged but keeps
    # exp() from overflowing
    exponentials = np.exp(logits - np.max(logits))
    probabilities = exponentials / np.sum(exponentials)

    one_hot = np.zeros(3, dtype=int)
    one_hot[np.argmax(probabilities)] = 1
    return one_hot, probabilities

Τα b vectors διορθώνονται καθώς εκπαιδεύεται το μοντέλο μέσω του cross entropy.

In [None]:
def error(images, targets, b_class_cats, b_class_dogs, b_class_wild):
    """Return the total cross-entropy of the softmax model over ``images``.

    Args:
        images: Sequence of images.
        targets: Sequence of one-hot targets, parallel to ``images``.
        b_class_cats, b_class_dogs, b_class_wild: Per-class weight vectors.

    Returns:
        The sum over all images of ``-sum_j target[j] * log(p[j])``.
    """
    cross_entropy = 0
    for image, target in zip(images, targets):
        pr_res = getClass_Probabilities(image, b_class_cats, b_class_dogs, b_class_wild)[1]
        # A probability of exactly 0 would make log() return -inf
        pr_res = np.clip(pr_res, 1e-12, 1.0)
        cross_entropy -= float(np.dot(target, np.log(pr_res)))
    return cross_entropy

#Applying Logistic Regression: gradient descent on the cross-entropy
alpha = 0.01
flat_images = np.array([np.ravel(img) for img in images], dtype='float64')  # (N, d)
target_matrix = np.array(targets, dtype='float64')  # (N, 3), one-hot rows
for i in range(30):
    err = error(images, targets, b_class_cats, b_class_dogs, b_class_wild)
    probabilities = np.array([
        getClass_Probabilities(img, b_class_cats, b_class_dogs, b_class_wild)[1]
        for img in images
    ])
    # Gradient of the mean cross-entropy w.r.t. each class vector:
    # column j is sum_n (p_nj - t_nj) * x_n / N. Adding the same scalar to all
    # three vectors would leave the softmax output unchanged.
    gradient = flat_images.T @ (probabilities - target_matrix) / len(images)
    b_class_cats = b_class_cats - alpha * gradient[:, 0]
    b_class_dogs = b_class_dogs - alpha * gradient[:, 1]
    b_class_wild = b_class_wild - alpha * gradient[:, 2]

### 4. K Nearest Neighbors

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random

images = []
labels = []
targets = []


def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Target recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list of targets.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class  # targets


#Loading images (the function above has to exist before it is called)
train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"
images, targets = load_images(train_folder_path_cats, [1,0,0])
for folder_path, one_hot in (
    (train_folder_path_dogs, [0,1,0]),
    (train_folder_path_wild, [0,0,1]),
):
    res = load_images(folder_path, one_hot)
    images.extend(res[0])
    targets.extend(res[1])

#Reducing image size
images = [cv2.resize(img, (32, 32)) for img in images]


Το dataset φορτώνεται. Μέσω της μεθόδου find_class ταξινομείται η εικόνα αναλόγως των k κοντινότερων γειτόνων της. Η error επιστρέφει το τετραγωνικό σφάλμα της εκτίμησης. \
Οι πρώτες 8 εικόνες χρησιμοποιούνται για την εκπαίδευση του μοντέλου.

In [None]:
def find_K_Nearest_Neighbors(X, T, k): # X = data, T = new image to be added to data
    """Return the indices of the ``k`` items of ``X`` closest to ``T``.

    Distances are Euclidean norms of the element-wise difference, computed in
    float64: subtracting uint8 images directly wraps around below zero and
    gives wrong distances.

    Args:
        X: Sequence of images with the same shape as ``T``.
        T: Query image.
        k: Number of neighbours to return.

    Returns:
        Array of at most ``k`` indices into ``X``, nearest first.
    """
    query = np.asarray(T, dtype='float64')
    distances = np.array([
        np.linalg.norm(np.asarray(item, dtype='float64') - query) for item in X
    ])
    # Stable sort: equally distant items keep their original order
    return distances.argsort(kind='stable')[:k]

def find_class(X, T, k, neighbor_labels=None):
    """Classify ``T`` by majority vote among its ``k`` nearest neighbours in ``X``.

    Args:
        X: Sequence of already labelled images.
        T: Image to classify.
        k: Number of neighbours that vote.
        neighbor_labels: One-hot labels parallel to ``X``. Defaults to the
            module-level ``labels`` list.

    Returns:
        A one-hot list (``[1, 0, 0]``, ``[0, 1, 0]`` or ``[0, 0, 1]``), the
        same encoding as the labels, so the result can be appended to them.
        On a tie the class with the lowest index wins.
    """
    if neighbor_labels is None:
        neighbor_labels = labels
    indices = find_K_Nearest_Neighbors(X, T, k)
    class_counting_list = [0, 0, 0]
    for indice in indices:
        # Labels are one-hot, so the position of the 1 is the class index
        class_counting_list[int(np.argmax(neighbor_labels[indice]))] += 1
    predicted = [0, 0, 0]
    predicted[int(np.argmax(class_counting_list))] = 1
    return predicted

def add_Image_To_Dataset(data, labels, image, k=3):
    """Classify ``image`` with k-NN and append it, with its predicted label, to the dataset.

    Args:
        data: List of labelled images; extended in place.
        labels: Labels parallel to ``data``; extended in place.
            NOTE(review): find_class is called without this list and may read
            the module-level ``labels`` instead — pass the same list object.
        image: Image to classify and add.
        k: Number of neighbours that vote (defaults to 3).

    Returns:
        The same ``(data, labels)`` objects.
    """
    # Classify before appending: otherwise the image would be its own nearest
    # neighbour, at an index that has no label yet.
    predicted = find_class(data, image, k)
    data.append(image)
    labels.append(predicted)
    return data, labels
    
def error(labels, targets):
    """Return the element-wise half squared difference between ``labels`` and ``targets``."""
    difference = np.array(labels) - np.array(targets)
    return 1/2 * difference**2

def reset():
    """Return a new, empty list."""
    return list()

#--

for k in range(1, 11):
    print("K = ", k)
    # Start from the first 8 labelled images (copies, so images/targets stay intact)
    data = list(images[:8])
    labels = list(targets[:8])
    for j in range(8, len(images)):
        # Classify with the current k before the image joins the dataset, so
        # it cannot be its own nearest neighbour
        labels.append(find_class(data, images[j], k))
        data.append(images[j])
    # Total squared error; with one-hot labels every wrong prediction adds 1
    print("Error: ", np.sum(error(labels, targets)))

### 5. Naive Bayes 

Αρχικά, φορτώνεται το dataset.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random

def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Target recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list of targets.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class  # targets


#Loading images (the function above has to exist before it is called)
train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"
images, targets = load_images(train_folder_path_cats, [1,0,0])
for folder_path, one_hot in (
    (train_folder_path_dogs, [0,1,0]),
    (train_folder_path_wild, [0,0,1]),
):
    res = load_images(folder_path, one_hot)
    images.extend(res[0])
    targets.extend(res[1])

#Reducing image size
images = [cv2.resize(img, (8, 8)) for img in images]

#Convert to grayscale
images = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]

Στην συνέχεια, βάσει αυτού, υπολογίζονται οι απαραίτητες πιθανότητες που χρειάζονται για την ταξινόμηση των εικόνων.

In [None]:
#Counting colors + calculating probabilities
# color_counts[c, j] = number of pixels with gray level c in images of class j.
# Pixels are counted over the whole image, whatever its size.
color_counts = np.zeros((256, 3), dtype=np.int64)
for img, target in zip(images, targets):
    class_index = int(np.argmax(target))  # targets are one-hot
    color_counts[:, class_index] += np.bincount(np.ravel(img), minlength=256)

color_counter = color_counts.sum(axis=1).astype('float64')  # pixels per gray level
color_probabilities_sum = color_counts.tolist()

color_probabilities = [  # Pr[Class_i = class_j | ColorX = colorx]
    [count / total for count in row] if total != 0 else [0, 0, 0]
    for row, total in zip(color_probabilities_sum, color_counter)
]

class_total_count = color_counts.sum(axis=0).tolist()  # pixels per class

# Pr[ColorX = colorx | Class_i = class_j]: normalised per class (each column
# sums to 1). Add-one (Laplace) smoothing keeps a gray level never seen in a
# class from zeroing the whole product of likelihoods.
class_probabilities = ((color_counts + 1) / (color_counts.sum(axis=0) + 256)).tolist()

#Calculating prior probabilities Pr[Class_i = class_i] as the fraction of images per class
images_per_class = np.bincount([int(np.argmax(target)) for target in targets], minlength=3)
prior_probabilities = [0,0,0]
for i in range(3):
    if len(images) != 0:
        prior_probabilities[i] = images_per_class[i] / len(images)

def product_calculator(image, k):  # product of all P(x_i | C_k)
    """Return the product of ``P(pixel value | class k)`` over the pixels of ``image``.

    Args:
        image: Array of integer gray levels used to index ``class_probabilities``.
        k: Class index (0, 1 or 2).

    Returns:
        The likelihood of the image under class ``k`` according to the
        module-level ``class_probabilities`` table.
    """
    # NOTE(review): a plain product can underflow to 0.0 for images with many
    # pixels; summing logarithms would be needed in that case.
    product = 1
    for pixel in np.ravel(image):
        product *= class_probabilities[pixel][k]
    return product

Τέλος, εφαρμόζεται ο αλγόριθμος ταξινόμησης.

In [None]:
#Naive-Bayes Classifier
def naive_bayes_classifier(image):
    """Return the one-hot class whose prior times likelihood is largest for ``image``.

    Scores are compared with a strict ``>``, so on a tie the earlier class is kept.
    """
    best_score = -1
    best_class = [0,0,0]
    for class_index in range(3): # For each class
        score = prior_probabilities[class_index]*product_calculator(image, class_index)
        if score > best_score:
            best_score = score
            best_class = [int(position == class_index) for position in range(3)]
    return best_class

# Classify every loaded image and print its predicted one-hot class
for image in images:
    print(naive_bayes_classifier(image))

### 6. Neural Network

Στην αρχή φορτώνεται το dataset και οι απαραίτητες βιβλιοθήκες για την ανάπτυξη του νευρωνικού δικτύου.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random
import torch
import torchvision.models as models

def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Target recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list of targets.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class  # targets


#Loading images (the function above has to exist before it is called)
train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"
images, targets = load_images(train_folder_path_cats, [1,0,0])
for folder_path, one_hot in (
    (train_folder_path_dogs, [0,1,0]),
    (train_folder_path_wild, [0,0,1]),
):
    res = load_images(folder_path, one_hot)
    images.extend(res[0])
    targets.extend(res[1])

#Reducing image size
images = [cv2.resize(img, (8, 8)) for img in images]

#Convert to grayscale
images = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]

Έπειτα ορίζουμε και εκπαιδεύουμε ένα νευρωνικό δίκτυο μέσω της βιβλιοθήκης Torch και τυπώνουμε τα αποτελέσματα ανά εικόνα, παράλληλα συγκρίνοντας με το επιθυμητό αποτέλεσμα.

In [None]:
#Building the initial neural network
# NOTE(review): newer torchvision releases prefer `weights=...` over
# `pretrained=True`; kept as is because the installed version is unknown.
model = models.resnet18(pretrained=True)

#Training the network
# Replace the classification head with a 3-output layer (cat, dog, wild)
model.fc = torch.nn.Linear(model.fc.in_features, 3)
epochs = 3
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# torch.compile only optimises a module's execution; it does not bundle a loss
# and an optimizer, so the module is trained directly with an explicit loop.
compiled_model = model

# Inputs as a float tensor of shape (N, 3, H, W) scaled to [0, 1]
inputs = torch.tensor(np.array(images), dtype=torch.float32) / 255.0
if inputs.ndim == 3:
    # Grayscale (N, H, W): repeat the single channel three times
    inputs = inputs.unsqueeze(1).repeat(1, 3, 1, 1)
else:
    # Color (N, H, W, C): move the channels in front of the spatial axes
    inputs = inputs.permute(0, 3, 1, 2)
target_tensor = torch.tensor(np.array(targets), dtype=torch.float32)
batch_size = 64

for i in range(epochs):
    compiled_model.train()  # training mode (affects batch-norm layers)
    permutation = torch.randperm(len(inputs))
    for start in range(0, len(inputs), batch_size):
        batch = permutation[start:start + batch_size]
        if len(batch) < 2:
            # NOTE(review): batch norm in training mode is expected to reject
            # a single sample on inputs this small — confirm
            continue
        optimizer.zero_grad()
        loss = criterion(compiled_model(inputs[batch]), target_tensor[batch])
        loss.backward()
        optimizer.step()

#Results
print("Results:")
compiled_model.eval()
with torch.no_grad():
    predictions = compiled_model(inputs).argmax(dim=1)
for i in range(len(images)):
    pred = int(predictions[i])
    print("Image: ", i)
    print("Prediction: ", pred)
    print("Target: ", targets[i])
    print("--------------------------------")

### 7. K Means

Αρχικά φορτώνεται το dataset.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import random

def load_images(folder_path, class_name=None, size=(64, 64)):
    """Load and resize every decodable image of one class folder.

    Args:
        folder_path: Directory whose entries are passed to ``cv2.imread``.
        class_name: Target recorded once for every image that is loaded.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(images, image_class)``: the resized images and a parallel list of targets.
    """
    images = []
    image_class = []
    # Sorted so the image order is reproducible between runs
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(cv2.resize(img, size))  # Resize image
            image_class.append(class_name)
    return images, image_class  # targets


#Loading images (the function above has to exist before it is called)
train_folder_path_cats = r"KostasEdition/afhq/train/cat"
train_folder_path_dogs = r"KostasEdition/afhq/train/dog"
train_folder_path_wild = r"KostasEdition/afhq/train/wild"
images, targets = load_images(train_folder_path_cats, [1,0,0])
for folder_path, one_hot in (
    (train_folder_path_dogs, [0,1,0]),
    (train_folder_path_wild, [0,0,1]),
):
    res = load_images(folder_path, one_hot)
    images.extend(res[0])
    targets.extend(res[1])

#Reducing image size
images = [cv2.resize(img, (8, 8)) for img in images]

#Convert to grayscale
images = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in images]

Έπειτα ορίζουμε την μέθοδο kmeans η οποία σε 2 φάσεις, πρώτα πραγματοποιεί την εκπαίδευση και μετά την εφαρμογή του αλγορίθμου.

In [None]:
#Number of means (clusters) handed to kmeans
k = 3

def _nearest_mean(data, means):
    """Return, for every row of ``data``, the index of the closest row of ``means``."""
    distances = np.stack(
        [np.linalg.norm(data - mean, axis=1) for mean in means], axis=1
    )
    return np.argmin(distances, axis=1)


def kmeans(images, targets, k):
    """Cluster ``images`` into ``k`` groups and print how well clusters match classes.

    The function runs 10 rounds of assignment and mean update, then assigns
    every image once more and prints ``Success`` when the majority class of
    its cluster equals its own class, ``Error`` otherwise.

    Args:
        images: Sequence of equally shaped images (any shape; they are flattened).
        targets: One-hot targets parallel to ``images``.
        k: Number of means.

    Returns:
        None. One line is printed per image.
    """
    if len(images) == 0:
        return

    data = np.asarray(images, dtype='float64')
    data = data.reshape(len(data), -1)
    class_ids = np.array([int(np.argmax(target)) for target in targets])

    #Initialize means with random numbers in [0,256); float, so updates are not truncated
    means = np.random.randint(0, 256, (k, data.shape[1])).astype('float64')

    #Train
    for epochs in range(10):
        neighbors = _nearest_mean(data, means)
        #Calculate new means
        for i in range(k):
            members = data[neighbors == i]
            # An empty cluster keeps its previous mean (a mean over nothing is NaN)
            if len(members) > 0:
                means[i] = members.mean(axis=0)

    #Test
    neighbors = _nearest_mean(data, means)
    # A cluster index is not a class: map every non-empty cluster to the most
    # frequent class among its members
    cluster_class = {}
    for i in range(k):
        member_classes = class_ids[neighbors == i]
        if len(member_classes) > 0:
            cluster_class[i] = int(np.bincount(member_classes).argmax())

    #Print results
    for i in range(len(data)):
        if cluster_class[int(neighbors[i])] == class_ids[i]:
            print("Success")
        else:
            print("Error")

def argmin(array):
    """Return the result of ``np.argmin`` for ``array``."""
    position = np.argmin(array)
    return position

Τέλος, εφαρμόζουμε τη μέθοδο στο dataset.

In [None]:
# Run k-means on the loaded images; prints one Success/Error line per image
kmeans(images, targets, k)