In [11]:
import zipfile
import os
import numpy as np
from PIL import Image

# Locations of the dataset archive and of the directory it is unpacked into
zip_path = '/content/HWData.zip'
extraction_path = '/content/HWData'

# Create the extraction directory if needed; exist_ok makes this a no-op
# when the directory is already there, so no separate existence check is needed
os.makedirs(extraction_path, exist_ok=True)

# Unpack the whole archive into the extraction directory
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extraction_path)

print(f"Files extracted to {extraction_path}")

# Training images are read from one sub-directory per class under this path
dataset_dir = '/content/HWData/HWData/train'  # Adjust if necessary

# File name suffixes (compared in lower case) that are treated as images
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')

# Maps class name -> 4096xn matrix; each column is one flattened 64x64 grayscale image
class_images = {}

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to keep the class order and the column order of each matrix reproducible
for class_dir in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_dir)
    if not os.path.isdir(class_path):
        continue  # Only directories are treated as classes

    # Flattened image vectors collected for this class
    image_vectors = []

    for image_file in sorted(os.listdir(class_path)):
        # Skip anything that does not look like an image file
        if not image_file.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(class_path, image_file)
        try:
            with Image.open(image_path) as img:
                # Resize to 64x64 pixels, then convert to grayscale (mode 'L')
                img_gray = img.resize((64, 64)).convert('L')

                # Flatten the 64x64 pixel grid into a vector of 4096 values
                image_vectors.append(np.array(img_gray).flatten())
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest
            print(f"Error processing image {image_file}: {e}")

    # Stack the vectors (n x 4096) and transpose so that images become columns (4096 x n);
    # classes without any readable image are left out of the dictionary
    if image_vectors:
        class_images[class_dir] = np.transpose(np.array(image_vectors))
        print(f"Class {class_dir}, Matrix shape: {class_images[class_dir].shape}")

# Warn when nothing at all could be loaded
if not class_images:
    print("No images were processed. Check the dataset directory and image file types.")


Files extracted to /content/HWData
Class brain, Matrix shape: (4096, 21)
Class BACKGROUND_Google, Matrix shape: (4096, 94)
Class barrel, Matrix shape: (4096, 10)
Class accordion, Matrix shape: (4096, 12)
Class camera, Matrix shape: (4096, 11)
Class bonsai, Matrix shape: (4096, 27)
Class binocular, Matrix shape: (4096, 8)
Class beaver, Matrix shape: (4096, 10)
Class anchor, Matrix shape: (4096, 9)
Class butterfly, Matrix shape: (4096, 19)
Class ant, Matrix shape: (4096, 9)
Class airplanes, Matrix shape: (4096, 21)
Class brontosaurus, Matrix shape: (4096, 10)
Class buddha, Matrix shape: (4096, 18)
Class bass, Matrix shape: (4096, 12)


In [12]:
# Point dataset_dir at the test split; images are read from one sub-directory per class
dataset_dir = '/content/HWData/HWData/test'  # Adjust to the test directory path

# File name suffixes (compared in lower case) that are treated as images
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')

# Maps class name -> 4096xn matrix of the test set; each column is one
# flattened 64x64 grayscale image
test_class_images = {}

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to keep the class order and the column order of each matrix reproducible
for class_dir in sorted(os.listdir(dataset_dir)):
    class_path = os.path.join(dataset_dir, class_dir)
    if not os.path.isdir(class_path):
        continue  # Only directories are treated as classes

    # Flattened image vectors collected for this class
    image_vectors = []

    for image_file in sorted(os.listdir(class_path)):
        # Skip anything that does not look like an image file
        if not image_file.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(class_path, image_file)
        try:
            with Image.open(image_path) as img:
                # Resize to 64x64 pixels, then convert to grayscale (mode 'L')
                img_gray = img.resize((64, 64)).convert('L')

                # Flatten the 64x64 pixel grid into a vector of 4096 values
                image_vectors.append(np.array(img_gray).flatten())
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest
            print(f"Error processing image {image_file}: {e}")

    # Stack the vectors (n x 4096) and transpose so that images become columns (4096 x n);
    # classes without any readable image are left out of the dictionary
    if image_vectors:
        test_class_images[class_dir] = np.transpose(np.array(image_vectors))
        print(f"Test Class {class_dir}, Matrix shape: {test_class_images[class_dir].shape}")

# Warn when nothing at all could be loaded from the test split
if not test_class_images:
    print("No test images were processed. Check the dataset directory and image file types.")


Test Class brain, Matrix shape: (4096, 8)
Test Class BACKGROUND_Google, Matrix shape: (4096, 46)
Test Class barrel, Matrix shape: (4096, 4)
Test Class accordion, Matrix shape: (4096, 4)
Test Class camera, Matrix shape: (4096, 4)
Test Class bonsai, Matrix shape: (4096, 11)
Test Class binocular, Matrix shape: (4096, 2)
Test Class beaver, Matrix shape: (4096, 4)
Test Class anchor, Matrix shape: (4096, 4)
Test Class butterfly, Matrix shape: (4096, 8)
Test Class ant, Matrix shape: (4096, 4)
Test Class airplanes, Matrix shape: (4096, 13)
Test Class brontosaurus, Matrix shape: (4096, 3)
Test Class buddha, Matrix shape: (4096, 8)
Test Class bass, Matrix shape: (4096, 4)


In [13]:
import numpy as np

# Per-class PCA on the matrices stored in 'class_images' (one 4096xn matrix per class).
# Maps class name -> {'eigenvalues': top 3 eigenvalues, 'eigenvectors': nx3 matrix}
pca_results = {}

for class_name, data_matrix in class_images.items():
    # PCA Step 1: Calculate the covariance matrix.
    # With rowvar=False each column of data_matrix (one image) is a variable and
    # each row (one pixel position) is an observation, so the result is the
    # nxn covariance between the images of the class. No transpose is applied.
    # atleast_2d keeps the single-image case (where np.cov returns a 0-d array)
    # usable by the eigen-decomposition below.
    covariance_matrix = np.atleast_2d(np.cov(data_matrix, rowvar=False))

    # PCA Step 2: Calculate eigenvalues and eigenvectors.
    # The covariance matrix is symmetric, so eigh is used: it always returns
    # real eigenvalues/eigenvectors, whereas the general solver (eig) may
    # return complex output for such matrices because of round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # PCA Step 3: Sort the eigenvectors by descending eigenvalues
    sorted_indices = np.argsort(eigenvalues)[::-1]

    # Keep the eigenvectors (columns) belonging to the three largest eigenvalues
    top_indices = sorted_indices[:3]
    top_eigenvectors = eigenvectors[:, top_indices]

    # Store the results in the dictionary
    pca_results[class_name] = {
        'eigenvalues': eigenvalues[top_indices],
        'eigenvectors': top_eigenvectors
    }

    # Print the shape of the matrix containing the top 3 eigenvectors for verification
    print(f"Class {class_name}: Eigenvectors matrix shape (should be nx3): {top_eigenvectors.shape}")

# Now pca_results contains the PCA results for each class, with the top 3 eigenvectors used to transform the data


Class brain: Eigenvectors matrix shape (should be nx3): (21, 3)
Class BACKGROUND_Google: Eigenvectors matrix shape (should be nx3): (94, 3)
Class barrel: Eigenvectors matrix shape (should be nx3): (10, 3)
Class accordion: Eigenvectors matrix shape (should be nx3): (12, 3)
Class camera: Eigenvectors matrix shape (should be nx3): (11, 3)
Class bonsai: Eigenvectors matrix shape (should be nx3): (27, 3)
Class binocular: Eigenvectors matrix shape (should be nx3): (8, 3)
Class beaver: Eigenvectors matrix shape (should be nx3): (10, 3)
Class anchor: Eigenvectors matrix shape (should be nx3): (9, 3)
Class butterfly: Eigenvectors matrix shape (should be nx3): (19, 3)
Class ant: Eigenvectors matrix shape (should be nx3): (9, 3)
Class airplanes: Eigenvectors matrix shape (should be nx3): (21, 3)
Class brontosaurus: Eigenvectors matrix shape (should be nx3): (10, 3)
Class buddha: Eigenvectors matrix shape (should be nx3): (18, 3)
Class bass: Eigenvectors matrix shape (should be nx3): (12, 3)


In [14]:
# Dictionary to store PCA features for each class.
# It is created once, before the loop: creating it inside the loop body would
# discard the entries of every class except the last one processed.
pca_features = {}

for class_name, data_matrix in class_images.items():
    # Retrieve the top 3 eigenvectors for this class (nx3 matrix)
    top_eigenvectors = pca_results[class_name]['eigenvectors']

    # Project the data matrix onto these top 3 eigenvectors:
    # data_matrix is 4096xn, top_eigenvectors is nx3, so the product
    # data_matrix * top_eigenvectors is a 4096x3 matrix representing the
    # class in the new feature space
    projected_features = np.dot(data_matrix, top_eigenvectors)

    # Store the projected features in the dictionary
    pca_features[class_name] = projected_features

    # Print the shape of the projected features matrix to verify its dimensions
    print(f"Class {class_name}: Projected features matrix shape (should be 4096x3): {projected_features.shape}")


Class brain: Projected features matrix shape (should be 4096x3): (4096, 3)
Class BACKGROUND_Google: Projected features matrix shape (should be 4096x3): (4096, 3)
Class barrel: Projected features matrix shape (should be 4096x3): (4096, 3)
Class accordion: Projected features matrix shape (should be 4096x3): (4096, 3)
Class camera: Projected features matrix shape (should be 4096x3): (4096, 3)
Class bonsai: Projected features matrix shape (should be 4096x3): (4096, 3)
Class binocular: Projected features matrix shape (should be 4096x3): (4096, 3)
Class beaver: Projected features matrix shape (should be 4096x3): (4096, 3)
Class anchor: Projected features matrix shape (should be 4096x3): (4096, 3)
Class butterfly: Projected features matrix shape (should be 4096x3): (4096, 3)
Class ant: Projected features matrix shape (should be 4096x3): (4096, 3)
Class airplanes: Projected features matrix shape (should be 4096x3): (4096, 3)
Class brontosaurus: Projected features matrix shape (should be 4096x3

In [22]:
import numpy as np
from PIL import Image
import os

def read_and_process_image(image_path):
    """Load an image file and return it as a flat grayscale pixel vector.

    The image is resized to 64x64 pixels, converted to grayscale (mode 'L')
    and flattened into a 1-D NumPy array of 4096 values.

    Args:
        image_path: Path of the image file to open.

    Returns:
        1-D NumPy array holding the pixel values of the processed image.
    """
    target_size = (64, 64)
    with Image.open(image_path) as source:
        # Resize first, then convert to grayscale
        grayscale = source.resize(target_size).convert('L')
        # Materialise the pixel data while the file is still open
        pixels = np.array(grayscale)
    return pixels.flatten()

def project_to_pca_space(test_vector, eigenvectors):
    """Project a single vector onto the given eigenvector columns.

    Args:
        test_vector: NumPy array; it is reshaped into one row of length d.
        eigenvectors: Matrix with d rows whose columns span the target space.

    Returns:
        The product of the 1xd row vector with ``eigenvectors``
        (shape 1xk for a dxk eigenvector matrix).
    """
    # View the input as a single-row matrix so the product keeps a 2-D shape
    row_vector = test_vector.reshape(1, -1)
    return row_vector.dot(eigenvectors)

def calculate_distances_and_predict(test_features, pca_features):
    """Find the class whose stored feature rows lie closest to the test features.

    For every class the Euclidean distance between ``test_features`` and each
    row of that class's feature matrix is computed; the smallest of those
    distances represents the class.

    Args:
        test_features: Feature row(s) of the sample to classify; must broadcast
            against each class feature matrix.
        pca_features: Mapping of class name -> feature matrix (one row per sample).

    Returns:
        Tuple ``(distance, class_name)`` for the closest class, or
        ``(inf, None)`` when no class yields a distance below infinity.
    """
    best_distance, best_class = float('inf'), None
    for label, reference_points in pca_features.items():
        # Row-wise Euclidean distances to every stored sample of this class
        offsets = test_features - reference_points
        nearest = np.linalg.norm(offsets, axis=1).min()
        # Strict comparison: on a tie the class encountered first is kept
        if nearest < best_distance:
            best_distance, best_class = nearest, label
    return best_distance, best_class

# Per-class eigenvectors and PCA features
eigenvectors = {}   # class name -> 4096x3 matrix of the top 3 eigenvectors
pca_features = {}   # class name -> nx3 matrix of the projected training images

for class_name, data_matrix in class_images.items():
    if data_matrix.shape[0] != 4096:
        data_matrix = data_matrix.T  # Each column must represent one image

    # rowvar=True: every row (pixel position) is a variable, every column
    # (image) an observation, giving the 4096x4096 pixel covariance matrix
    covariance_matrix = np.cov(data_matrix, rowvar=True)

    # The covariance matrix is symmetric, so eigh is used: it returns real
    # eigenvalues/eigenvectors, whereas the general solver (eig) may return
    # complex output for such matrices because of round-off
    eigenvalues, eigenvectors_ = np.linalg.eigh(covariance_matrix)

    # Keep the eigenvectors belonging to the three largest eigenvalues
    indices = eigenvalues.argsort()[::-1]
    top_eigenvectors = eigenvectors_[:, indices[:3]]
    eigenvectors[class_name] = top_eigenvectors
    pca_features[class_name] = np.dot(data_matrix.T, top_eigenvectors)  # Projected features

# Test image processing and class prediction
test_image_path = '/content/HWData/HWData/test/accordion/image_0052.jpg'
test_vector = read_and_process_image(test_image_path)

predicted_class = None
min_distance = float('inf')

for class_name, class_eigenvectors in eigenvectors.items():
    # Project the test image into the subspace of this class ...
    test_features = project_to_pca_space(test_vector, class_eigenvectors)
    # ... and compare it only with the training features of the same class.
    # Features of other classes were projected with different eigenvectors,
    # so their coordinates are not comparable with test_features.
    current_distance, current_class = calculate_distances_and_predict(
        test_features, {class_name: pca_features[class_name]}
    )
    if current_distance < min_distance:
        min_distance = current_distance
        predicted_class = current_class

print(f"Tahmin edilen sınıf: {predicted_class}")


Tahmin edilen sınıf: beaver
