In [5]:
from PIL import Image
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle

# Parameters
# Root folders of the train and test splits; each one is expected to hold one
# sub-folder per entry of class_folders (they are joined as folder/class_name).
train_folder = '../Datasets/aqueduct, industrial_area, patio data sets/train/'
test_folder = '../Datasets/aqueduct, industrial_area, patio data sets/test/'
# Class sub-folder names. Their 1-based position in this list becomes the class
# number embedded in the saved file names (<prefix>_data<class>_<image>.pkl).
class_folders = ["aqueduct", "industrial_area", "patio"]
patch_size = 32  # Side length, in pixels, of the square patches cut from every image
# Histogram bins per colour channel.
# NOTE(review): the calls to color_histogram() visible in this notebook do not
# pass this value; they rely on the function's own default, which is also 8.
bins = 8
k = 32  # Number of clusters for K-means, i.e. the length of each BoVW vector

# Patch extraction function
def extract_patches(image, patch_size=32):
    """Split an image into non-overlapping square patches.

    The image is tiled row by row from the top-left corner with a stride of
    ``patch_size``. Only tiles that lie completely inside the image are
    returned; leftover strips along the right and bottom edges are dropped.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``crop((left, upper, right, lower))``, e.g. a ``PIL.Image.Image``.
        patch_size: Side length of each square patch, in pixels.

    Returns:
        A list of numpy arrays, one per patch, in row-major order. The list is
        empty when the image is smaller than ``patch_size`` in either dimension.
    """
    width, height = image.size
    patches = []
    # Pillow's crop() does not shrink a box that sticks out past the image
    # border: the result keeps the requested size and is zero-padded. Checking
    # the size of the cropped patch therefore cannot reject partial tiles, so
    # the loop bounds themselves guarantee that every box is fully inside.
    for top in range(0, height - patch_size + 1, patch_size):
        for left in range(0, width - patch_size + 1, patch_size):
            box = (left, top, left + patch_size, top + patch_size)
            patches.append(np.array(image.crop(box)))
    return patches

# Color histogram extraction function
def color_histogram(patch, bins=8):
    """Describe a patch by its concatenated per-channel intensity histograms.

    Args:
        patch: Array indexed as ``[row, column, channel]``; channels 0, 1 and 2
            are read.
        bins: Bin specification forwarded to ``np.histogram`` for each channel;
            values are binned over the range 0..256.

    Returns:
        1-D array of counts: the channel-0 histogram followed by the channel-1
        and channel-2 histograms.
    """
    per_channel = [
        np.histogram(patch[:, :, channel], bins=bins, range=(0, 256))[0]
        for channel in range(3)
    ]
    return np.concatenate(per_channel)

# K-means implementation from scratch
def initialize_centroids(features, k):
    """Pick ``k`` distinct rows of ``features`` as the starting centroids.

    Rows are drawn without replacement using numpy's global random state.

    Args:
        features: Array whose first axis indexes samples.
        k: Number of centroids to draw.

    Returns:
        The ``k`` selected rows of ``features``.
    """
    n_samples = len(features)
    chosen_rows = np.random.choice(n_samples, k, replace=False)
    return features[chosen_rows]

def assign_clusters(features, centroids):
    """Label every feature vector with the index of its nearest centroid.

    Nearness is the Euclidean norm of the difference, computed for all
    sample/centroid pairs at once via broadcasting.

    Args:
        features: 2-D array, one feature vector per row.
        centroids: 2-D array, one centroid per row.

    Returns:
        1-D integer array with one centroid index per feature row.
    """
    # Broadcasting yields one difference vector per (sample, centroid) pair.
    offsets = features[:, np.newaxis] - centroids
    dist_to_centroid = np.linalg.norm(offsets, axis=2)
    return dist_to_centroid.argmin(axis=1)

def update_centroids(features, labels, k):
    """Recompute each centroid as the mean of the samples assigned to it.

    A cluster that received no samples is not left at the all-zero vector
    (which would sit far from the data and never attract a sample again).
    Instead, empty clusters are re-seeded with the samples that lie farthest
    from their own freshly computed centroid. The choice is deterministic.

    Args:
        features: 2-D array, one feature vector per row.
        labels: 1-D array of cluster indices in ``[0, k)``, one per row of
            ``features``.
        k: Number of clusters.

    Returns:
        Float array of shape ``(k, features.shape[1])`` with the new centroids.
        A row stays zero only if there are more empty clusters than samples.
    """
    labels = np.asarray(labels)
    new_centroids = np.zeros((k, features.shape[1]))
    empty_clusters = []
    for i in range(k):
        members = features[labels == i]
        if len(members) > 0:
            new_centroids[i] = np.mean(members, axis=0)
        else:
            empty_clusters.append(i)

    if empty_clusters and len(features) > 0:
        # Distance of every sample to the centroid of the cluster it belongs to.
        own_centroid = new_centroids[labels.astype(np.intp)]
        residual = np.linalg.norm(features - own_centroid, axis=1)
        # Stable sort keeps the lowest sample index first among equal distances.
        farthest_first = np.argsort(-residual, kind='stable')
        for cluster, sample in zip(empty_clusters, farthest_first):
            new_centroids[cluster] = features[sample]
    return new_centroids

def kmeans(features, k, max_iter=100, tol=1e-4):
    """Cluster ``features`` into ``k`` groups with Lloyd's algorithm.

    Args:
        features: 2-D array, one feature vector per row.
        k: Number of clusters.
        max_iter: Maximum number of assign/update rounds.
        tol: Iteration stops early once the norm of the centroid change drops
            below this value.

    Returns:
        Tuple ``(centroids, labels)`` where ``labels[i]`` is the index of the
        centroid in ``centroids`` nearest to ``features[i]``.
    """
    centroids = initialize_centroids(features, k)
    for _ in range(max_iter):
        labels = assign_clusters(features, centroids)
        new_centroids = update_centroids(features, labels, k)
        if np.linalg.norm(new_centroids - centroids) < tol:
            # Converged: labels were computed against the centroids returned.
            break
        centroids = new_centroids
    else:
        # The loop ran out of iterations (or never ran when max_iter == 0).
        # The centroids were replaced after the last assignment, so refresh the
        # labels to keep them consistent with the centroids that are returned.
        labels = assign_clusters(features, centroids)
    return centroids, labels

# BoVW representation for an image
def bovw_representation(image_patches, centroids):
    """Build the normalised bag-of-visual-words histogram of one image.

    Every patch is described by ``color_histogram`` and assigned to its nearest
    centroid; the histogram holds the fraction of patches that fell on each
    centroid.

    Args:
        image_patches: Sequence of patch arrays belonging to a single image.
        centroids: 2-D array of visual words, one per row.

    Returns:
        1-D float array of length ``len(centroids)``. It sums to 1 when there is
        at least one patch, and is all zeros when ``image_patches`` is empty.
    """
    n_words = len(centroids)
    if len(image_patches) == 0:
        # Nothing to count: avoid clustering an empty array and dividing by 0.
        return np.zeros(n_words)

    features = np.array([color_histogram(patch) for patch in image_patches])
    labels = assign_clusters(features, centroids)
    word_counts = np.bincount(labels, minlength=n_words)
    return word_counts / len(features)

# Save data function
def save_data(data, filename):
    """Pickle ``data`` into the file at ``filename``, overwriting it.

    Args:
        data: Any picklable object.
        filename: Destination path; opened in binary write mode.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)

# Load data function
def load_data(filename):
    """Return the object pickled in ``filename``, or ``None`` if it is missing.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` when no file exists at ``filename``.
    """
    if not os.path.exists(filename):
        return None
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as source:
        return pickle.load(source)

# Process images and save BoVW representations for each class separately
def process_images(folder, class_folders, centroids, save_folder, prefix):
    """Compute and save one BoVW vector per image, class by class.

    For each class sub-folder of ``folder`` every file is opened as an image,
    cut into patches of the module-level ``patch_size`` and turned into a BoVW
    histogram. Once a whole class has been processed, its vectors are written
    to ``save_folder`` as ``<prefix>_data<class>_<image>.pkl``. Both numbers are
    1-based: the class number follows ``class_folders`` and the image number
    follows the sorted file names.

    Args:
        folder: Directory holding one sub-folder per class.
        class_folders: Class sub-folder names, in class-number order.
        centroids: Visual-word centroids used to build the histograms.
        save_folder: Directory that receives the pickle files.
        prefix: File-name prefix, e.g. ``'train'`` or ``'test'``.
    """
    for i, class_name in enumerate(class_folders, 1):
        class_folder = os.path.join(folder, class_name)
        class_vectors = []

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # image number in the saved file names reproducible between runs.
        for img_name in sorted(os.listdir(class_folder)):
            img_path = os.path.join(class_folder, img_name)
            # The context manager closes the underlying file handle promptly.
            with Image.open(img_path) as img:
                # Force three channels so greyscale, palette or RGBA inputs
                # still produce an R/G/B histogram.
                patches = extract_patches(img.convert('RGB'), patch_size)
            class_vectors.append(bovw_representation(patches, centroids))

        # Save each image's vector in its own file once the class is complete.
        for j, bovw_vector in enumerate(class_vectors, 1):
            save_data(bovw_vector, os.path.join(save_folder, f'{prefix}_data{i}_{j}.pkl'))

# Main workflow
def main():
    """Build the visual vocabulary and cache BoVW vectors for train and test.

    The pipeline is skipped when ``train_data1_1.pkl`` already exists in the
    current working directory. Otherwise it:

    1. collects a colour histogram for every patch of every training image,
    2. clusters those histograms with K-means into ``k`` visual words,
    3. saves one BoVW vector per train/test image in the working directory.

    Raises:
        ValueError: If fewer than ``k`` training patches were found, because
            K-means cannot draw ``k`` distinct starting centroids from them.
    """
    # Check if BoVW data exists; if not, process it
    if os.path.exists('train_data1_1.pkl'):
        print("BoVW data already exists.")
        return

    # Collect all patches from training images to fit K-means
    all_train_features = []
    for class_name in class_folders:
        class_folder = os.path.join(train_folder, class_name)
        # Sorted listing keeps the feature order, and hence the seeded
        # clustering below, identical from run to run.
        for img_name in sorted(os.listdir(class_folder)):
            img_path = os.path.join(class_folder, img_name)
            # Close each image file as soon as its patches are extracted.
            with Image.open(img_path) as img:
                # Force three channels so every patch has R, G and B planes.
                patches = extract_patches(img.convert('RGB'), patch_size)
            all_train_features.extend(color_histogram(patch) for patch in patches)

    all_train_features = np.array(all_train_features)
    print("Total training patches:", len(all_train_features))

    if len(all_train_features) < k:
        raise ValueError(
            f"Need at least {k} training patches to fit K-means, "
            f"found {len(all_train_features)}"
        )

    # Fixed seed: the codebook, and every cached BoVW vector derived from it,
    # must be reproducible because later cells reuse the saved files.
    np.random.seed(0)

    # Apply K-means clustering
    centroids, _ = kmeans(all_train_features, k)
    print("K-means clustering completed.")

    # Process train and test datasets and save each class separately
    output_dir = os.getcwd()
    process_images(train_folder, class_folders, centroids, output_dir, prefix='train')
    process_images(test_folder, class_folders, centroids, output_dir, prefix='test')

    print("BoVW representations saved for each class.")

if __name__ == "__main__":
    main()


BoVW data already exists.


In [6]:
import pickle
import os

def load_class_vectors(directory, split, class_index):
    """Load the cached BoVW vectors of one class from ``directory``.

    Only files named exactly ``<split>_data<class_index>_<n>.pkl`` (``n`` being
    digits) are opened, so unrelated pickle files in the directory are never
    unpickled. Vectors are returned in ascending ``n`` order, which makes the
    sample order independent of the directory listing order.

    Args:
        directory: Folder containing the pickle files.
        split: ``'train'`` or ``'test'``.
        class_index: 1-based class number used in the file names.

    Returns:
        List of unpickled objects, one per matching file.
    """
    prefix = f'{split}_data{class_index}_'
    suffix = '.pkl'
    numbered = []
    for name in os.listdir(directory):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        image_number = name[len(prefix):-len(suffix)]
        if image_number.isdigit():
            numbered.append((int(image_number), name))

    vectors = []
    for _, name in sorted(numbered):
        # NOTE: unpickling can execute arbitrary code; these files are expected
        # to be the ones written earlier by this notebook.
        with open(os.path.join(directory, name), 'rb') as file:
            vectors.append(pickle.load(file))
    return vectors


# Per-class train and test vectors, read from the current directory
train_data1, train_data2, train_data3 = (
    load_class_vectors('.', 'train', class_index) for class_index in (1, 2, 3)
)
test_data1, test_data2, test_data3 = (
    load_class_vectors('.', 'test', class_index) for class_index in (1, 2, 3)
)

print("Data concatenation and completed.")
print(len(train_data1))
print(len(train_data2))
print(len(train_data3))
print(len(test_data1))
print(len(test_data2))
print(len(test_data3))




Data concatenation and completed.
50
50
50
50
50
50


In [7]:
# One float label per sample: class 1, 2 and 3 follow the order of the lists.
train_labels1, train_labels2, train_labels3 = (
    np.full(len(samples), float(class_id))
    for class_id, samples in enumerate((train_data1, train_data2, train_data3), start=1)
)
test_labels1, test_labels2, test_labels3 = (
    np.full(len(samples), float(class_id))
    for class_id, samples in enumerate((test_data1, test_data2, test_data3), start=1)
)

# Stack the per-class vectors, and their labels in the same class order.
train_data = np.concatenate((train_data1, train_data2, train_data3), axis=0)
test_data = np.concatenate((test_data1, test_data2, test_data3), axis=0)
train_labels = np.concatenate((train_labels1, train_labels2, train_labels3), axis=0)
test_labels = np.concatenate((test_labels1, test_labels2, test_labels3), axis=0)

# Report the sizes: train data, train labels, test data, test labels.
for stacked in (train_data, train_labels, test_data, test_labels):
    print(len(stacked))



150
150
150
150


In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.utils import shuffle

# Function to plot decision regions
def plot_decision_regions(X_train, y_train, X_test, y_test, model, title="Decision Region"):
    """Draw the model's decision regions over the first two feature columns.

    A grid with step 0.02 is laid over the range of the first two training
    columns (extended by 1 on every side), the model is asked to predict every
    grid point, and the predictions are drawn as filled contours. Training
    samples are overlaid as circles and test samples as crosses.

    Args:
        X_train: Training features; columns 0 and 1 are plotted.
        y_train: Training labels, used to colour the training points.
        X_test: Test features; columns 0 and 1 are plotted.
        y_test: Test labels, used to colour the test points.
        model: Fitted estimator whose ``predict`` accepts two-column input.
        title: Figure title.
    """
    step = 0.02  # step size in the mesh
    first, second = X_train[:, 0], X_train[:, 1]
    first_axis = np.arange(first.min() - 1, first.max() + 1, step)
    second_axis = np.arange(second.min() - 1, second.max() + 1, step)
    grid_x, grid_y = np.meshgrid(first_axis, second_axis)

    # Predict the class of every grid point, then fold back to the grid shape.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    regions = model.predict(grid_points).reshape(grid_x.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(grid_x, grid_y, regions, alpha=0.8, cmap=plt.cm.Paired)

    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired,
                marker='o', edgecolors='k', label="Train Data")
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Paired,
                marker='x', label="Test Data")

    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.show()

# Standardize the training features and fit a one-vs-rest Perceptron on them
def train_perceptron(X_train, y_train):
    """Fit a feature scaler and a one-vs-rest Perceptron on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        Tuple ``(model, scaler)``: the fitted ``OneVsRestClassifier`` and the
        fitted ``StandardScaler`` that must be applied to any data passed to
        the model.
    """
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_train)
    model = OneVsRestClassifier(Perceptron()).fit(X_scaled, y_train)
    return model, scaler

# Train and evaluate the classifier on a pair of classes, printing its metrics (no plot is drawn here)
def evaluate_and_plot_pair(X_train, y_train, X_test, y_test, class_pair):
    """Train on two classes only and report test metrics for that pair.

    Samples whose label is not in ``class_pair`` are discarded from both
    splits. The metrics are printed and returned. Despite the name, this
    function does not draw any plot.

    Args:
        X_train: Training feature matrix (numpy array).
        y_train: Training labels (numpy array).
        X_test: Test feature matrix (numpy array).
        y_test: Test labels (numpy array).
        class_pair: The two class labels to keep.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, cm)``; precision, recall and
        F1 are per-class arrays and ``cm`` is the confusion matrix.
    """
    # Keep only the samples that belong to one of the two requested classes.
    keep_train = np.isin(y_train, class_pair)
    keep_test = np.isin(y_test, class_pair)
    pair_train_X, pair_train_y = X_train[keep_train], y_train[keep_train]
    pair_test_X, pair_test_y = X_test[keep_test], y_test[keep_test]

    model, scaler = train_perceptron(pair_train_X, pair_train_y)

    # The test split must go through the scaler fitted on the training split.
    y_pred = model.predict(scaler.transform(pair_test_X))

    # Classification metrics (per-class values for precision / recall / F1)
    accuracy = accuracy_score(pair_test_y, y_pred)
    precision, recall, f1 = (
        score(pair_test_y, y_pred, average=None)
        for score in (precision_score, recall_score, f1_score)
    )

    for line in (
        f"Class {class_pair[0]} vs Class {class_pair[1]}",
        f"Accuracy: {accuracy:.4f}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
    ):
        print(line)

    cm = confusion_matrix(pair_test_y, y_pred)
    print("Confusion Matrix:")
    print(cm)

    return accuracy, precision, recall, f1, cm

# Train and evaluate the classifier on all classes at once, printing its metrics (no plot is drawn here)
def evaluate_and_plot_all_classes(X_train, y_train, X_test, y_test):
    """Train on every class and report test metrics.

    The metrics are printed and returned. Despite the name, this function does
    not draw any plot.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: Test labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, cm)``; precision, recall and
        F1 are per-class arrays and ``cm`` is the confusion matrix.
    """
    model, scaler = train_perceptron(X_train, y_train)

    # The test split must go through the scaler fitted on the training split.
    y_pred = model.predict(scaler.transform(X_test))

    # Classification metrics (per-class values for precision / recall / F1)
    accuracy = accuracy_score(y_test, y_pred)
    precision, recall, f1 = (
        score(y_test, y_pred, average=None)
        for score in (precision_score, recall_score, f1_score)
    )

    for line in (
        f"Accuracy: {accuracy:.4f}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
    ):
        print(line)

    cm = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(cm)

    return accuracy, precision, recall, f1, cm

def _print_performance(header, acc, prec, rec, f1, cm):
    """Print one block of metrics under the given header line."""
    print(header)
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec}")
    print(f"Recall: {rec}")
    print(f"F1 Score: {f1}")
    print(f"Confusion Matrix: \n{cm}")


# Pairwise evaluation: every combination of two of the three classes
pairs = [(1, 2), (2, 3), (1, 3)]
for pair in pairs:
    acc, prec, rec, f1, cm = evaluate_and_plot_pair(train_data, train_labels, test_data, test_labels, pair)
    _print_performance(f"Performance for Class {pair[0]} vs Class {pair[1]}", acc, prec, rec, f1, cm)
    print()

# Evaluation with all three classes together
acc_all, prec_all, rec_all, f1_all, cm_all = evaluate_and_plot_all_classes(train_data, train_labels, test_data, test_labels)
_print_performance("Performance for All Classes", acc_all, prec_all, rec_all, f1_all, cm_all)


Class 1 vs Class 2
Accuracy: 0.7200
Precision: [0.73913043 0.7037037 ]
Recall: [0.68 0.76]
F1 Score: [0.70833333 0.73076923]
Confusion Matrix:
[[34 16]
 [12 38]]
Performance for Class 1 vs Class 2
Accuracy: 0.7200
Precision: [0.73913043 0.7037037 ]
Recall: [0.68 0.76]
F1 Score: [0.70833333 0.73076923]
Confusion Matrix: 
[[34 16]
 [12 38]]

Class 2 vs Class 3
Accuracy: 0.7000
Precision: [0.66666667 0.75      ]
Recall: [0.8 0.6]
F1 Score: [0.72727273 0.66666667]
Confusion Matrix:
[[40 10]
 [20 30]]
Performance for Class 2 vs Class 3
Accuracy: 0.7000
Precision: [0.66666667 0.75      ]
Recall: [0.8 0.6]
F1 Score: [0.72727273 0.66666667]
Confusion Matrix: 
[[40 10]
 [20 30]]

Class 1 vs Class 3
Accuracy: 0.6100
Precision: [0.65714286 0.58461538]
Recall: [0.46 0.76]
F1 Score: [0.54117647 0.66086957]
Confusion Matrix:
[[23 27]
 [12 38]]
Performance for Class 1 vs Class 3
Accuracy: 0.6100
Precision: [0.65714286 0.58461538]
Recall: [0.46 0.76]
F1 Score: [0.54117647 0.66086957]
Confusion Matrix: