In [2]:
import os
import cv2
import numpy as np
import csv
from skimage.feature import hog, local_binary_pattern
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm

def load_data_with_preprocessing(dataset_path):
    """Load all images under ``dataset_path`` as HOG + LBP feature vectors.

    The expected layout is ``<dataset_path>/<split>/<class>/<image>`` with
    splits ``training``, ``validation``, ``testing`` and classes ``CNV``,
    ``DME``, ``DRUSEN``, ``NORMAL``. The three splits are pooled into a single
    sample set.

    Parameters
    ----------
    dataset_path : str
        Root directory containing the split sub-directories.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``X`` holds one row per successfully read image (HOG descriptor
        followed by the flattened LBP output); ``y`` holds the class folder
        name of each row.

    Raises
    ------
    FileNotFoundError
        If any expected ``<split>/<class>`` directory does not exist. All
        directories are checked before any image is read so the failure is
        reported immediately rather than part-way through loading.
    """
    class_labels = ['CNV', 'DME', 'DRUSEN', 'NORMAL']
    dataset_types = ['training', 'validation', 'testing']

    # Resolve every class directory up front and fail fast if one is missing.
    class_dirs = [
        (dataset_type, label, os.path.join(dataset_path, dataset_type, label))
        for dataset_type in dataset_types
        for label in class_labels
    ]
    missing_dirs = [path for _, _, path in class_dirs if not os.path.isdir(path)]
    if missing_dirs:
        raise FileNotFoundError(
            "Missing dataset directories (expected <dataset_path>/<split>/<class>): "
            + ", ".join(missing_dirs)
        )

    X = []
    y = []
    unreadable = []

    for dataset_type, label, class_dir in class_dirs:
        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore the seeded cross-validation folds) reproducible.
        img_names = sorted(os.listdir(class_dir))
        for img_name in tqdm(img_names, desc=f"Loading {label} images from {dataset_type}"):
            img_path = os.path.join(class_dir, img_name)
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None instead of raising when a file cannot be
            # decoded (corrupt image, stray non-image file, sub-directory).
            if image is None:
                unreadable.append(img_path)
                continue

            # Resize image
            image = cv2.resize(image, (256, 256))

            # Preprocessing: HOG and LBP
            hog_features = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), visualize=False)
            # NOTE(review): the flattened LBP output is used directly as features
            # rather than a histogram of codes - confirm this is intended.
            lbp_features = local_binary_pattern(image, P=8, R=1, method='uniform').flatten()
            features = np.hstack((hog_features, lbp_features))

            X.append(features)
            y.append(label)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

    return np.array(X), np.array(y)

def train_svm_with_cross_validation_with_preprocessing(X, y, csv_filename):
    """Run 5-fold cross-validation of a linear SVM and log the accuracies.

    A fresh ``SVC(kernel='linear')`` is fitted on each training split produced
    by a shuffled ``KFold`` (seed 42) and scored on the held-out split. Each
    fold's accuracy (in percent) is printed and written to ``csv_filename``,
    followed by a final ``Average`` row.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by row with the fold index arrays.
    y : numpy.ndarray
        Labels, indexed the same way as ``X``.
    csv_filename : str
        Path of the CSV file to (over)write with the results.
    """
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = []

    with open(csv_filename, mode='w', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(['Fold', 'Fold Accuracy'])

        for fold_no, (train_idx, val_idx) in enumerate(splitter.split(X), start=1):
            # Fit a new classifier on this fold's training rows.
            classifier = SVC(kernel='linear')
            classifier.fit(X[train_idx], y[train_idx])

            # Score it on the held-out rows.
            predictions = classifier.predict(X[val_idx])
            fold_score = accuracy_score(y[val_idx], predictions)
            fold_scores.append(fold_score)

            print(f"Fold {fold_no} Accuracy (With Preprocessing): {fold_score * 100:.2f}%")
            csv_out.writerow([fold_no, fold_score * 100])

        # Summary row: mean accuracy over all folds, in percent.
        mean_pct = np.mean(fold_scores) * 100
        print(f"Average Accuracy (With Preprocessing): {mean_pct:.2f}%")
        csv_out.writerow(['Average', mean_pct])

# Run this cell to load data and train the SVM with preprocessing.
# The dataset root can be overridden without editing the notebook by setting the
# OCT_DATASET_PATH environment variable; the original location is the fallback.
dataset_path = os.environ.get(
    'OCT_DATASET_PATH',
    '/home/sheena.mittal.22cse/DIP/Dataset - train+val+test',
)
X_with_preprocessing, y = load_data_with_preprocessing(dataset_path)
csv_filename_with = 'svm_with_preprocessing_results.csv'
train_svm_with_cross_validation_with_preprocessing(X_with_preprocessing, y, csv_filename_with)


FileNotFoundError: [Errno 2] No such file or directory: '/home/sheena.mittal.22cse/DIP/Dataset - train+val+test/training/CNV'

In [None]:
import os
import cv2
import numpy as np
import csv
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm

def load_data_without_preprocessing(dataset_path):
    """Load all images under ``dataset_path`` as flattened raw-pixel vectors.

    The expected layout is ``<dataset_path>/<split>/<class>/<image>`` with
    splits ``training``, ``validation``, ``testing`` and classes ``CNV``,
    ``DME``, ``DRUSEN``, ``NORMAL``. The three splits are pooled into a single
    sample set.

    Parameters
    ----------
    dataset_path : str
        Root directory containing the split sub-directories.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``X`` holds one row per successfully read image (the resized grayscale
        image flattened to 1-D); ``y`` holds the class folder name of each row.

    Raises
    ------
    FileNotFoundError
        If any expected ``<split>/<class>`` directory does not exist. All
        directories are checked before any image is read so the failure is
        reported immediately rather than part-way through loading.
    """
    class_labels = ['CNV', 'DME', 'DRUSEN', 'NORMAL']
    dataset_types = ['training', 'validation', 'testing']

    # Resolve every class directory up front and fail fast if one is missing.
    class_dirs = [
        (dataset_type, label, os.path.join(dataset_path, dataset_type, label))
        for dataset_type in dataset_types
        for label in class_labels
    ]
    missing_dirs = [path for _, _, path in class_dirs if not os.path.isdir(path)]
    if missing_dirs:
        raise FileNotFoundError(
            "Missing dataset directories (expected <dataset_path>/<split>/<class>): "
            + ", ".join(missing_dirs)
        )

    X = []
    y = []
    unreadable = []

    for dataset_type, label, class_dir in class_dirs:
        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore the seeded cross-validation folds) reproducible.
        img_names = sorted(os.listdir(class_dir))
        for img_name in tqdm(img_names, desc=f"Loading {label} images from {dataset_type}"):
            img_path = os.path.join(class_dir, img_name)
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None instead of raising when a file cannot be
            # decoded (corrupt image, stray non-image file, sub-directory).
            if image is None:
                unreadable.append(img_path)
                continue

            # Resize image
            image = cv2.resize(image, (256, 256))

            # Without preprocessing: Use raw pixel values
            features = image.flatten()

            X.append(features)
            y.append(label)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[0]}")

    return np.array(X), np.array(y)

def train_svm_with_cross_validation_without_preprocessing(X, y, csv_filename):
    """Run 5-fold cross-validation of a linear SVM and log the accuracies.

    A fresh ``SVC(kernel='linear')`` is fitted on each training split produced
    by a shuffled ``KFold`` (seed 42) and scored on the held-out split. Each
    fold's accuracy (in percent) is printed and written to ``csv_filename``,
    followed by a final ``Average`` row.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by row with the fold index arrays.
    y : numpy.ndarray
        Labels, indexed the same way as ``X``.
    csv_filename : str
        Path of the CSV file to (over)write with the results.
    """
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = []

    with open(csv_filename, mode='w', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(['Fold', 'Fold Accuracy'])

        for fold_no, (train_idx, val_idx) in enumerate(splitter.split(X), start=1):
            # Fit a new classifier on this fold's training rows.
            classifier = SVC(kernel='linear')
            classifier.fit(X[train_idx], y[train_idx])

            # Score it on the held-out rows.
            predictions = classifier.predict(X[val_idx])
            fold_score = accuracy_score(y[val_idx], predictions)
            fold_scores.append(fold_score)

            print(f"Fold {fold_no} Accuracy (Without Preprocessing): {fold_score * 100:.2f}%")
            csv_out.writerow([fold_no, fold_score * 100])

        # Summary row: mean accuracy over all folds, in percent.
        mean_pct = np.mean(fold_scores) * 100
        print(f"Average Accuracy (Without Preprocessing): {mean_pct:.2f}%")
        csv_out.writerow(['Average', mean_pct])

# Run this cell to load data and train the SVM without preprocessing.
# The dataset root can be overridden without editing the notebook by setting the
# OCT_DATASET_PATH environment variable; the relative 'dataset' folder is the fallback.
dataset_path = os.environ.get('OCT_DATASET_PATH', 'dataset')
X_without_preprocessing, y = load_data_without_preprocessing(dataset_path)
csv_filename_without = 'svm_without_preprocessing_results.csv'
train_svm_with_cross_validation_without_preprocessing(X_without_preprocessing, y, csv_filename_without)
