In [3]:
import numpy as np
import os
from sklearn.decomposition import PCA, TruncatedSVD
import cv2
from scipy.signal import convolve2d
from sklearn.impute import KNNImputer
from scipy.ndimage import median_filter

In [None]:


def preprocess_images(batch_images, target_size=(64, 64)):
    """Resize every image in a batch to a common size.

    Parameters
    ----------
    batch_images : iterable of ndarray
        Images as returned by ``cv2.imread``.
    target_size : tuple of int, default (64, 64)
        Forwarded to ``cv2.resize`` as ``dsize``, which OpenCV interprets as
        ``(width, height)``.

    Returns
    -------
    ndarray
        The resized images stacked along a new leading axis. An empty batch
        yields an empty array.

    Raises
    ------
    ValueError
        If an entry is ``None``. ``cv2.imread`` returns ``None`` instead of
        raising when a file is missing or cannot be decoded, and passing that
        on to ``cv2.resize`` fails with an opaque OpenCV assertion.
    """
    processed_batch = []
    for index, img in enumerate(batch_images):
        if img is None:
            raise ValueError(
                f"Image at index {index} is None; cv2.imread could not read the file"
            )
        processed_batch.append(cv2.resize(img, target_size))
    return np.array(processed_batch)

def reduce_dimensions(images, reduction_method='pca', variance_threshold=0.95):
    """Project a batch of images onto fewer components and reconstruct them.

    Each image is flattened to one row, the chosen reducer is fitted on the
    whole batch, and the reduced representation is mapped back to pixel space
    with ``inverse_transform``.

    Parameters
    ----------
    images : array-like
        Batch of images; the first axis indexes the images.
    reduction_method : str, default 'pca'
        ``'pca'`` uses ``PCA``; ``'svd'`` (aliases ``'truncated_svd'``,
        ``'tsvd'``) uses ``TruncatedSVD``. Matching is case-insensitive.
    variance_threshold : float, default 0.95
        Forwarded to ``PCA`` as ``n_components``. It is NOT used on the SVD
        path, which always keeps ``min(n_images, n_features)`` components.

    Returns
    -------
    tuple
        ``(reconstructed, variance_preserved)``: the reconstructed batch as a
        floating-point array with the same shape as ``images``, and the summed
        explained-variance ratio of the fitted reducer as a percentage.

    Raises
    ------
    ValueError
        If ``images`` is empty or ``reduction_method`` is not recognised.
    """
    images = np.asarray(images)
    if images.size == 0:
        raise ValueError("images must contain at least one non-empty image")

    # Reject unknown names up front instead of silently running SVD for a typo.
    method_key = reduction_method.lower()
    if method_key not in ('pca', 'svd', 'truncated_svd', 'tsvd'):
        raise ValueError(f"Unknown reduction method: {reduction_method}")

    original_shape = images.shape
    flattened = images.reshape(len(images), -1)

    if method_key == 'pca':
        reducer = PCA(n_components=variance_threshold)
    else:
        reducer = TruncatedSVD(n_components=min(flattened.shape))

    reduced = reducer.fit_transform(flattened)
    reconstructed = reducer.inverse_transform(reduced)
    variance_preserved = float(np.sum(reducer.explained_variance_ratio_)) * 100

    print(f"Variance preserved: {variance_preserved:.2f}%")
    return reconstructed.reshape(original_shape), variance_preserved

def apply_convolution(image, kernel):
    """Convolve an image with a 2-D kernel.

    A three-dimensional image is filtered one channel (last axis) at a time
    and the results are stacked back along that axis; any other input is
    handed to ``convolve2d`` as-is. The output keeps the input's height and
    width (``mode='same'``) and the image is treated as periodic at its
    borders (``boundary='wrap'``).
    """
    def _filter_plane(plane):
        return convolve2d(plane, kernel, mode='same', boundary='wrap')

    if image.ndim != 3:
        return _filter_plane(image)

    channel_results = []
    for channel_index in range(image.shape[2]):
        channel_results.append(_filter_plane(image[:, :, channel_index]))
    return np.stack(channel_results, axis=2)

def get_convolution_kernels():
    """Build the named 3x3 kernels used with ``apply_convolution``.

    Returns a new dict on every call, mapping kernel name to an integer
    ndarray:

    * ``'edge_detection'`` - centre weight 8, all eight neighbours -1.
    * ``'sobel_x'`` / ``'sobel_y'`` - Sobel kernels along columns / rows.
    * ``'color_contrast'`` - centre weight 5, four direct neighbours -1.
    """
    kernel_rows = (
        ('edge_detection', [[-1, -1, -1],
                            [-1,  8, -1],
                            [-1, -1, -1]]),
        ('sobel_x', [[-1, 0, 1],
                     [-2, 0, 2],
                     [-1, 0, 1]]),
        ('sobel_y', [[-1, -2, -1],
                     [ 0,  0,  0],
                     [ 1,  2,  1]]),
        ('color_contrast', [[ 0, -1,  0],
                            [-1,  5, -1],
                            [ 0, -1,  0]]),
    )
    return {name: np.array(rows) for name, rows in kernel_rows}

def apply_max_pooling(image, pool_size=2):
    """Downsample an image by taking the maximum of each pooling window.

    Windows are non-overlapping ``pool_size x pool_size`` tiles over the first
    two axes. Trailing rows/columns that do not fill a complete tile are
    dropped, and any further axes (e.g. colour channels) are pooled
    independently.

    Parameters
    ----------
    image : array-like
        Input with at least two dimensions (height, width, ...).
    pool_size : int, default 2
        Side length of the square pooling window.

    Returns
    -------
    ndarray
        float64 array of shape ``(h // pool_size, w // pool_size, ...)``.

    Raises
    ------
    ValueError
        If ``image`` has fewer than two dimensions or ``pool_size`` < 1.
    """
    image = np.asarray(image)
    if image.ndim < 2:
        raise ValueError("image must have at least two dimensions (height, width)")
    if pool_size < 1:
        raise ValueError(f"pool_size must be a positive integer, got {pool_size}")

    h, w = image.shape[:2]
    new_h, new_w = h // pool_size, w // pool_size

    # Crop to a whole number of tiles, then split each spatial axis into
    # (tile index, offset inside the tile) so one reduction replaces the
    # per-pixel Python loops.
    cropped = image[:new_h * pool_size, :new_w * pool_size]
    tiles = cropped.reshape((new_h, pool_size, new_w, pool_size) + image.shape[2:])
    return tiles.max(axis=(1, 3)).astype(np.float64)

def apply_avg_pooling(image, pool_size=2):
    """Downsample an image by averaging each pooling window.

    Windows are non-overlapping ``pool_size x pool_size`` tiles over the first
    two axes. Trailing rows/columns that do not fill a complete tile are
    dropped, and any further axes (e.g. colour channels) are pooled
    independently.

    Parameters
    ----------
    image : array-like
        Input with at least two dimensions (height, width, ...).
    pool_size : int, default 2
        Side length of the square pooling window.

    Returns
    -------
    ndarray
        float64 array of shape ``(h // pool_size, w // pool_size, ...)``.

    Raises
    ------
    ValueError
        If ``image`` has fewer than two dimensions or ``pool_size`` < 1.
    """
    image = np.asarray(image)
    if image.ndim < 2:
        raise ValueError("image must have at least two dimensions (height, width)")
    if pool_size < 1:
        raise ValueError(f"pool_size must be a positive integer, got {pool_size}")

    h, w = image.shape[:2]
    new_h, new_w = h // pool_size, w // pool_size

    # Crop to a whole number of tiles, then split each spatial axis into
    # (tile index, offset inside the tile) so one reduction replaces the
    # per-pixel Python loops.
    cropped = image[:new_h * pool_size, :new_w * pool_size]
    tiles = cropped.reshape((new_h, pool_size, new_w, pool_size) + image.shape[2:])
    # Accumulate in float64 so the result matches the float64 output buffer
    # the loop-based version filled.
    return tiles.mean(axis=(1, 3), dtype=np.float64)

def flatten_batch(images):
    """Collapse each image of a batch into a single row vector.

    The first axis (one entry per image) is kept; all remaining axes are
    merged, giving an array of shape ``(n_images, -1)``.
    """
    n_images = len(images)
    row_shape = (n_images, -1)
    return images.reshape(row_shape)

def _flag_outliers(pixels, window_size, outlier_threshold):
    """Return (mask, local_median): pixels that are NaN or far from their local median."""
    missing = np.isnan(pixels)
    # The median filter cannot rank NaNs, so stand in the global median for them.
    filled = np.where(missing, np.nanmedian(pixels), pixels) if missing.any() else pixels
    local_median = median_filter(filled, size=window_size)
    residual = filled - local_median

    # Robust spread of the residuals (scaled MAD); fall back to the standard
    # deviation when more than half of the residuals are identical.
    scale = 1.4826 * np.median(np.abs(residual - np.median(residual)))
    if scale == 0:
        scale = residual.std()
    if not scale > 0:  # also covers NaN
        return missing, local_median
    return missing | (np.abs(residual) > outlier_threshold * scale), local_median


def remove_outliers(image, method='knn', n_neighbors=5, window_size=3, outlier_threshold=3.0):
    """Remove outlier pixels from an image.

    Parameters
    ----------
    image : array-like
        2-D image, or 3-D image whose last axis holds channels; channels are
        cleaned independently.
    method : {'knn', 'median'}, default 'knn'
        ``'median'`` applies ``median_filter`` with ``size=window_size`` to the
        whole image. ``'knn'`` replaces only pixels flagged as outliers (or
        already NaN) with a ``KNNImputer`` estimate based on their local
        window; all other pixels are returned unchanged.
    n_neighbors : int, default 5
        Forwarded to ``KNNImputer`` (``'knn'`` only).
    window_size : int, default 3
        Side length of the square neighbourhood.
    outlier_threshold : float, default 3.0
        ``'knn'`` only. A pixel is flagged when it differs from its local
        median by more than this many robust standard deviations.

    Returns
    -------
    ndarray
        For a 3-D input, an array with the input's shape and dtype. For a 2-D
        input, a float64 array (``'knn'``) or the ``median_filter`` output
        (``'median'``).

    Raises
    ------
    ValueError
        If ``method`` is not recognised.

    Notes
    -----
    ``KNNImputer`` only fills entries that are NaN, so the outliers have to be
    marked as missing before imputation; feeding it the raw windows leaves
    every pixel untouched.
    """
    image = np.asarray(image)
    method_key = method.lower()
    if method_key not in ('knn', 'median'):
        raise ValueError(f"Unknown outlier removal method: {method}")

    # Handle different color channels separately
    if image.ndim == 3:
        cleaned_image = np.zeros_like(image)
        round_result = method_key == 'knn' and np.issubdtype(image.dtype, np.integer)
        for c in range(image.shape[2]):
            channel = remove_outliers(image[:, :, c], method, n_neighbors,
                                      window_size, outlier_threshold)
            # Imputed values are float means; round rather than truncate when
            # storing them back into an integer image.
            cleaned_image[:, :, c] = np.rint(channel) if round_result else channel
        return cleaned_image

    if method_key == 'median':
        return median_filter(image, size=window_size)

    h, w = image.shape
    pixels = image.astype(np.float64)
    outlier_mask, local_median = _flag_outliers(pixels, window_size, outlier_threshold)
    if not outlier_mask.any():
        return pixels

    # Mark outliers as missing everywhere they occur, then give every pixel a
    # feature row holding its full window. Edge-padding the image first keeps
    # each pixel at the same window position, including along the borders.
    masked = np.where(outlier_mask, np.nan, pixels)
    before = window_size // 2
    after = window_size - 1 - before
    padded = np.pad(masked, ((before, after), (before, after)), mode='edge')
    windows = np.lib.stride_tricks.sliding_window_view(padded, (window_size, window_size))
    features = windows.reshape(h * w, window_size ** 2)

    imputed = KNNImputer(n_neighbors=n_neighbors).fit_transform(features)

    if imputed.shape == features.shape:
        centre = before * window_size + before
        replacement = imputed[:, centre].reshape(h, w)
    else:
        # KNNImputer drops feature columns that are entirely missing, which
        # shifts the column layout; use the local median in that case.
        replacement = local_median

    pixels[outlier_mask] = replacement[outlier_mask]
    return pixels


In [5]:
# os.listdir returns entries in arbitrary order; sort them so that A[0] and
# every index-based lookup in later cells refer to the same file on every run.
A = sorted(os.listdir('E:/@IIT_BBS/@Sem 1/ML/Final Project/P1'))
len(A)

427

In [6]:
import matplotlib.pyplot as plt

In [9]:
# Load the first listed file to check the native image shape.
first_image = cv2.imread('E:/@IIT_BBS/@Sem 1/ML/Final Project/P1/' + A[0])
first_image.shape

(4284, 4284, 3)

In [None]:
# Process images in smaller batches to avoid memory issues
batch_size = 10  # Adjust this number based on your available memory
# Read from the directory `A` was listed from, so loading does not depend on
# the notebook's current working directory.
image_dir = 'E:/@IIT_BBS/@Sem 1/ML/Final Project/P1'
processed_images = []
skipped_files = []

for i in range(0, len(A), batch_size):
    batch_files = A[i:i + batch_size]
    batch_images = []
    for filename in batch_files:
        img = cv2.imread(os.path.join(image_dir, filename))
        # cv2.imread returns None (it does not raise) for files it cannot
        # decode, e.g. non-image entries in the directory listing.
        if img is None:
            skipped_files.append(filename)
            continue
        batch_images.append(img)

    if not batch_images:
        continue

    # Preprocess current batch
    batch_processed = preprocess_images(batch_images)
    processed_images.extend(batch_processed)

if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable file(s), e.g. {skipped_files[:5]}")

# Convert to numpy array for further processing
processed_images = np.array(processed_images)

# Apply dimension reduction on the processed images
reduced_images, variance = reduce_dimensions(processed_images,
                                             reduction_method='pca',
                                             variance_threshold=0.95)

In [None]:
# cv2.imshow opens a native window that only renders once cv2.waitKey runs,
# and it multiplies floating-point images by 255, so the float reconstruction
# would show up saturated. Render a clipped 8-bit copy inline instead.
preview = np.clip(reduced_images[0], 0, 255).astype(np.uint8)
plt.imshow(cv2.cvtColor(preview, cv2.COLOR_BGR2RGB))  # cv2.imread loads BGR; matplotlib expects RGB
plt.title("Reduced Image")
plt.axis("off")
plt.show()