# Feature Representations

In [10]:
import cv2
import numpy as np
from skimage.feature import hog
from skimage.feature import local_binary_pattern

def extract_combined_features(image):
    """Build one flat descriptor from the colour, LBP and HOG extractors.

    The three extractors are run on ``image`` in the order colour, LBP, HOG
    and their 1-D outputs are joined end to end in that same order.
    """
    extractors = (
        extract_color_features,
        extract_lbp_features,
        extract_hog_features,
    )
    return np.concatenate([extract(image) for extract in extractors])

def extract_color_features(img):
    """Describe an image's colour content as a single 1-D feature vector.

    Images in this file come straight from ``cv2.imread``, i.e. in OpenCV's
    BGR channel order, so the colour-space conversions use the ``BGR2*``
    codes (the same convention ``extract_hog_features`` follows). Using the
    ``RGB2*`` codes on such images swaps red and blue before the conversion.

    Args:
        img: 3-channel image array in BGR order. The histogram ranges below
            assume 8-bit channel values.

    Returns:
        1-D array holding, in order: the normalised 8x8x8 joint histograms
        of the raw, HSV and LAB channels (512 bins each), followed by the
        per-channel means and the per-channel standard deviations of ``img``.
    """
    # Convert to the other colour spaces
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    # Joint 3-D histograms with 8 bins per channel
    bins = [8, 8, 8]
    full_range = [0, 256, 0, 256, 0, 256]
    hist_bgr = cv2.calcHist([img], [0, 1, 2], None, bins, full_range)
    # The hue channel of an 8-bit HSV image only spans 0-180
    hist_hsv = cv2.calcHist([hsv], [0, 1, 2], None, bins, [0, 180, 0, 256, 0, 256])
    hist_lab = cv2.calcHist([lab], [0, 1, 2], None, bins, full_range)

    # Flatten and normalise each histogram so its bins sum to 1
    hist_bgr = hist_bgr.flatten() / hist_bgr.sum()
    hist_hsv = hist_hsv.flatten() / hist_hsv.sum()
    hist_lab = hist_lab.flatten() / hist_lab.sum()

    # Mean and standard deviation of every channel over all pixels
    channel_means = img.mean(axis=(0, 1))
    channel_stds = img.std(axis=(0, 1))

    return np.concatenate([hist_bgr, hist_hsv, hist_lab, channel_means, channel_stds])

def extract_lbp_features(img):
    """Return the normalised histogram of uniform local binary patterns.

    Args:
        img: 3-channel image in OpenCV's BGR channel order (what
            ``cv2.imread`` returns and what ``extract_hog_features`` assumes).

    Returns:
        1-D float array with ``n_points + 2`` bins (26 for the radius used
        here) that sums to approximately 1.
    """
    # BGR, not RGB: the grayscale weights differ per channel, so the wrong
    # code would swap the red and blue contributions.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # LBP parameters
    radius = 3  # Radius of the sampling circle
    n_points = 8 * radius  # Number of sample points on the circle

    # Compute the LBP code of every pixel
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')

    # One unit-wide bin per code. Explicit bin edges already fix the range,
    # so no separate ``range`` argument is needed.
    bin_edges = np.arange(0, n_points + 3)
    hist, _ = np.histogram(lbp.ravel(), bins=bin_edges)

    # Normalise the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)  # Avoid division by zero

    return hist

def extract_hog_features(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), orientations=9):
    """Compute a flattened HOG descriptor of the grayscale version of ``image``.

    ``image`` is converted from BGR to grayscale first; the HOG blocks are
    normalised with 'L2-Hys' and returned as a single 1-D feature vector.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hog_options = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        feature_vector=True,
    )
    return hog(grayscale, **hog_options)

def extract_sift_features(image, num_features=100):
    """Return a fixed-length, flattened block of SIFT descriptors.

    Args:
        image: Image passed to ``detectAndCompute``.
        num_features: Number of descriptors to keep.

    Returns:
        1-D array of length ``num_features * 128``. When fewer descriptors
        are found (or none at all) the remainder is zero-padded, so every
        call yields a vector of the same shape.
    """
    sift = cv2.SIFT_create(nfeatures=num_features)
    _, descriptors = sift.detectAndCompute(image, None)

    # No keypoints: start from an empty descriptor table so the padding
    # below produces the same flat shape as every other path.
    if descriptors is None:
        descriptors = np.zeros((0, 128))  # SIFT descriptor is 128-dimensional

    missing = num_features - descriptors.shape[0]
    if missing > 0:
        descriptors = np.vstack((descriptors, np.zeros((missing, 128))))

    # The detector may return more rows than requested; keep the first ones.
    return descriptors[:num_features].flatten()

def extract_surf_features(image, num_features=100, hessian_threshold=400):
    """Return a fixed-length, flattened block of SURF descriptors.

    Args:
        image: Image passed to ``detectAndCompute``.
        num_features: Number of descriptors to keep.
        hessian_threshold: Hessian threshold handed to the SURF detector.

    Returns:
        1-D array of length ``num_features * 64``. When fewer descriptors
        are found (or none at all) the remainder is zero-padded, so every
        call yields a vector of the same shape.
    """
    surf = cv2.xfeatures2d.SURF_create(
        hessianThreshold=hessian_threshold,
        nOctaves=4,
        nOctaveLayers=3,
        extended=False,  # non-extended SURF descriptors are 64-dimensional
        upright=True,
    )
    _, descriptors = surf.detectAndCompute(image, None)

    # No keypoints: start from an empty descriptor table so the padding
    # below produces the same flat shape as every other path.
    if descriptors is None:
        descriptors = np.zeros((0, 64))

    missing = num_features - descriptors.shape[0]
    if missing > 0:
        descriptors = np.vstack((descriptors, np.zeros((missing, 64))))

    return descriptors[:num_features].flatten()

def extract_features(image, method):
    """Dispatch to the feature extractor named by ``method``.

    Args:
        image: Image handed unchanged to the selected extractor.
        method: One of 'HOG', 'SIFT', 'SURF' or 'Combined' (case-sensitive).

    Returns:
        Whatever the selected extractor returns.

    Raises:
        ValueError: If ``method`` is not one of the supported names. Without
            this check an unknown name silently produced ``None``.
    """
    if method == 'HOG':
        return extract_hog_features(image)

    if method == 'SIFT':
        return extract_sift_features(image)

    if method == 'SURF':
        return extract_surf_features(image)

    if method == 'Combined':
        return extract_combined_features(image)

    raise ValueError(
        f"Unknown feature extraction method: {method!r}; "
        "expected 'HOG', 'SIFT', 'SURF' or 'Combined'"
    )


def create_pixel_features(image, mask, method):
    """Pair an image's feature vector with its flattened label mask.

    Args:
        image: Image handed to ``extract_features``.
        mask: Label image whose values can be viewed as 3-channel pixels;
            only the first channel of every pixel is used.
        method: Feature extraction method name, see ``extract_features``.

    Returns:
        Tuple ``(X, y)``: ``X`` is the output of ``extract_features`` and
        ``y`` is a 1-D array with one label per pixel.
    """
    X = extract_features(image, method)
    # Take channel 0 of every pixel with a slice instead of one Python call
    # per pixel; ``copy`` keeps ``y`` independent of ``mask``.
    y = mask.reshape(-1, 3)[:, 0].copy()
    return X, y

Creating our training and testing sets

In [11]:
import os
from sklearn.model_selection import train_test_split
import random

# Size passed to cv2.resize for every image and mask before feature extraction.
target_size = (256, 256)

# Used for training on all V datasets (K datasets are corrupted after downloading from Sharepoint)
def get_input_output_paths(root_dir, sub_dirs, max_samples=None):
    """Collect shuffled image paths and their matching label paths.

    For every ``sub_dir`` the files in ``<root_dir>/<sub_dir>/image`` are
    listed and shuffled, and each one is paired with the file of the same
    name in ``<root_dir>/<sub_dir>/indexLabel``.

    Args:
        root_dir: Dataset root directory.
        sub_dirs: Sequence of sub-dataset directory names under ``root_dir``.
        max_samples: Optional overall cap. It is split evenly (floor
            division) across ``sub_dirs``, so the result never exceeds it
            and may fall short of it. ``None`` or ``0`` means no limit.

    Returns:
        Tuple ``(input_paths, target_paths)`` of equally long lists whose
        entries correspond position by position.
    """
    input_paths = []
    target_paths = []

    # Guard the division so an empty ``sub_dirs`` simply yields empty lists.
    if max_samples and len(sub_dirs):
        samples_per_dir = max_samples // len(sub_dirs)
    else:
        samples_per_dir = None

    for sub_dir in sub_dirs:
        input_dir = os.path.join(root_dir, sub_dir, 'image')
        target_dir = os.path.join(root_dir, sub_dir, 'indexLabel')

        file_names = os.listdir(input_dir)
        random.shuffle(file_names)

        # ``is not None`` so that a per-directory share of 0 is honoured
        # instead of being mistaken for "no limit".
        if samples_per_dir is not None:
            file_names = file_names[:samples_per_dir]

        # Build both paths from the file name rather than replacing the text
        # "image" in the full path, which would also rewrite any other
        # occurrence of "image" in the root directory or the file name.
        input_paths.extend(os.path.join(input_dir, name) for name in file_names)
        target_paths.extend(os.path.join(target_dir, name) for name in file_names)

    return input_paths, target_paths

# Sample image paths from the three V sub-datasets with an overall cap of 50
# (split evenly per sub-dataset); the returned label paths are discarded here.
input_img_paths, _ = get_input_output_paths('WildScenes2d', ['V-01', 'V-02', 'V-03'], 50)

def resize_image_and_mask(image, mask, target_size):
    """Resize an image and its label mask to ``target_size``.

    The image uses ``cv2.resize``'s default interpolation; the mask uses
    nearest-neighbour interpolation, which only copies existing pixel values.

    Returns:
        Tuple ``(resized_image, resized_mask)``.
    """
    return (
        cv2.resize(image, target_size),
        cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST),
    )

# Build the dataset: one feature vector (X) and one flattened label mask (y)
# per sampled image.
X_all = []
y_all = []

for img_path in input_img_paths:
    # The label path is derived by replacing 'image' with 'indexLabel' in the image path.
    mask_path = img_path.replace('image', 'indexLabel')
    img = cv2.imread(img_path)
    mask = cv2.imread(mask_path)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with the offending paths rather than
    # later inside cv2.resize.
    if img is None or mask is None:
        raise OSError(f"Could not read image/mask pair: {img_path!r} / {mask_path!r}")
    img, mask = resize_image_and_mask(img, mask, target_size)
    # From here on ``img`` holds the feature vector and ``mask`` the
    # per-pixel labels.
    img, mask = create_pixel_features(img, mask, method='Combined')
    X_all.append(img)
    y_all.append(mask)

X = np.array(X_all)
y = np.array(y_all)

def get_majority_values(arr):
    """Return the most frequent value in ``arr``.

    When several values share the highest count, the smallest of them is
    returned.
    """
    # np.unique returns the distinct values in ascending order
    distinct, occurrences = np.unique(arr.flatten(), return_counts=True)

    # argmax gives the first position of the highest count, i.e. the
    # smallest value among ties
    return distinct[occurrences.argmax()]

# Most frequent label in each row of y, i.e. one value per image.
majority_values = np.apply_along_axis(get_majority_values, 1, y)

# Hold out 25% of the images for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

In [12]:
# Sanity check: number of sampled images, shapes of the train/test arrays,
# and the per-image majority label.
print(len(input_img_paths))
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
# print(np.unique(majority_values))
# print(np.count(2))
print(majority_values)

48
(36, 36164)
(12, 36164)
(36, 65536)
(12, 65536)
[ 8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8 17
  8  8  8  8  8  8  8  8  8  8  8  7  8  8  8  8  8  8  8  8  8  8  8  8]


In [13]:
def element_proportions(arr):
    """Map each distinct value in ``arr`` to the fraction of elements equal to it."""
    values = arr.flatten()
    distinct, occurrences = np.unique(values, return_counts=True)
    shares = occurrences / values.size
    return {value: share for value, share in zip(distinct, shares)}

# Compare the label distribution of the full set with the train and test splits.
print(element_proportions(y))
print(element_proportions(y_train))
print(element_proportions(y_test))

{2: 0.0901632308959961, 3: 0.0012070337931315105, 4: 0.003926595052083333, 5: 0.001873016357421875, 7: 0.13138357798258463, 8: 0.527019182840983, 9: 0.01747290293375651, 12: 0.00017611185709635416, 14: 0.00017801920572916666, 15: 0.010582605997721354, 16: 0.0010506312052408855, 17: 0.07652568817138672, 18: 0.1384414037068685}
{2: 0.084136962890625, 4: 0.0038121541341145835, 7: 0.1360897488064236, 8: 0.5277188618977865, 9: 0.016742282443576388, 12: 0.00019158257378472222, 14: 0.00023735894097222222, 15: 0.012568155924479166, 16: 0.0011435614691840278, 17: 0.07187186347113715, 18: 0.14548746744791666}
{2: 0.10824203491210938, 3: 0.004828135172526042, 4: 0.004269917805989583, 5: 0.0074920654296875, 7: 0.1172650655110677, 8: 0.5249201456705729, 9: 0.019664764404296875, 12: 0.00012969970703125, 15: 0.004625956217447917, 16: 0.0007718404134114584, 17: 0.09048716227213542, 18: 0.11730321248372395}


Fitting the model

In [14]:
from sklearn.ensemble import RandomForestClassifier

# y_train has one column per pixel (see the shapes printed above), so the
# forest is fitted with a 2-D target: one output per pixel position.
rf = RandomForestClassifier(max_depth=10, max_leaf_nodes=50, n_estimators=100, random_state=42, verbose=2)
rf.fit(X_train, y_train)

TypeError: RandomForestClassifier.__init__() got an unexpected keyword argument 'max_leaf_node'

Gathering model predictions

In [None]:
# Predict the labels for every held-out image.
y_pred = rf.predict(X_test)

Model evaluation using IoU

In [None]:
from sklearn.metrics import jaccard_score, accuracy_score, f1_score

# Join the per-image rows into one long label vector each, so the metrics
# are computed over all test pixels at once.
y_pred_whole = np.concatenate(y_pred)
y_test_whole = np.concatenate(y_test)
# average=None yields one score per class instead of a single aggregate.
iou = jaccard_score(y_test_whole, y_pred_whole, average=None)
print(f'IoU for each class: {iou}')
print(f'Mean IoU: {np.mean(iou)}')
print(f'Accuracy: {accuracy_score(y_test_whole, y_pred_whole)}')
print(f'F1: {f1_score(y_test_whole, y_pred_whole, average=None)}')

In [None]:
# Run the trained forest on two individual images.
# NOTE(review): these paths have no 'WildScenes2d' prefix, unlike the root
# used when sampling the training images — confirm the working directory.
img1 = np.asarray(cv2.imread('V-01/image/1623379829-508641462.png'))
img1 = cv2.resize(img1, target_size)
features = extract_combined_features(img1)
# Wrapping the vector in a list passes predict a single-sample batch.
y_pred1 = rf.predict([features])

img2 = np.asarray(cv2.imread('V-01/image/1623379830-781623853.png'))
img2 = cv2.resize(img2, target_size)
features = extract_combined_features(img2)
y_pred2 = rf.predict([features])

In [None]:
# Colour per class index: the list position is the label value (0-18).
# The rendered result is later converted with cv2.COLOR_BGR2RGB, so these
# triples are treated as BGR.
# NOTE(review): positions 1 and 9 hold the same colour [75, 25, 230] —
# confirm against the dataset's palette.
colours = [[0, 0, 0], 
           [75, 25, 230], 
           [75, 180, 60], 
           [25, 225, 255], 
           [200, 130, 0], 
           [180, 30, 145], 
           [240, 240, 70], 
           [230, 50, 240], 
           [60, 245, 210], 
           [75, 25, 230], 
           [128, 128, 0], 
           [40, 110, 170], 
           [200, 250, 255], 
           [0, 0, 128], 
           [195, 255, 170], 
           [0, 128, 128], 
           [190, 190, 250], 
           [128, 0, 0], 
           [128, 128, 128]]


# Turn each flat prediction back into a 256x256 label image (the values of target_size).
y_pred1 = y_pred1.reshape(256,256)
y_pred2 = y_pred2.reshape(256,256)

# Colour the predicted label image with a single vectorised palette lookup
# (indexing the colour table by the label array) instead of a per-pixel
# Python loop; the result has shape (256, 256, 3).
result1 = np.asarray(colours)[y_pred1]
actual1 = np.asarray(cv2.imread('V-01/label/1623379829-508641462.png'))
actual1 = cv2.resize(actual1, target_size)

import matplotlib.pyplot as plt

# Prediction on the left, ground-truth label image on the right.
plt.figure(figsize=(10, 10))
plt.subplot(1, 2, 1)
plt.axis('off')
plt.imshow(cv2.cvtColor(result1.astype(np.uint8), cv2.COLOR_BGR2RGB))
plt.subplot(1, 2, 2)
plt.axis('off')
plt.imshow(cv2.cvtColor(actual1, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# Colour the predicted label image with a single vectorised palette lookup
# (indexing the colour table by the label array) instead of a per-pixel
# Python loop; the result has shape (256, 256, 3).
result2 = np.asarray(colours)[y_pred2]
actual2 = np.asarray(cv2.imread('V-01/label/1623379830-781623853.png'))
actual2 = cv2.resize(actual2, target_size)

# Prediction on the left, ground-truth label image on the right.
plt.figure(figsize=(10, 10))
plt.subplot(1, 2, 1)
plt.axis('off')
plt.imshow(cv2.cvtColor(result2.astype(np.uint8), cv2.COLOR_BGR2RGB))
plt.subplot(1, 2, 2)
plt.axis('off')
plt.imshow(cv2.cvtColor(actual2, cv2.COLOR_BGR2RGB))
plt.show()