In [231]:
# The task of this notebook is to:
# 1. generate ALL features of each image
# 2. store them on disk (the cells below write pickled .pkl files, not .npz)
# 3. check the final dimensions and confirm the saved features load correctly for later classification

In [1]:
import cv2
import pytesseract
import numpy as np
np.set_printoptions(suppress=True, precision=3)
from PIL import Image, ImageDraw
import os
from skimage.feature import graycomatrix, graycoprops
import pickle

In [107]:
# get region of interest ROI
def extract_roi(img, threshold):
    """Build a binary region-of-interest mask from a grayscale image.

    Parameters
    ----------
    img : np.ndarray
        Single-channel (2-D) image.
    threshold : scalar
        Pixels strictly below this value are flagged as ROI.

    Returns
    -------
    np.ndarray
        ``uint8`` array shaped like ``img``: 1 where ``img < threshold``, else 0.

    Raises
    ------
    ValueError
        If ``img`` does not have exactly two dimensions.
    """
    if img.ndim != 2:
        raise ValueError("Input image must be grayscale")
    return np.less(img, threshold).astype(np.uint8)

In [108]:
# lets get e's extracted

def get_e(image_path, crop, op_dir, max_count=50):
    """Find the letter 'e' in an image with Tesseract and save a small crop of each hit.

    Parameters
    ----------
    image_path : str
        Image to scan; read with ``cv2.imread``.
    crop : int
        Nominal side length of the square crop. The window spans ``crop // 2``
        pixels on each side of the box centre, so an odd ``crop`` yields a
        window one pixel smaller, and windows touching the image border are
        clipped (crops are therefore not guaranteed to share one size).
    op_dir : str
        Output directory (created if missing); receives ``e_1.png``, ``e_2.png``, ...
    max_count : int, optional
        Maximum number of crops to save. Defaults to 50, the limit that was
        previously hard-coded.

    Side effects
    ------------
    Writes the crops into ``op_dir``, writes ``output_with_boxes.png`` into the
    current working directory (overwritten on every call) and prints a summary.

    Raises
    ------
    FileNotFoundError
        If ``image_path`` cannot be read as an image.
    """
    img1 = cv2.imread(image_path)
    if img1 is None:
        # cv2.imread reports failure by returning None; without this check the
        # failure only shows up as an opaque assertion inside cvtColor.
        raise FileNotFoundError(f"Unable to read image at {image_path}")
    img2 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)

    # Working copy handed to Tesseract and used for the annotated output.
    # The target size equals the source size, so no actual rescaling happens;
    # crops are always taken from the untouched grayscale image `img2`.
    img = cv2.resize(img2, (img1.shape[1], img1.shape[0]), interpolation=cv2.INTER_CUBIC)

    # Image height, needed to flip the y-coordinates reported by Tesseract.
    h = img.shape[0]

    os.makedirs(op_dir, exist_ok=True)

    # One line per recognised character: "<char> x1 y1 x2 y2 ...".
    boxes = pytesseract.image_to_boxes(img)
    count = 0
    half = crop // 2

    for line in boxes.strip().splitlines():
        fields = line.split()
        if len(fields) < 5:
            # Skip anything that is not a complete "<char> x1 y1 x2 y2" record.
            continue
        char = fields[0]
        x1, y1, x2, y2 = (int(v) for v in fields[1:5])

        # Box coordinates are measured from the bottom-left corner; convert
        # them to the top-left origin used for array indexing.
        y1_new = h - y1
        y2_new = h - y2

        # Outline every recognised character (not only the e's) on the annotated copy.
        cv2.rectangle(img, (x1, y2_new), (x2, y1_new), (255, 0, 0), 1)

        if char.lower() == 'e' and count < max_count:
            # Centre of the character's bounding box.
            cx = (x1 + x2) // 2
            cy = (y1_new + y2_new) // 2

            # Fixed-size window around the centre, clipped to the image bounds.
            x_start = max(0, cx - half)
            y_start = max(0, cy - half)
            x_end = min(img2.shape[1], cx + half)
            y_end = min(img2.shape[0], cy + half)

            cropped = img2[y_start:y_end, x_start:x_end]

            # Only non-empty crops are written; files are numbered from 1.
            if cropped.shape[0] > 0 and cropped.shape[1] > 0:
                out_path = os.path.join(op_dir, f"e_{count+1}.png")
                cv2.imwrite(out_path, cropped)
                count += 1

    cv2.imwrite("output_with_boxes.png", img)
    print(f"Saved {count} 'e' characters in '{op_dir}/' and annotated image as 'output_with_boxes.png'")

In [41]:
# Extract 14-pixel 'e' crops from each source scan into its own numbered class folder.
for src_image, dest_dir in [
    ('Dataset_OG/BASE_OG.jpg', '/home/raginivi/Desktop/CV_project/saved_e/1'),
    ('Dataset_OG/banded_image_10_0.04.jpg', '/home/raginivi/Desktop/CV_project/saved_e/2'),
    ('Dataset_OG/banded_image_10_0.01.jpg', '/home/raginivi/Desktop/CV_project/saved_e/3'),
    ('Dataset_OG/banded_image_10_0.1.jpg', '/home/raginivi/Desktop/CV_project/saved_e/4'),
    ('Dataset_OG/banded_image_5_0.1.jpg', '/home/raginivi/Desktop/CV_project/saved_e/5'),
]:
    get_e(src_image, 14, dest_dir)

Saved 50 'e' characters in '/home/raginivi/Desktop/CV_project/saved_e/2/' and annotated image as 'output_with_boxes.png'


In [5]:
# ALL NORMAL FEATURES

In [109]:
def compute_variance(img, roi_mask):
    """Population variance of the image values selected by ``roi_mask``.

    Parameters
    ----------
    img : np.ndarray
        Image whose values are analysed.
    roi_mask : np.ndarray
        Array of the same shape; non-zero entries select pixels.

    Returns
    -------
    float or int
        ``sum((x - mean)**2) / R`` over the ``R`` selected pixels, or the
        integer 0 when the mask selects nothing.
    """
    selected = img[np.nonzero(roi_mask)]
    count = len(selected)
    if count == 0:
        return 0
    centre = np.mean(selected)
    return np.sum((selected - centre) ** 2) / count


def compute_entropy(img, roi_mask):
    """Base-2 entropy of the histogram of the pixels selected by ``roi_mask``.

    Parameters
    ----------
    img : np.ndarray
        Image whose values are analysed.
    roi_mask : np.ndarray
        Array of the same shape; non-zero entries select pixels.

    Returns
    -------
    float or int
        ``-sum(p * log2(p + 1e-10))`` over a 256-bin histogram on ``[0, 255]``,
        or the integer 0 when the mask selects nothing.
    """
    selected = img[np.nonzero(roi_mask)]
    count = len(selected)
    if count == 0:
        return 0

    # Relative frequency of each histogram bin among the selected pixels.
    counts, _edges = np.histogram(selected, bins=256, range=(0, 255))
    prob = counts / count
    # The small constant keeps log2 finite for empty bins.
    return -np.sum(prob * np.log2(prob + 1e-10))

In [110]:
# ALL GLCM FEATURES

In [111]:
def glcm(img_path, dist, levels):
    """Compute horizontal gray-level co-occurrence counts scaled by the ROI size.

    Parameters
    ----------
    img_path : str
        Image file; read in colour and converted to grayscale.
    dist : int
        Largest pixel offset; matrices are computed for offsets ``1..dist``.
    levels : int
        Number of gray levels. For ``levels < 256`` the 8-bit image is
        re-quantised into ``levels`` equal-width bins first; ``levels == 256``
        uses the image as is.

    Returns
    -------
    np.ndarray
        Float array of shape ``(levels, levels, dist)``: the angle-0 counts
        from ``graycomatrix`` divided by ``R``, the number of pixels with gray
        value below 128. All zeros when ``R == 0``.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.

    Notes
    -----
    NOTE(review): the counts cover every pixel pair in the image, while ``R``
    counts only ROI pixels, so a slice ``out[:, :, k]`` is not guaranteed to
    sum to 1.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError(f"Unable to read image at {img_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    distances = list(range(1, dist + 1))

    # R: number of "dark" pixels (gray value < 128), used as the normaliser.
    roi_count = int(np.sum(extract_roi(gray, 128)))
    print(f"ROI (R): {roi_count}, type: {type(roi_count)}")

    if roi_count == 0:
        # Nothing to normalise by: return a correctly shaped all-zero result.
        print("Warning: ROI is empty, returning an all-zero GLCM")
        return np.zeros((levels, levels, len(distances)))

    if levels < 256:
        # graycomatrix requires every pixel value to be < levels.
        gray = ((gray.astype(np.uint32) * levels) // 256).astype(np.uint8)

    counts = graycomatrix(gray, distances=distances, angles=[0], levels=levels,
                          symmetric=False, normed=False)

    # Keep the single angle (index 0) and scale by the ROI pixel count.
    return counts[:, :, :, 0] / roi_count

In [112]:
def get_sum(arr, ch):
    """Sum an array along one of its first two axes, keeping that axis with length 1.

    Parameters
    ----------
    arr : array_like
        Array with at least two dimensions.
    ch : str
        ``'c'`` sums over axis 0 (one total per column);
        ``'r'`` sums over axis 1 (one total per row).

    Returns
    -------
    np.ndarray
        The reduced array (``keepdims=True``).

    Raises
    ------
    ValueError
        If ``ch`` is neither ``'c'`` nor ``'r'`` (previously this returned
        ``None`` silently).
    """
    if ch == 'c':
        return np.sum(arr, axis=0, keepdims=True)
    if ch == 'r':
        return np.sum(arr, axis=1, keepdims=True)
    raise ValueError(f"ch must be 'c' or 'r', got {ch!r}")

In [113]:
def get_var(arr, mean):
    """Variance of a marginal distribution over level indices, per distance.

    Parameters
    ----------
    arr : np.ndarray
        Marginal of shape ``(m, 1, d)`` or ``(1, n, d)``; entry ``j`` along
        the long axis is the weight of level ``j``.
    mean : array_like
        One mean per distance (any shape holding at least ``d`` values, or a
        single value); it is flattened before use.

    Returns
    -------
    np.ndarray
        Shape ``(1, 1, d)``: ``sum_j j**2 * arr[j] - mean**2``.

    Raises
    ------
    ValueError
        If ``arr`` is not a column ``(m, 1, d)`` or row ``(1, n, d)`` vector
        (previously this printed a message and then failed with NameError).
    """
    m, n, d = arr.shape

    if m != 1 and n == 1:
        marginal = arr[:, 0, :]      # (m, d)
    elif n != 1 and m == 1:
        marginal = arr[0, :, :]      # (n, d)
    else:
        raise ValueError(f"expected shape (m, 1, d) or (1, n, d), got {arr.shape}")

    mean = np.asarray(mean, dtype=np.float64).reshape(-1)[:d]

    level = np.arange(marginal.shape[0], dtype=np.float64).reshape(-1, 1)
    second_moment = np.sum((level ** 2) * marginal, axis=0)   # (d,)

    # Var = E[j^2] - (E[j])^2; the mean must be squared.
    return (second_moment - mean ** 2).reshape(1, 1, d)

In [114]:
def get_energy(arr):
    """Sum of squared entries of each 2-D slice of a stack of matrices.

    Parameters
    ----------
    arr : np.ndarray
        Array of shape ``(m, n, d)`` (e.g. a normalised GLCM per distance).

    Returns
    -------
    np.ndarray
        Float64 array of shape ``(d,)`` with ``sum_{j,k} arr[j, k, i]**2``.
    """
    m, n, d = arr.shape  # enforces the 3-D contract
    values = np.asarray(arr, dtype=np.float64)
    # Vectorised replacement for the element-by-element Python loops.
    return np.sum(values * values, axis=(0, 1))

In [115]:
def get_entropy1(arr, ar, ac):
    """Cross-entropy style term ``-sum p(i, j) * log(pr(i) * pc(j))`` per distance.

    Parameters
    ----------
    arr : np.ndarray
        Matrix stack of shape ``(m, n, d)``.
    ar : np.ndarray
        Row marginal of shape ``(m, 1, d)``.
    ac : np.ndarray
        Column marginal of shape ``(1, n, d)``.

    Returns
    -------
    np.ndarray
        Shape ``(d,)``. Only cells where ``p``, ``pr`` and ``pc`` are all
        strictly positive contribute. Natural logarithm.

    Raises
    ------
    ValueError
        If ``ar`` or ``ac`` does not have the shape implied by ``arr``
        (previously this only printed a message).
    """
    m, n, d = arr.shape
    if ar.shape != (m, 1, d):
        raise ValueError(f"ar must have shape {(m, 1, d)}, got {ar.shape}")
    if ac.shape != (1, n, d):
        raise ValueError(f"ac must have shape {(1, n, d)}, got {ac.shape}")

    p = np.asarray(arr, dtype=np.float64)
    pr = np.asarray(ar, dtype=np.float64)
    pc = np.asarray(ac, dtype=np.float64)

    valid = (p > 0) & (pr > 0) & (pc > 0)      # broadcasts to (m, n, d)
    log_prod = np.zeros(p.shape)
    # Evaluate the logarithm only where every factor is positive.
    np.log(pr * pc, out=log_prod, where=valid)

    summ = np.sum(np.where(valid, p * log_prod, 0.0), axis=(0, 1))
    return -1 * summ

In [116]:
def get_entropy2(ar, ac):
    """Entropy of the product of two marginals, per distance.

    Parameters
    ----------
    ar : np.ndarray
        Row marginal of shape ``(m, 1, d)``.
    ac : np.ndarray
        Column marginal of shape ``(1, n, d)``.

    Returns
    -------
    np.ndarray
        Shape ``(d,)``: ``-sum_{i,j} q * log(q)`` with ``q = ar[i] * ac[j]``,
        restricted to ``q > 0``. Natural logarithm.

    Raises
    ------
    ValueError
        If the inputs are not a column/row pair with matching last axis
        (previously this only printed a message).
    """
    m1, n1, d_r = ar.shape
    m2, n2, d_c = ac.shape
    if n1 != 1:
        raise ValueError(f"ar must have shape (m, 1, d), got {ar.shape}")
    if m2 != 1:
        raise ValueError(f"ac must have shape (1, n, d), got {ac.shape}")
    if d_r != d_c:
        raise ValueError(f"ar and ac disagree on the last axis: {d_r} vs {d_c}")

    # Outer product of the two marginals via broadcasting: (m1, n2, d).
    prod = np.asarray(ar, dtype=np.float64) * np.asarray(ac, dtype=np.float64)
    positive = prod > 0

    log_prod = np.zeros(prod.shape)
    np.log(prod, out=log_prod, where=positive)

    summ = np.sum(np.where(positive, prod * log_prod, 0.0), axis=(0, 1))
    return -1 * summ

In [117]:
def get_entropy3(arr):
    """Entropy ``-sum p * log(p)`` of each 2-D slice of a matrix stack.

    Parameters
    ----------
    arr : np.ndarray
        Array of shape ``(m, n, d)`` (e.g. a normalised GLCM per distance).

    Returns
    -------
    np.ndarray
        Shape ``(d,)``. Only strictly positive entries contribute. Natural
        logarithm.
    """
    m, n, d = arr.shape  # enforces the 3-D contract
    p = np.asarray(arr, dtype=np.float64)
    positive = p > 0

    log_p = np.zeros(p.shape)
    # Evaluate the logarithm only on positive entries.
    np.log(p, out=log_p, where=positive)

    summ = np.sum(np.where(positive, p * log_p, 0.0), axis=(0, 1))
    return -1 * summ

In [118]:
def max_prob(arr):
    """Largest entry of each 2-D slice of a matrix stack.

    Parameters
    ----------
    arr : np.ndarray
        Array of shape ``(m, n, d)``.

    Returns
    -------
    np.ndarray
        Float64 array of shape ``(d,)`` holding ``max(arr[:, :, i])`` for each ``i``.
    """
    m, n, d = arr.shape
    peaks = [np.max(arr[:, :, layer]) for layer in range(d)]
    return np.array(peaks, dtype=np.float64)

In [119]:
def get_haralicks(arr, ur, uc, var_r, var_c, eps=1e-8):
    """Correlation-type feature of a matrix stack, per distance.

    Computes ``sum_{i,j} (i - ur) * (j - uc) * arr[i, j] / (sqrt(var_r * var_c) + eps)``.

    Parameters
    ----------
    arr : np.ndarray
        Matrix stack of shape ``(m, n, d)``.
    ur, uc : np.ndarray
        Row and column means, indexed as ``[0, 0, k]`` (shape ``(1, 1, d)``).
    var_r, var_c : np.ndarray
        Row and column variances, indexed as ``[0, 0, k]``.
    eps : float, optional
        Added to the denominator so it cannot be exactly zero.

    Returns
    -------
    np.ndarray
        Float64 array of shape ``(d,)``. A negative ``var_r * var_c`` yields
        NaN from the square root.
    """
    m, n, d = arr.shape

    arr = arr.astype(np.float64)
    mu_r = ur.astype(np.float64)[0, 0, :d]
    mu_c = uc.astype(np.float64)[0, 0, :d]
    v_r = var_r.astype(np.float64)[0, 0, :d]
    v_c = var_c.astype(np.float64)[0, 0, :d]

    # Deviation of each row/column index from its mean, broadcast per distance.
    row_dev = np.arange(m, dtype=np.float64).reshape(m, 1, 1) - mu_r   # (m, 1, d)
    col_dev = np.arange(n, dtype=np.float64).reshape(1, n, 1) - mu_c   # (1, n, d)

    covariance = np.sum(row_dev * col_dev * arr, axis=(0, 1))          # (d,)
    scale = np.sqrt(v_r * v_c) + eps

    return covariance / scale

In [120]:
def get_dCorr(arr, ur, uc):
    """Diagonal-correlation feature of a matrix stack, per distance.

    Computes ``sum_{i,j} |i - j| * (i + j - ur - uc) * arr[i, j]``.

    Parameters
    ----------
    arr : np.ndarray
        Matrix stack of shape ``(m, n, d)``.
    ur, uc : np.ndarray
        Row and column means, indexed as ``[0, 0, k]`` (shape ``(1, 1, d)``).

    Returns
    -------
    np.ndarray
        Float64 array of shape ``(d,)``.
    """
    m, n, d = arr.shape

    arr = arr.astype(np.float64)
    mu_r = ur.astype(np.float64)[0, 0, :d]
    mu_c = uc.astype(np.float64)[0, 0, :d]

    rows = np.arange(m, dtype=np.float64).reshape(m, 1, 1)
    cols = np.arange(n, dtype=np.float64).reshape(1, n, 1)

    abs_diff = np.abs(rows - cols)                 # (m, n, 1)
    centred_sum = rows + cols - mu_r - mu_c        # (m, n, d)

    return np.sum(abs_diff * centred_sum * arr, axis=(0, 1))

In [121]:
def get_D_k(arr):
    """Difference histogram of a matrix stack.

    Parameters
    ----------
    arr : np.ndarray
        Matrix stack of shape ``(m, n, d)``.

    Returns
    -------
    np.ndarray
        Float array of shape ``(max(m, n), d)``; row ``k`` of column ``l`` is
        the sum of ``arr[i, j, l]`` over all cells with ``|i - j| == k``.
    """
    m, n, d = arr.shape

    md = max(m, n) - 1  # largest possible |i - j|

    # |i - j| for every cell, flattened in the same row-major order as each slice.
    diff = np.abs(np.arange(m)[:, None] - np.arange(n)[None, :]).ravel()

    D_k = np.zeros((md + 1, d))
    for k in range(d):
        # bincount with weights accumulates each cell into its |i - j| bucket.
        D_k[:, k] = np.bincount(diff, weights=arr[:, :, k].ravel(), minlength=md + 1)

    return D_k

In [122]:
def get_Denergy(arr):
    """Sum a histogram array along its first axis.

    For a ``(K, d)`` histogram this returns one total per column. The values
    are added as they are (no squaring is applied).
    """
    column_totals = np.sum(arr, axis=0)
    return column_totals

In [123]:
def get_Dentropy3(arr):
    """Entropy ``-sum h * log(h)`` of each column of a 2-D histogram array.

    Parameters
    ----------
    arr : np.ndarray
        Array of shape ``(m, d)`` (one histogram per column).

    Returns
    -------
    np.ndarray
        Shape ``(d,)``. Only strictly positive entries contribute. Natural
        logarithm.
    """
    m, d = arr.shape  # enforces the 2-D contract
    hist = np.asarray(arr, dtype=np.float64)
    positive = hist > 0

    log_h = np.zeros(hist.shape)
    # Evaluate the logarithm only on positive entries.
    np.log(hist, out=log_h, where=positive)

    summ = np.sum(np.where(positive, hist * log_h, 0.0), axis=0)
    return -1 * summ

In [124]:
def get_inertia(D_k):
    """Second moment ``sum_k k**2 * D_k[k]`` of each histogram column.

    ``D_k`` has shape ``(K, d)``; the result has shape ``(d,)``.
    """
    offsets = np.arange(D_k.shape[0])
    weights = np.square(offsets).reshape(-1, 1)   # k**2 as a column vector
    return (weights * D_k).sum(axis=0)

In [125]:
def get_hD(D_k):
    """Homogeneity-style sum ``sum_k D_k[k] / (1 + k**2)`` of each histogram column.

    ``D_k`` has shape ``(K, d)``; the result has shape ``(d,)``.
    """
    offsets = np.arange(D_k.shape[0]).reshape(-1, 1)
    damped = D_k / (offsets * offsets + 1)
    return damped.sum(axis=0)

In [126]:
def get_S_k(arr):
    """Sum histogram of a matrix stack.

    Parameters
    ----------
    arr : np.ndarray
        Matrix stack of shape ``(m, n, d)``.

    Returns
    -------
    np.ndarray
        Float array of shape ``(m + n - 1, d)``; row ``k`` of column ``l`` is
        the sum of ``arr[i, j, l]`` over all cells with ``i + j == k``.
    """
    m, n, d = arr.shape

    ms = m + n - 2  # largest possible i + j

    # i + j for every cell, flattened in the same row-major order as each slice.
    index_sum = (np.arange(m)[:, None] + np.arange(n)[None, :]).ravel()

    S_k = np.zeros((ms + 1, d))
    for k in range(d):
        # bincount with weights accumulates each cell into its i + j bucket.
        S_k[:, k] = np.bincount(index_sum, weights=arr[:, :, k].ravel(), minlength=ms + 1)

    return S_k

In [127]:
def mean_S(S_k):
    """First moment ``sum_k k * S_k[k]`` of each histogram column.

    ``S_k`` has shape ``(K, d)``; the result is a row vector of shape ``(1, d)``.
    """
    idx = np.arange(S_k.shape[0]).reshape(-1, 1)   # k as a column vector
    weighted = idx * S_k
    return weighted.sum(axis=0).reshape(1, -1)


In [128]:
def get_sigma(S_k, mu_S):
    """Second central moment ``sum_k (k - mu_S)**2 * S_k[k]`` of each histogram column.

    Parameters
    ----------
    S_k : np.ndarray
        Histogram array of shape ``(K, d)``.
    mu_S : array_like
        One mean per column (``d`` values; shape ``(1, d)`` or ``(d,)``).

    Returns
    -------
    np.ndarray
        Row vector of shape ``(1, d)``.
    """
    # Derive the index range from the histogram instead of assuming 511 rows.
    k = np.arange(S_k.shape[0]).reshape(-1, 1)
    mu_S = np.asarray(mu_S, dtype=np.float64).reshape(1, -1)
    result = np.sum(((k - mu_S) ** 2) * S_k, axis=0)
    return result.reshape(1, -1)


In [129]:
def get_A_B_d(S_k, mu_r, mu_c, sigma_r, sigma_c):
    """Normalised third and fourth moments of the sum histogram, per distance.

    With ``diff = k - mu_r - mu_c`` and
    ``denom = sigma_r**2 + sigma_c**2 + 2 * sigma_r * sigma_c``:

    * ``AD = sum_k diff**3 * S_k[k] / denom**1.5``
    * ``BD = sum_k diff**4 * S_k[k] / denom**2``

    Parameters
    ----------
    S_k : np.ndarray
        Sum histogram of shape ``(K, d)``.
    mu_r, mu_c, sigma_r, sigma_c : array_like
        Row/column means and standard deviations, ``d`` values each (any
        shape that reshapes to ``(1, d)``).

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``AD`` and ``BD``, each of shape ``(1, d)``. A zero ``denom`` is
        replaced by ``1e-10``.
    """
    m, d = S_k.shape
    k = np.arange(m, dtype=np.float64).reshape(-1, 1)   # (K, 1)

    # Each statistic is taken from its own argument (the earlier code derived
    # mu_c, sigma_r and sigma_c from mu_r) and sized from S_k rather than a
    # fixed 10 columns.
    mu_r = np.asarray(mu_r, dtype=np.float64).reshape(1, d)
    mu_c = np.asarray(mu_c, dtype=np.float64).reshape(1, d)
    sigma_r = np.asarray(sigma_r, dtype=np.float64).reshape(1, d)
    sigma_c = np.asarray(sigma_c, dtype=np.float64).reshape(1, d)

    diff = k - mu_r - mu_c                                # (K, d)

    denom = sigma_r ** 2 + sigma_c ** 2 + 2 * sigma_r * sigma_c   # (1, d)
    denom = np.where(denom == 0, 1e-10, denom)            # guard the division

    AD = np.sum((diff ** 3) * S_k, axis=0, keepdims=True) / (denom ** 1.5)
    BD = np.sum((diff ** 4) * S_k, axis=0, keepdims=True) / (denom ** 2)

    return AD, BD

In [130]:
# ALL DFT FEATURES

In [131]:
def normalized_projection(img, roi_mask):
    """Per-row mean of the image values inside the ROI.

    Parameters
    ----------
    img : np.ndarray
        2-D image.
    roi_mask : np.ndarray
        Mask of the same shape; it multiplies the image and its row sums are
        used as the per-row pixel counts.

    Returns
    -------
    np.ndarray
        Float array with one value per row: masked row sum divided by the row's
        mask sum, or 0.0 for rows whose mask sum is not positive.
    """
    masked_row_sum = np.sum(img * roi_mask, axis=1)
    roi_per_row = np.sum(roi_mask, axis=1)
    b = np.zeros_like(masked_row_sum, dtype=float)
    # Divide only where the row contains ROI pixels; other rows keep their 0.0.
    np.divide(masked_row_sum, roi_per_row, out=b, where=roi_per_row > 0)
    return b

In [132]:
def dft_features(b):
    """Magnitudes of 15 DFT bins of a 1-D projection signal.

    The signal is zero-padded or truncated to ``N = 240`` samples and the
    magnitude of the length-240 DFT is read at bins 10, 20, ..., 150.

    Parameters
    ----------
    b : array_like
        1-D real-valued signal.

    Returns
    -------
    np.ndarray
        Float array of shape ``(15,)``.

    Notes
    -----
    NOTE(review): for a real signal of length 240 the bins above 120 mirror
    lower ones (``|X[130]| == |X[110]|``, ``|X[140]| == |X[100]|``,
    ``|X[150]| == |X[90]|``), so the last three features repeat earlier values.
    """
    N = 240
    b = np.asarray(b, dtype=np.float64)

    if len(b) < N:
        b_padded = np.pad(b, (0, N - len(b)), mode='constant')
    else:
        b_padded = b[:N]

    # One FFT replaces the per-frequency Python summation loop.
    spectrum = np.fft.fft(b_padded)

    # Bins 10, 20, ..., 150.
    bins = np.arange(10, 151, 10)
    return np.abs(spectrum[bins])

In [133]:
# make a final feature vector

In [134]:
# make a complete glcm feature 

def get_all_glcm_features(img_path, dist, levels):
    """Assemble the 20 GLCM-derived feature rows for one image.

    Parameters
    ----------
    img_path : str
        Image file, forwarded to ``glcm``.
    dist : int
        Largest pixel offset, forwarded to ``glcm``.
    levels : int
        Number of gray levels, forwarded to ``glcm``.

    Returns
    -------
    np.ndarray
        Array of shape ``(20, d)`` where ``d`` is the number of distances in
        the matrix returned by ``glcm``. Row order: the two marginal totals,
        two variances, energy, three entropies, maximum, correlation,
        diagonal correlation, four difference-histogram features and five
        sum-histogram features.

    Notes
    -----
    NOTE(review): the values passed downstream as "row mean" and "column
    mean" are ``get_sum(get_sum(p, 'r'), 'c')`` and
    ``get_sum(get_sum(p, 'c'), 'r')``, which are both the total of all matrix
    entries. Rows 0 and 1 of the result are therefore identical. Confirm
    whether an index-weighted mean (``sum_i i * p_r(i)``) was intended.
    """
    # Scaled co-occurrence matrices, shape (levels, levels, d).
    s1 = glcm(img_path, dist, levels)

    # Marginals.
    s11 = get_sum(s1, 'r')          # (levels, 1, d): sum over columns
    s12 = get_sum(s1, 'c')          # (1, levels, d): sum over rows

    # Totals of each marginal and the variances built on them, computed once
    # and reused below. Shape (1, 1, d).
    mu_r = get_sum(s11, 'c')
    mu_c = get_sum(s12, 'r')
    var_r = get_var(s11, mu_r)
    var_c = get_var(s12, mu_c)

    # Basic GLCM features x 4. reshape(1, -1) adapts to any number of
    # distances instead of assuming exactly 10.
    s111 = mu_r.reshape(1, -1)
    s112 = mu_c.reshape(1, -1)
    s13 = var_r.reshape(1, -1)
    s14 = var_c.reshape(1, -1)

    # Second level of features x 7.
    s21 = get_energy(s1).reshape(1, -1)
    s22 = get_entropy1(s1, s11, s12)
    s23 = get_entropy2(s11, s12)
    s24 = get_entropy3(s1)
    s25 = max_prob(s1)
    s26 = get_haralicks(s1, mu_r, mu_c, var_r, var_c)
    s27 = get_dCorr(s1, mu_r, mu_c)

    # Difference-histogram features x 4.
    d1 = get_D_k(s1)
    d11 = get_Denergy(d1)
    d12 = get_Dentropy3(d1)
    d13 = get_inertia(d1)
    d14 = get_hD(d1)

    # Sum-histogram features x 5.
    m1 = get_S_k(s1)
    m2 = mean_S(m1)
    m11 = get_Denergy(m1)
    m12 = get_Dentropy3(m1)
    m13 = get_sigma(m1, m2)
    m14, m15 = get_A_B_d(m1, mu_r, mu_c, np.sqrt(var_r), np.sqrt(var_c))

    feature_vector = [s111, s112, s13, s14, s21, s22, s23, s24, s25, s26, s27, d11,
                      d12, d13, d14, m11, m12, m13, m14, m15]

    # 1-D entries are treated as rows by vstack, giving a (20, d) matrix.
    final_vector = np.vstack(feature_vector)
    return final_vector
    # print(final_vector.shape)


In [135]:
# if __name__ == "__main__":

#     # img_path = "/home/raginivi/Desktop/CV_project/saved_e/e_1.png"  
#     image_path = "/home/raginivi/Desktop/CV_project/saved_e/5/ef/features_03.pkl"
#     gray_img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) 
    
#     feature_20_glcm = get_all_glcm_features(image_path, 10, 256)
    
#     roi_mask = extract_roi(gray_img, threshold=128)
#     b_vector = normalized_projection(gray_img, roi_mask)
#     features_15_dft = dft_features(b_vector).reshape(-1,1)

#     variance = compute_variance(gray_img, roi_mask)
#     entropy = compute_entropy(gray_img, roi_mask)
#     features_2_roi = np.array([[variance], [entropy]]) 

#     print(f"Extracted GLCM feature vector: {feature_20_glcm}")
#     print(f"Shape of GLCM feature vector: {np.shape(feature_20_glcm)}")
#     print(f"15-band DFT features:\n", features_15_dft)
#     print(f"Shape of DFT feature vector: {np.shape(features_15_dft)}")
#     print(f"2 features:\n", features_2_roi)
#     print(f"2 features vector: {np.shape(features_2_roi)}")

error: OpenCV(4.11.0) /io/opencv/modules/imgproc/src/color.cpp:199: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [102]:
def extract_and_save_features(image_paths, labels, save_dir, dist=10, levels=256):
    """Extract a feature matrix for every image and pickle it into ``save_dir``.

    For each readable image the function stacks, row-wise:

    * the 20 GLCM feature rows from ``get_all_glcm_features`` (one column per distance),
    * 15 DFT features of the normalised row projection,
    * the ROI variance and ROI entropy.

    The 15 + 2 per-image scalars are repeated across the distance columns, so
    the saved array has shape ``(37, n_dist)``.

    Parameters
    ----------
    image_paths : list of str
        Paths of the images to process.
    labels : list
        One label per image. Only used to pair with ``image_paths`` (the
        shorter list bounds the loop); labels are NOT written to the output.
    save_dir : str
        Directory (created if missing) receiving ``features_{idx:02d}.pkl``,
        where ``idx`` is the image's position in ``image_paths``.
    dist : int, optional
        Largest GLCM pixel offset (default 10, the previously hard-coded value).
    levels : int, optional
        Number of gray levels for the GLCM (default 256, as before).

    Side effects
    ------------
    Writes one pickle per readable image and prints its path; unreadable images
    are reported and skipped (their index is not reused).
    """
    os.makedirs(save_dir, exist_ok=True)

    for idx, (img_path, _label) in enumerate(zip(image_paths, labels)):
        # Step 1: read the image in grayscale.
        gray_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if gray_img is None:
            print(f"Error: Unable to read image at {img_path}")
            continue  # Skip if image is not read successfully

        # Step 2: GLCM features, shape (20, n_dist).
        feature_20_glcm = get_all_glcm_features(img_path, dist, levels)
        n_dist = feature_20_glcm.shape[1]

        # Step 3: ROI mask and DFT features of the normalised projection.
        roi_mask = extract_roi(gray_img, threshold=128)
        b_vector = normalized_projection(gray_img, roi_mask)
        features_15_dft = dft_features(b_vector).reshape(15, 1)

        # Step 4: ROI statistics.
        variance = compute_variance(gray_img, roi_mask)
        entropy = compute_entropy(gray_img, roi_mask)
        features_2_roi = np.array([[variance], [entropy]])  # shape: (2, 1)

        # Step 5: stack everything; the per-image scalars are tiled across the
        # distance columns so all three parts share the same width.
        combined_features = np.concatenate([
            feature_20_glcm,                          # (20, n_dist)
            features_15_dft.repeat(n_dist, axis=1),   # (15, n_dist)
            features_2_roi.repeat(n_dist, axis=1),    # (2, n_dist)
        ], axis=0)                                    # (37, n_dist)

        save_path = os.path.join(save_dir, f'features_{idx:02d}.pkl')
        with open(save_path, 'wb') as f:
            pickle.dump(combined_features, f)

        print(f"Saved: {save_path}")

In [103]:
# with open('output_features/features_00.pkl', 'rb') as f:
#     unpickled_features = pickle.load(f)

# # Display the unpickled features (the 2D matrix)
# print("Unpickled Features (Shape: {}):".format(unpickled_features.shape))
# print(unpickled_features)

# # Optionally, if you want to display specific rows or columns:
# print("\nFirst Row:")
# print(unpickled_features[7])  # Display the first row

# print("\nFirst Column:")
# print(unpickled_features[:, 7])  # Display the first column

In [137]:
def process_directories(input_dirs, save_base_dir):
    """Extract and save features for every image in each class directory.

    Parameters
    ----------
    input_dirs : list of str
        Directories to process; each directory is treated as one class and
        its base name is used as the label.
    save_base_dir : str
        Accepted for interface compatibility but not used: features are
        written to an ``ef`` sub-directory inside each class directory.

    Notes
    -----
    File names are processed in sorted order so that ``features_NN.pkl``
    always corresponds to the same image; ``os.listdir`` alone returns names
    in arbitrary order. The sort is lexicographic (``e_10.png`` precedes
    ``e_2.png``).
    """
    for class_dir in input_dirs:
        # The directory's base name serves as the class label.
        class_label = os.path.basename(class_dir)

        # Features go into '<class_dir>/ef'.
        save_dir = os.path.join(class_dir, 'ef')
        os.makedirs(save_dir, exist_ok=True)

        # Collect image files only, in a deterministic order.
        image_paths = []
        labels = []
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            if img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_paths.append(img_path)
                labels.append(class_label)

        extract_and_save_features(image_paths, labels, save_dir)


# Example usage: run feature extraction over five class folders.
# NOTE(review): these are absolute, machine-specific paths — adjust before running elsewhere.
input_dirs = ['/home/raginivi/Desktop/CV_project/saved_e/1', '/home/raginivi/Desktop/CV_project/saved_e/2'
              , '/home/raginivi/Desktop/CV_project/saved_e/3', '/home/raginivi/Desktop/CV_project/saved_e/4',
             '/home/raginivi/Desktop/CV_project/saved_e/5']  # one directory per class
# process_directories (defined in this cell) never reads this argument; features are written to '<class_dir>/ef'.
save_base_dir = 'extracted_features'

process_directories(input_dirs, save_base_dir)

ROI (R): 55, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_00.pkl
ROI (R): 49, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_01.pkl
ROI (R): 55, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_02.pkl
ROI (R): 49, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_03.pkl
ROI (R): 48, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_04.pkl
ROI (R): 58, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_05.pkl
ROI (R): 41, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_06.pkl
ROI (R): 47, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_07.pkl
ROI (R): 49, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/1/ef/features_08.pkl
ROI (R): 51, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project

In [105]:
def process_directories(input_dirs, save_base_dir):
    """Extract and save features for every image in each class directory.

    This cell re-defines ``process_directories``; a definition with the same
    name also appears in an earlier cell, and whichever cell ran last is the
    one in effect.

    Parameters
    ----------
    input_dirs : list of str
        Directories to process; each directory is treated as one class and
        its base name is used as the label.
    save_base_dir : str
        Accepted for interface compatibility but not used: features are
        written to an ``ef`` sub-directory inside each class directory.

    Notes
    -----
    File names are processed in sorted order so that ``features_NN.pkl``
    always corresponds to the same image; ``os.listdir`` alone returns names
    in arbitrary order. The sort is lexicographic (``e_10.png`` precedes
    ``e_2.png``).
    """
    for class_dir in input_dirs:
        # The directory's base name serves as the class label.
        class_label = os.path.basename(class_dir)

        # Features go into '<class_dir>/ef'.
        save_dir = os.path.join(class_dir, 'ef')
        os.makedirs(save_dir, exist_ok=True)

        # Collect image files only, in a deterministic order.
        image_paths = []
        labels = []
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            if img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_paths.append(img_path)
                labels.append(class_label)

        extract_and_save_features(image_paths, labels, save_dir)


# Example usage: run feature extraction for a single class folder.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
input_dirs = ['/home/raginivi/Desktop/CV_project/saved_e/5']  # one directory per class
# process_directories (defined in this cell) never reads this argument; features are written to '<class_dir>/ef'.
save_base_dir = 'extracted_features'

process_directories(input_dirs, save_base_dir)

ROI (R): 56, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_00.pkl
ROI (R): 50, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_01.pkl
ROI (R): 55, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_02.pkl
ROI (R): 49, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_03.pkl
ROI (R): 47, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_04.pkl
ROI (R): 69, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_05.pkl
ROI (R): 41, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_06.pkl
ROI (R): 46, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_07.pkl
ROI (R): 48, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project/saved_e/4/ef/features_08.pkl
ROI (R): 49, type: <class 'int'>
Saved: /home/raginivi/Desktop/CV_project

In [138]:
import pickle

# Sanity check: load one saved feature file back and inspect it.
# The path is relative to the current working directory; replace it with your own file.
# file_path = '/home/raginivi/saved_e/1/ef/features_02.pkl'
file_path = 'saved_e/5/ef/features_03.pkl'

# NOTE: pickle.load can execute arbitrary code; only open files produced by this notebook.
with open(file_path, 'rb') as f:
    data = pickle.load(f)

# Show the type of the loaded object, then its full contents.
print("Type of unpickled object:", type(data))
print("\nContents:\n", data)


Type of unpickled object: <class 'numpy.ndarray'>

Contents:
 [[3.640e+00 3.360e+00 3.080e+00 2.800e+00 2.520e+00 2.240e+00 1.960e+00
  1.680e+00 1.400e+00 1.120e+00]
 [3.640e+00 3.360e+00 3.080e+00 2.800e+00 2.520e+00 2.240e+00 1.960e+00
  1.680e+00 1.400e+00 1.120e+00]
 [1.444e+05 1.299e+05 1.194e+05 1.105e+05 1.012e+05 9.042e+04 7.939e+04
  6.918e+04 6.039e+04 5.517e+04]
 [1.464e+05 1.299e+05 1.141e+05 1.054e+05 1.002e+05 9.143e+04 8.121e+04
  7.018e+04 5.939e+04 5.006e+04]
 [8.640e-02 7.680e-02 6.560e-02 6.000e-02 5.760e-02 4.800e-02 4.000e-02
  3.360e-02 3.120e-02 2.240e-02]
 [1.952e+01 1.839e+01 1.722e+01 1.606e+01 1.459e+01 1.331e+01 1.191e+01
  1.047e+01 8.927e+00 7.274e+00]
 [7.106e+01 6.178e+01 5.305e+01 4.497e+01 3.677e+01 2.981e+01 2.335e+01
  1.760e+01 1.250e+01 8.147e+00]
 [1.386e+01 1.287e+01 1.191e+01 1.083e+01 9.664e+00 8.669e+00 7.640e+00
  6.572e+00 5.383e+00 4.381e+00]
 [8.000e-02 8.000e-02 4.000e-02 6.000e-02 8.000e-02 6.000e-02 4.000e-02
  2.000e-02 6.000e-02 2.00