In [1]:
# !pip install -U imbalanced-learn

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import os
import cv2
from scipy.stats import loguniform
from time import time 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.filters import sobel
from scipy.ndimage import gaussian_filter

from skimage.feature import canny
from skimage.filters import median
from skimage.restoration import denoise_bilateral

from skimage.filters import prewitt_h, prewitt_v
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from imblearn.over_sampling import SMOTE

from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

scaler = StandardScaler()

import joblib

In [2]:
# ref : https://www.tutorialkart.com/opencv/python/opencv-python-resize-image/#gsc.tab=0

# DOWNSCALE BY 50% : (480,499) = size
def downsample_image(img):
    scale_percent = 20 
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized
 
def upsample_image(img):   # To be used after prediction
    scale_percent = 500 
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized

def find_segfile_of_image(image,segmentations):
        image_name = image.split(".")[0]
        for seg in segmentations:
            if image_name in seg:
                return seg

In [3]:
### PREPROCESSING FUNCTION
def preprocess_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # check if size is correct 
    # 1) Convert to gray scale
    # 2) Normalize 
    # 3) Downsample image to 20%
    
    if(img.shape[0] != 960):
        print("Change shape")
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    final_img = gray_img/255.0
    final_img = downsample_image(gray_img)
    
    return final_img

def preprocess_seg_map(seg_path):
    # reading array 
    seg_map = cv2.imread(seg_path,cv2.IMREAD_GRAYSCALE)
    # Convert from bool to uint8
    seg_map = seg_map.astype(np.uint8)
    # convert the max value pixel to 1 (To create a binary seg map)
    seg_map[seg_map == 255] = 1
    
    # Downsample the seg_map by 50%
    seg_map = downsample_image(seg_map)
    return seg_map

def extract_features(gray_img,seg_map):
    radius = 1
    entropy_img = entropy(gray_img,disk(radius))
    
    sigma = 5  
    gauss_image = gaussian_filter(gray_img, sigma=sigma) #### (APPLIED ON COLOR ORIGINALLY)
    sobel_filtered = sobel(gray_img) 
    
    edges = canny(gray_img)
    
    median_filtered = median(gray_img)                               ##### (APPLIED ON COLOR ORIGINALLY)
    
    bilateral_filtered = denoise_bilateral(gray_img,channel_axis=-1,multichannel=False) ##### (APPLIED ON COLOR ORIGINALLY)
    
    prewitt_horizontal = prewitt_h(gray_img)
    prewitt_vertical = prewitt_v(gray_img)
    
    pixel_feature = gray_img.reshape(-1).reshape(-1, 1)                        #1
    entropy_feature = entropy_img.reshape(-1).reshape(-1, 1)                   #2
    gauss_feature = gauss_image.reshape(-1).reshape(-1, 1)                     #3
    sobel_feature = sobel_filtered.reshape(-1).reshape(-1, 1)                  #4
    edges_feature = edges.reshape(-1).reshape(-1, 1)                           #5
    median_feature = median_filtered.reshape(-1).reshape(-1, 1)                #6
    bilateral_feature = bilateral_filtered.reshape(-1).reshape(-1, 1)          #7
    prewitt_horizontal_feature = prewitt_horizontal.reshape(-1).reshape(-1, 1) #8
    prewitt_vertical_feature = prewitt_vertical.reshape(-1).reshape(-1, 1)     #9
    
    label = seg_map.reshape(-1).reshape(-1, 1) 
    
    feature_matrix = np.hstack((pixel_feature,entropy_feature,gauss_feature,
                               sobel_feature,edges_feature,median_feature,
                               bilateral_feature,prewitt_horizontal_feature,
                               prewitt_vertical_feature,label))
    return feature_matrix

#### PREPROCESSING AND FEATURE EXTRACTION 

1) Preprocessing : 
    
    (a) Conversion to gray scale (For feature extraction)
    
    (b) Resizing to 150x150 if not training in 960x999 
    
    (c) Normalizing : divide by 255.0
    
2) Feature extraction

   Features:-
    (1) Entropy  (TEXTURE)
    
    (2) Sobel    
    
    (3) Gaussian          
    
    (4) Canny edge detector
    
    (5) Median filter
    
    (6) Bilateral filter
    
    (7) Prewitt vertical filter
    
    (8) Prewitt horizontal filter
    
    (9) Pixel intensity 
    
    CAN ADD GABOR FEATURES TO THIS AS WELL
    
    (10)Pixel label (1 or 0)
    
    PROBLEM : 1) Some features are found on gray scale some on color 
                 afterwards we get feature maps of different channels
                 How to flatten them to get consistent dimensions afterwards 
                 in order to get a feature vector for an image

In [4]:
image_folder = "IMAGES"
seg_folder = "SEG1"

images = os.listdir(image_folder)
segmentations = os.listdir(seg_folder)

image_features_matrices_combined = []

i = 0
for image in images:
    # 10 images considered
    if(i == 2):
        break
    
    # preprocessing image
    image_path = os.path.join(image_folder,image)
    gray_img = preprocess_image(image_path)
    
    
    # preprocessing segmentation map
    seg_name = find_segfile_of_image(image,segmentations)
    seg_path = os.path.join(seg_folder,seg_name)
    seg_map = preprocess_seg_map(seg_path)
    
    
    # feature extraction    
    feature_matrix = extract_features(gray_img,seg_map)
    image_features_matrices_combined.append(feature_matrix)
    
    print("Feature matrix for image ",image," : ",feature_matrix.shape)
    i+=1
    
print()
Final_matrix = np.vstack(image_features_matrices_combined)
print("Final shape of data matrix : ",Final_matrix.shape)

#     print("dtype of image array : ",img.dtype)
#     print("dtype of seg map : ",seg_map.dtype)
#     dtype of image array :  uint8
#     dtype of seg map :  uint8

Feature matrix for image  Image_01L.jpg  :  (38208, 10)
Feature matrix for image  Image_01R.jpg  :  (38208, 10)

Final shape of data matrix :  (76416, 10)


  bilateral_filtered = denoise_bilateral(gray_img,channel_axis=-1,multichannel=False) ##### (APPLIED ON COLOR ORIGINALLY)


#### TRAIN, VALIDATION AND TEST SPLITS 

In [5]:
df = pd.DataFrame(Final_matrix)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
382075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
382076,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
382077,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
382078,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
is_zero_df = df.eq(0).all().all()

if is_zero_df:
    print("The entire DataFrame is filled with 0's.")
else:
    print("The DataFrame contains non-zero values.")
    
filtered_df = df[df.iloc[:, -1] == 1]
filtered_df

The DataFrame contains non-zero values.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
900,28.0,2.321928,25.0,0.104576,1.0,28.0,0.155991,0.150327,-0.011765,1.0
1082,45.0,2.321928,27.0,0.081467,0.0,42.0,0.177210,0.113725,-0.005229,1.0
1083,42.0,2.321928,28.0,0.060380,0.0,42.0,0.181053,0.086275,0.022222,1.0
1099,37.0,1.921928,28.0,0.030455,0.0,37.0,0.169538,0.044444,-0.010458,1.0
1282,40.0,2.321928,31.0,0.001961,0.0,45.0,0.188996,0.001307,0.001307,1.0
...,...,...,...,...,...,...,...,...,...,...
380790,82.0,2.321928,61.0,0.035430,0.0,82.0,0.250460,-0.036601,0.020915,1.0
380989,76.0,2.321928,54.0,0.124417,0.0,79.0,0.151533,-0.175163,-0.010458,1.0
380990,79.0,2.321928,53.0,0.150151,0.0,79.0,0.154587,-0.209150,0.009150,1.0
381188,37.0,2.321928,48.0,0.216667,1.0,37.0,0.025611,-0.308497,-0.023529,1.0


#### Final_matrix : matrix that is to be used for training 
#### df : final df 

#### TRAIN : VALIDATION : TEST SPLIT ( 70 : 15 : 15)

In [5]:
# Random shuffle the matrix 
X = Final_matrix
np.random.shuffle(X)

n_samples = X.shape[0]

train_ratio = 0.8
test_ratio = 0.2

n_train = int(train_ratio * n_samples)
n_test = n_samples - n_train

x_train = X[:n_train, :]
x_test = X[n_train :, :]

print(f"X_train shape: {x_train.shape}")
print(f"X_test shape: {x_test.shape}")

X_train shape: (61132, 10)
X_test shape: (15284, 10)


#### CLASS IMBALANCE CHECK IN TRAIN MATRIX 

In [6]:
class_labels = x_train[:, -1]

count_class_0 = np.count_nonzero(class_labels == 0)
count_class_1 = np.count_nonzero(class_labels == 1)

print(f"Number of samples with class label 0: {count_class_0}")
print(f"Number of samples with class label 1: {count_class_1}")

Number of samples with class label 0: 56925
Number of samples with class label 1: 4207


In [7]:
x_train_features = x_train[:, :-1]
# X_train_features_scaled = scaler.fit_transform(X_train_features)

y_train_target = x_train[:, -1]

print("Before Xtrain shape : ",x_train_features.shape)
print("Before Ytrain shape : ",y_train_target.shape)
print()

count_zeros = np.count_nonzero(y_train_target == 0)
count_ones = np.count_nonzero(y_train_target == 1)
print("Number of zeros in ytrain before : ", count_zeros)
print("Number of ones ytrain before : ", count_ones)
print()
############################### CLASS BALANCING USING SMOTE #############################33
smote = SMOTE(random_state=42)
x_train_resampled, y_train_resampled = smote.fit_resample(x_train_features, y_train_target)

############################### COUNTING CLASS WISE SAMPLES #############################33

count_zeros = np.count_nonzero(y_train_resampled == 0)
count_ones = np.count_nonzero(y_train_resampled == 1)

print("Number of zeros in ytrain after :", count_zeros)
print("Number of ones ytrain after :", count_ones)
print()
############################### SHAPES OF TRAINING DATA ####################################
print("x_train shape : ",x_train_resampled.shape)
print("y_train shape : ",y_train_resampled.shape)
print()

Before Xtrain shape :  (61132, 9)
Before Ytrain shape :  (61132,)

Number of zeros in ytrain before :  56925
Number of ones ytrain before :  4207

Number of zeros in ytrain after : 56925
Number of ones ytrain after : 56925

x_train shape :  (113850, 9)
y_train shape :  (113850,)



####                                                                       SUPPORT VECTOR MACHINES

In [8]:
#Avoiding data copy: For SVC, SVR, NuSVC and NuSVR, 
#if the data passed to certain methods is not C-ordered 
#contiguous and double precision,it will be copied before calling 
#the underlying C implementation. You can check whether a given 
#numpy array is C-contiguous by inspecting its flags attribute.

# checking if x_train resampled and y_train resampled are C-contigous
def check_c_cont(arr):
    if arr.flags['C_CONTIGUOUS']:
        print("The array is C-contiguous.")
    else:
        print("The array is not C-contiguous.")

print("x_train_resampled :-")
check_c_cont(x_train_resampled)

print("y_train_resampled :-")
check_c_cont(y_train_resampled)

# Setting C: C is 1 by default and it’s a reasonable default choice. 
#            If you have a lot of noisy observations you should decrease it: 
#            decreasing C corresponds to more regularization.

# C will be set to 0.2

x_train_resampled :-
The array is C-contiguous.
y_train_resampled :-
The array is C-contiguous.


#### TRAINING : BALANCED CLASSES , 2 IMAGES : 30th December

#### : 1) Support vector classifier  with Randomized search CV

In [None]:
print("Fitting the classifier to the training set")
t0 = time()

param_grid = {
    "C": loguniform(1e3, 1e5),
    "gamma": loguniform(1e-4, 1e-1),
    "kernel" : ['linear','poly']
}

clf = RandomizedSearchCV(
    svm.SVC(C = 0.2, class_weight="balanced", cache_size = 3000, verbose = True), param_grid, n_iter=10
)

clf = clf.fit(x_train_resampled, y_train_resampled)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

# clf_params = {'cv': None,
#              'error_score': float('nan'),
#              'estimator__C': 0.3,
#              'estimator__break_ties': False,
#              'estimator__cache_size': 3000,
#              'estimator__class_weight': 'balanced',
#              'estimator__coef0': 0.0,
#              'estimator__decision_function_shape': 'ovr',
#              'estimator__degree': 3,
#              'estimator__gamma': 'scale',
#              'estimator__kernel': 'rbf',
#              'estimator__max_iter': -1,
#              'estimator__probability': False,
#              'estimator__random_state': None,
#              'estimator__shrinking': True,
#              'estimator__tol': 0.001,
#              'estimator__verbose': True,
#              'estimator': svm.SVC(class_weight='balanced'),
#              'n_iter': 10,
#              'n_jobs': None,
#              'param_distributions': {'C': loguniform(1e3, 1e5),
#               'gamma': loguniform(1e-4, 1e-1),
#               'Kernel': ['linear', 'poly']},
#              'pre_dispatch': '2*n_jobs',
#              'random_state': None,
#              'refit': True,
#              'return_train_score': True,
#              'scoring': None,
#              'verbose': 1}

# clf.set_params(**clf_params)
# clf.get_params()

Fitting the classifier to the training set
[LibSVM]

In [None]:
# saving the model checkpoint 

joblib.dump(model, 'Trained_model_checkpoints/TM_3_301223_without_scaler_with_SMOTE_2Image_RandomizedSearch.pkl')

#### TEST SET ACCURACY 

In [None]:
X_test_features = X_test[:, :-1]
# X_test_features_scaled = scaler.transform(X_test_features)

y_test_target = X_test[:, -1]

y_pred = clf.predict(X_test_features)

count_zeros = np.count_nonzero(y_pred == 0)
count_ones = np.count_nonzero(y_pred == 1)

print("Number of zeros:", count_zeros)
print("Number of ones:", count_ones)

accuracy = accuracy_score(y_test_target, y_pred)
report = classification_report(y_test_target, y_pred,target_names=['Not Vein', 'Vein'])

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)