In [1]:
# !pip install -U imbalanced-learn

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import os
import cv2 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.filters import sobel
from scipy.ndimage import gaussian_filter

from skimage.feature import canny
from skimage.filters import median
from skimage.restoration import denoise_bilateral

from skimage.filters import prewitt_h, prewitt_v

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

scaler = StandardScaler()

import joblib

In [5]:
# ref : https://www.tutorialkart.com/opencv/python/opencv-python-resize-image/#gsc.tab=0

# DOWNSCALE BY 50% : (480,499) = size
def downsample_image(img):
    scale_percent = 20 
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized
 
def upsample_image(img):   # To be used after prediction
    scale_percent = 500 
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    dim = (width, height)

    resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
    return resized

In [6]:
### PREPROCESSING FUNCTION
def preprocess_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # check if size is correct 
    # 1) Convert to gray scale
    # 2) Normalize 
    # 3) Downsample image to 20%
    
    if(img.shape[0] != 960):
        print("Change shape")
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    final_img = gray_img/255.0
    final_img = downsample_image(gray_img)
    
    return final_img

def preprocess_seg_map(seg_path):
    # reading array 
    seg_map = cv2.imread(seg_path,cv2.IMREAD_GRAYSCALE)
    # Convert from bool to uint8
    seg_map = seg_map.astype(np.uint8)
    # convert the max value pixel to 1 (To create a binary seg map)
    seg_map[seg_map == 255] = 1
    
    # Downsample the seg_map by 50%
    seg_map = downsample_image(seg_map)
    return seg_map

def extract_features(gray_img,seg_map):
    
    radius = 1
    entropy_img = entropy(gray_img,disk(radius))
    sigma = 5  
    gauss_image = gaussian_filter(gray_img, sigma=sigma) #### (APPLIED ON COLOR ORIGINALLY)
    sobel_filtered = sobel(gray_img) 
    edges = canny(gray_img)
    median_filtered = median(gray_img)                               ##### (APPLIED ON COLOR ORIGINALLY)
    bilateral_filtered = denoise_bilateral(gray_img,channel_axis=-1,multichannel=False) ##### (APPLIED ON COLOR ORIGINALLY)
    prewitt_horizontal = prewitt_h(gray_img)
    prewitt_vertical = prewitt_v(gray_img)
    
    pixel_feature = gray_img.reshape(-1).reshape(-1, 1)                        #1
    entropy_feature = entropy_img.reshape(-1).reshape(-1, 1)                   #2
    gauss_feature = gauss_image.reshape(-1).reshape(-1, 1)                     #3
    sobel_feature = sobel_filtered.reshape(-1).reshape(-1, 1)                  #4
    edges_feature = edges.reshape(-1).reshape(-1, 1)                           #5
    median_feature = median_filtered.reshape(-1).reshape(-1, 1)                #6
    bilateral_feature = bilateral_filtered.reshape(-1).reshape(-1, 1)          #7
    prewitt_horizontal_feature = prewitt_horizontal.reshape(-1).reshape(-1, 1) #8
    prewitt_vertical_feature = prewitt_vertical.reshape(-1).reshape(-1, 1)     #9
    
    label = seg_map.reshape(-1).reshape(-1, 1) 
    
    feature_matrix = np.hstack((pixel_feature,entropy_feature,gauss_feature,
                               sobel_feature,edges_feature,median_feature,
                               bilateral_feature,prewitt_horizontal_feature,
                               prewitt_vertical_feature,label))
    return feature_matrix
    
def find_segfile_of_image(image,segmentations):
        image_name = image.split(".")[0]
        for seg in segmentations:
            if image_name in seg:
                return seg
            
#     print("entropy_feature : ",entropy_img.shape)
#     print("gauss_feature : ",gauss_image.shape)
#     print("sobel_feature : ",sobel_filtered.shape)
#     print("edges_feature : ",edges.shape)
#     print("median_feature : ",median_filtered.shape)
#     print("bilateral_feature : ",bilateral_filtered.shape)
#     print("prewitt_horizontal_feature : ",prewitt_horizontal.shape)
#     print("prewitt_vertical_feature : ",prewitt_vertical.shape)
    
#     print()
#     print("entropy_feature : ",entropy_feature.shape)
#     print("gauss_feature : ",gauss_feature.shape)
#     print("sobel_feature : ",sobel_feature.shape)
#     print("edges_feature : ",edges_feature.shape)
#     print("median_feature : ",median_feature.shape)
#     print("bilateral_feature : ",bilateral_feature.shape)
#     print("prewitt_horizontal_feature : ",prewitt_horizontal_feature.shape)
#     print("prewitt_vertical_feature : ",prewitt_vertical_feature.shape)

#### PREPROCESSING AND FEATURE EXTRACTION 

1) Preprocessing : 
    
    (a) Conversion to gray scale (For feature extraction)
    
    (b) Resizing to 150x150 if not training in 960x999 
    
    (c) Normalizing : divide by 255.0
    
2) Feature extraction

   Features:-
    (1) Entropy  (TEXTURE)
    
    (2) Sobel    
    
    (3) Gaussian          
    
    (4) Canny edge detector
    
    (5) Median filter
    
    (6) Bilateral filter
    
    (7) Prewitt vertical filter
    
    (8) Prewitt horizontal filter
    
    (9) Pixel intensity 
    
    CAN ADD GABOR FEATURES TO THIS AS WELL
    
    (10)Pixel label (1 or 0)
    
    PROBLEM : 1) Some features are found on gray scale some on color 
                 afterwards we get feature maps of different channels
                 How to flatten them to get consistent dimensions afterwards 
                 in order to get a feature vector for an image

In [7]:
image_folder = "IMAGES"
seg_folder = "SEG1"

images = os.listdir(image_folder)
segmentations = os.listdir(seg_folder)

image_features_matrices_combined = []

for image in images:    
    # preprocessing image
    image_path = os.path.join(image_folder,image)
    gray_img = preprocess_image(image_path)
    
    
    # preprocessing segmentation map
    seg_name = find_segfile_of_image(image,segmentations)
    seg_path = os.path.join(seg_folder,seg_name)
    seg_map = preprocess_seg_map(seg_path)
    
    
    # feature extraction    
    feature_matrix = extract_features(gray_img,seg_map)
    image_features_matrices_combined.append(feature_matrix)
    
    print("Feature matrix for image ",image," : ",feature_matrix.shape)

print()
Final_matrix = np.vstack(image_features_matrices_combined)
print("Final shape of data matrix : ",Final_matrix.shape)

#     print("dtype of image array : ",img.dtype)
#     print("dtype of seg map : ",seg_map.dtype)
#     dtype of image array :  uint8
#     dtype of seg map :  uint8

  bilateral_filtered = denoise_bilateral(gray_img,channel_axis=-1,multichannel=False) ##### (APPLIED ON COLOR ORIGINALLY)


Feature matrix for image  Image_01L.jpg  :  (38208, 10)
Feature matrix for image  Image_01R.jpg  :  (38208, 10)
Feature matrix for image  Image_02L.jpg  :  (38208, 10)
Feature matrix for image  Image_02R.jpg  :  (38208, 10)
Feature matrix for image  Image_03L.jpg  :  (38208, 10)
Feature matrix for image  Image_03R.jpg  :  (38208, 10)
Feature matrix for image  Image_04L.jpg  :  (38208, 10)
Feature matrix for image  Image_04R.jpg  :  (38208, 10)
Feature matrix for image  Image_05L.jpg  :  (38208, 10)
Feature matrix for image  Image_05R.jpg  :  (38208, 10)
Feature matrix for image  Image_06L.jpg  :  (38208, 10)
Feature matrix for image  Image_06R.jpg  :  (38208, 10)
Feature matrix for image  Image_07L.jpg  :  (38208, 10)
Feature matrix for image  Image_07R.jpg  :  (38208, 10)
Feature matrix for image  Image_08L.jpg  :  (38208, 10)
Feature matrix for image  Image_08R.jpg  :  (38208, 10)
Feature matrix for image  Image_09L.jpg  :  (38208, 10)
Feature matrix for image  Image_09R.jpg  :  (382

#### TRAIN, VALIDATION AND TEST SPLITS 

In [8]:
df = pd.DataFrame(Final_matrix)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
1069819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1069820,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1069821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1069822,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
is_zero_df = df.eq(0).all().all()

if is_zero_df:
    print("The entire DataFrame is filled with 0's.")
else:
    print("The DataFrame contains non-zero values.")

The DataFrame contains non-zero values.


In [10]:
filtered_df = df[df.iloc[:, -1] == 1]
filtered_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
900,28.0,2.321928,25.0,0.104576,1.0,28.0,0.155991,0.150327,-0.011765,1.0
1082,45.0,2.321928,27.0,0.081467,0.0,42.0,0.177210,0.113725,-0.005229,1.0
1083,42.0,2.321928,28.0,0.060380,0.0,42.0,0.181053,0.086275,0.022222,1.0
1099,37.0,1.921928,28.0,0.030455,0.0,37.0,0.169538,0.044444,-0.010458,1.0
1282,40.0,2.321928,31.0,0.001961,0.0,45.0,0.188996,0.001307,0.001307,1.0
...,...,...,...,...,...,...,...,...,...,...
1068519,34.0,1.921928,28.0,0.021591,0.0,35.0,0.057826,-0.020915,-0.019608,1.0
1068520,34.0,0.970951,28.0,0.006201,0.0,35.0,0.064739,-0.009150,0.003922,1.0
1068521,35.0,2.321928,29.0,0.016867,0.0,36.0,0.068638,0.003922,0.022222,1.0
1068718,32.0,1.921928,24.0,0.084621,0.0,34.0,0.029302,-0.121569,-0.003922,1.0


#### Final_matrix : matrix that is to be used for training 
#### df : final df 

#### TRAIN : VALIDATION : TEST SPLIT ( 70 : 15 : 15)

In [11]:
# Random shuffle the matrix 
X = Final_matrix
np.random.shuffle(X)

n_samples = X.shape[0]

train_ratio = 0.01
val_ratio = 0.80
test_ratio = 0.19

n_train = int(train_ratio * n_samples)
n_val = int(val_ratio * n_samples)
n_test = n_samples - n_train - n_val

X_train = X[:n_train, :]
X_val = X[n_train:n_train + n_val, :]
X_test = X[n_train + n_val:, :]

print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"X_test shape: {X_test.shape}")

X_train shape: (10698, 10)
X_val shape: (855859, 10)
X_test shape: (203267, 10)


#### CLASS IMBALANCE CHECK IN TRAIN MATRIX 

In [12]:
class_labels = X_train[:, -1]

count_class_0 = np.count_nonzero(class_labels == 0)
count_class_1 = np.count_nonzero(class_labels == 1)

print(f"Number of samples with class label 0: {count_class_0}")
print(f"Number of samples with class label 1: {count_class_1}")

Number of samples with class label 0: 9978
Number of samples with class label 1: 720


In [13]:
X_train_features = X_train[:, :-1]
# X_train_features_scaled = scaler.fit_transform(X_train_features)

y_train_target = X_train[:, -1]

print("Before Xtrain shape : ",X_train_features.shape)
print("Before Ytrain shape : ",y_train_target.shape)
print()

count_zeros = np.count_nonzero(y_train_target == 0)
count_ones = np.count_nonzero(y_train_target == 1)
print("Number of zeros in ytrain before : ", count_zeros)
print("Number of ones ytrain before : ", count_ones)
print()
############################### CLASS BALANCING USING SMOTE #############################33
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_features, y_train_target)

############################### COUNTING CLASS WISE SAMPLES #############################33

count_zeros = np.count_nonzero(y_train_resampled == 0)
count_ones = np.count_nonzero(y_train_resampled == 1)

print("Number of zeros in ytrain after :", count_zeros)
print("Number of ones ytrain after :", count_ones)
print()
############################### SHAPES OF TRAINING DATA ####################################
print("X_train shape : ",X_train_resampled.shape)
print("y_train shape : ",y_train_resampled.shape)
print()

Before Xtrain shape :  (10698, 9)
Before Ytrain shape :  (10698,)

Number of zeros in ytrain before :  9978
Number of ones ytrain before :  720

Number of zeros in ytrain after : 9978
Number of ones ytrain after : 9978

X_train shape :  (19956, 9)
y_train shape :  (19956,)



#### TRAINING :CLASS BALANCED

#### : 1) NORMAL LINEAR SVM , CHANGING PARAMETERS MANUALLY : 1% training data , 80% validation

In [34]:
# clf = svm.SVC(kernel='linear',C = 0.1,verbose = True,cache_size = 7000,gamma = 1e-3)
# print(clf)

# # 8 minutes

SVC(C=0.1, cache_size=7000, gamma=0.001, kernel='linear', verbose=True)


In [35]:
# clf.fit(X_train_resampled, y_train_resampled)

[LibSVM]

#### 1) GRID SEARCH ON SAME 1% train data

In [14]:
parameters = {'kernel':('linear', 'poly'), 
              'C':[0.001, 1],
              'gamma' : [0.001,0.01]}
        
svc = svm.SVC(verbose = True,cache_size = 7000)
clf = GridSearchCV(svc, parameters,n_jobs=3,verbose=1,return_train_score=True)
print(clf)

GridSearchCV(estimator=SVC(cache_size=7000, verbose=True), n_jobs=3,
             param_grid={'C': [0.001, 1], 'gamma': [0.001, 0.01],
                         'kernel': ('linear', 'poly')},
             return_train_score=True, verbose=1)


In [None]:
clf.fit(X_train_resampled, y_train_resampled)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [None]:
joblib.dump(clf, 'Trained_model_checkpoints/TM_6_02012024_GRID-CV_without_scaler_with_SMOTE_14Image_1%train.pkl')

#### CHECKING ACCURACY ON VALIDATION SET AFTER TRAINING MODEL

In [37]:
X_val_features = X_val[:, :-1]
# X_val_features_scaled = scaler.transform(X_val_features)

y_val_target = X_val[:, -1]

y_pred = clf.predict(X_val_features)

count_zeros = np.count_nonzero(y_pred == 0)
count_ones = np.count_nonzero(y_pred == 1)

print("Number of zeros:", count_zeros)
print("Number of ones:", count_ones)

accuracy = accuracy_score(y_val_target, y_pred)
report = classification_report(y_val_target, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

Number of zeros: 655943
Number of ones: 199916
Accuracy: 0.8070768666334057
Classification Report:
               precision    recall  f1-score   support

         0.0       0.98      0.81      0.89    800712
         1.0       0.22      0.82      0.35     55147

    accuracy                           0.81    855859
   macro avg       0.60      0.81      0.62    855859
weighted avg       0.94      0.81      0.85    855859



#### TEST SET ACCURACY 

In [15]:
X_test_features = X_test[:, :-1]
# X_test_features_scaled = scaler.transform(X_test_features)

y_test_target = X_test[:, -1]

y_pred = clf.predict(X_test_features)

accuracy = accuracy_score(y_test_target, y_pred)
report = classification_report(y_test_target, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

Accuracy: 0.8354849965108164
Classification Report:
               precision    recall  f1-score   support

         0.0       0.99      0.83      0.90      5348
         1.0       0.27      0.85      0.41       384

    accuracy                           0.84      5732
   macro avg       0.63      0.84      0.66      5732
weighted avg       0.94      0.84      0.87      5732

