In [1]:
import os
import cv2
import torch
import warnings
import random
import numpy as np
import seaborn as sns
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
import matplotlib.pyplot as plt
from pycaret.classification import *
from torchvision.models import resnet152
from torchvision.transforms import transforms
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
# img_read = cv2.imread('C:\\Users\\Eurus\\Desktop\\YoloV8\\train\\images\\T_1321.jpg', cv2.IMREAD_GRAYSCALE)
# sns.heatmap(np.array(img_read), cmap='hot', annot=True, cbar=True)

Here is what is gonna happen:
 - iterate over all pictures
 - read them all as GrayScale Images
 - extract images features
 - save them inside of a list
 - feed it into a transformer

In [3]:
Deep_features = []
root = 'C:\\Users\\Eurus\\Desktop\\Data'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

# Load the ResNet-152 model
resnet = resnet152(pretrained=False)

In [4]:
resnet.to(device)
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
# Transformation for preprocessing
preprocess = transforms.Compose([
    transforms.Resize((540, 540)),
    transforms.ToTensor()
])

In [6]:
def feature_extractor(root):
    # Iterate over the images
    for idx in tqdm(os.listdir(root)):
        
        # Read the image
        path = os.path.join(root, idx)
        img = Image.open(path)
        
        # Preprocess and Normalize the images
        img_tensor = preprocess(img).div(255.0).unsqueeze(0).to(device)
        
        # Extract features using ResNet-152
        with torch.no_grad():
            features = resnet(img_tensor)
            
        # Flatten the features
        features = features.flatten().cpu().numpy()
        
        # Store the features in the dictionary
        if "T" in idx:
            Deep_features.append({'Label':'T','Features':features})
        elif "N" in idx:
            Deep_features.append({'Label':'N','Features':features})
        else:
            print("error!")
            
    
    return Deep_features
        

In [7]:
Train_features = feature_extractor(root)

100%|████████████████████████████████████████████████████████████████████████████| 13832/13832 [19:46<00:00, 11.66it/s]


In [8]:
# shuffle the data
random.shuffle(Train_features)

In [9]:
len(Train_features)

13832

In [10]:
Train_features[0]

{'Label': 'N',
 'Features': array([-1.06906367e+04, -2.52137656e+04,  2.92583965e+04,  2.28539238e+04,
        -2.23684102e+04, -1.16935771e+04,  8.50717480e+03, -2.72449390e+03,
        -3.79249492e+04,  1.15733555e+04,  1.24553311e+04,  2.67641699e+04,
        -3.44325122e+03, -3.34180933e+03, -2.67707422e+04, -6.16123340e+03,
        -1.09487012e+03, -1.48649316e+04,  4.16235693e+03,  9.56235547e+03,
        -1.49115049e+04, -3.16527559e+04,  5.23125859e+04, -5.31482324e+03,
        -5.17468994e+03,  1.22583813e+03,  7.54630518e+03,  1.60013994e+04,
         2.62790488e+04,  1.15739170e+04,  5.44310840e+03, -1.82375410e+04,
        -1.27712334e+04,  4.91903594e+04, -7.69909961e+03,  3.43182617e+03,
        -5.56962793e+03, -1.69605469e+04,  2.17618984e+04,  1.81115000e+04,
         2.15427500e+04,  7.81693457e+03,  1.00527949e+04, -2.11916387e+04,
        -2.12089417e+02,  2.19853223e+04, -3.58642920e+03, -1.50200801e+04,
         1.91814980e+04,  6.05122021e+03,  3.50683398e+04, -5

In [11]:
df = pd.DataFrame()

# Iterate over the list of dictionaries
for idx, d in enumerate(Train_features):
    # Extract the label and features from each dictionary
    label = d['Label']
    features = d['Features']
    
    # Create a dictionary for the row data
    row_data = {'Label': label}
    
    # Add the features as columns to the row dictionary
    for i, value in enumerate(features):
        column_name = f'F_{i}'
        row_data[column_name] = value
    
    # Append the row to the DataFrame
    df = df.append(row_data, ignore_index=True)

df.head(5)

Unnamed: 0,Label,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,...,F_990,F_991,F_992,F_993,F_994,F_995,F_996,F_997,F_998,F_999
0,N,-10690.636719,-25213.765625,29258.396484,22853.923828,-22368.410156,-11693.577148,8507.174805,-2724.493896,-37924.949219,...,34881.0,-11942.504883,10494.958008,-15776.727539,30764.744141,18928.335938,-13308.816406,-17215.457031,-10167.539062,20104.916016
1,N,-7629.322754,-18402.431641,21075.640625,17044.130859,-17074.0625,-8875.350586,6207.130371,-2534.134033,-28035.839844,...,25396.242188,-8040.164062,7526.416504,-11134.735352,21891.035156,13781.102539,-9286.52832,-12237.918945,-7031.791992,14757.584961
2,N,-7409.713867,-17593.28125,19657.0625,16256.388672,-16373.37207,-8285.15625,5567.615234,-2038.848877,-26340.152344,...,23134.189453,-7232.945801,7283.832031,-10198.464844,20487.828125,12480.43457,-8593.717773,-11392.19043,-6298.755371,13492.181641
3,N,-9487.986328,-21533.810547,24994.900391,19448.421875,-19460.894531,-10532.538086,7491.643066,-2692.489014,-32500.560547,...,29641.876953,-9823.654297,8988.515625,-13260.978516,26138.876953,16142.136719,-11179.166016,-14604.679688,-8382.214844,17354.947266
4,N,-11941.234375,-29637.691406,35033.589844,27598.150391,-26310.652344,-13651.644531,10284.6875,-3895.448486,-44680.09375,...,41469.59375,-14147.382812,12032.883789,-18577.267578,35908.304688,22637.023438,-15558.220703,-20085.800781,-11846.132812,23801.306641


In [15]:
df.tail(10)

Unnamed: 0,Label,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,...,F_990,F_991,F_992,F_993,F_994,F_995,F_996,F_997,F_998,F_999
13822,N,-10404.021484,-23338.478516,26978.138672,21753.648438,-22130.128906,-10867.045898,7291.555176,-2227.296875,-35857.980469,...,31758.216797,-10215.650391,10416.59082,-14363.47168,28445.808594,16768.814453,-11932.501953,-16069.323242,-9301.821289,19169.078125
13823,T,-16160.044922,-37650.609375,44081.273438,34148.871094,-33653.707031,-17329.068359,12416.479492,-4097.287598,-56819.59375,...,52097.046875,-17646.4375,15809.777344,-23504.994141,45653.710938,28152.269531,-19507.972656,-26091.615234,-15724.344727,30584.486328
13824,N,-7369.031738,-16503.121094,19281.771484,15614.791016,-16197.25,-7546.625488,5154.735352,-1926.464844,-25304.265625,...,22346.271484,-6845.833496,7722.492676,-10210.019531,19939.275391,11934.598633,-8350.81543,-11268.93457,-6698.681641,14051.40625
13825,N,-11295.024414,-27929.931641,33156.992188,25682.466797,-24629.8125,-12899.165039,9742.039062,-3303.626709,-41991.59375,...,39164.136719,-13299.09082,11353.364258,-17584.5,34069.644531,21509.464844,-14844.583984,-19079.046875,-11296.425781,22739.292969
13826,N,-11531.06543,-26543.714844,29964.583984,24526.183594,-25373.324219,-12047.337891,7985.197754,-2640.860107,-39977.628906,...,34985.78125,-10964.642578,11470.073242,-15836.332031,31279.884766,18891.460938,-13139.791016,-17808.771484,-10510.90625,21594.855469
13827,N,-7913.774902,-18795.771484,21092.375,17432.974609,-17709.652344,-8537.285156,5703.744141,-1913.878418,-28235.363281,...,24887.076172,-7891.48291,8098.3125,-11107.34082,22069.673828,13317.243164,-9317.119141,-12448.583984,-7344.066406,14753.078125
13828,N,-8127.605957,-18284.626953,21075.283203,17009.740234,-17330.498047,-8499.487305,5624.021973,-1689.32373,-27940.847656,...,24652.466797,-7982.913574,8117.382324,-11183.851562,22202.949219,12958.379883,-9235.305664,-12523.611328,-7219.841797,14884.139648
13829,N,-7838.163574,-17738.267578,20008.613281,16096.833984,-16626.548828,-8046.173828,5014.510742,-1214.207275,-26748.257812,...,23486.769531,-7628.03418,7584.827637,-10528.854492,20979.726562,12384.614258,-8713.296875,-12124.87207,-7216.768066,14084.831055
13830,N,-10912.828125,-26230.761719,30469.412109,23902.880859,-23354.072266,-12312.500977,8830.892578,-3127.529541,-39456.234375,...,36355.976562,-12274.033203,10649.001953,-16334.696289,31622.322266,20030.740234,-13482.427734,-17892.792969,-10645.318359,21083.908203
13831,N,-14110.157227,-32665.962891,38424.914062,29519.992188,-28756.90625,-15167.238281,10969.397461,-3567.516357,-49158.820312,...,45420.019531,-15473.536133,13587.240234,-20801.265625,39611.5625,24667.794922,-17107.271484,-22678.830078,-13596.544922,26554.994141


In [12]:
del Train_features

In [16]:
# y = df['Label']
y.tail(10)

13822    N
13823    T
13824    N
13825    N
13826    N
13827    N
13828    N
13829    N
13830    N
13831    N
Name: Label, dtype: object

In [17]:
df.drop(['Label'], axis=1, inplace=True)
df.head()

Unnamed: 0,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,F_9,...,F_990,F_991,F_992,F_993,F_994,F_995,F_996,F_997,F_998,F_999
0,-10690.636719,-25213.765625,29258.396484,22853.923828,-22368.410156,-11693.577148,8507.174805,-2724.493896,-37924.949219,11573.355469,...,34881.0,-11942.504883,10494.958008,-15776.727539,30764.744141,18928.335938,-13308.816406,-17215.457031,-10167.539062,20104.916016
1,-7629.322754,-18402.431641,21075.640625,17044.130859,-17074.0625,-8875.350586,6207.130371,-2534.134033,-28035.839844,8740.40918,...,25396.242188,-8040.164062,7526.416504,-11134.735352,21891.035156,13781.102539,-9286.52832,-12237.918945,-7031.791992,14757.584961
2,-7409.713867,-17593.28125,19657.0625,16256.388672,-16373.37207,-8285.15625,5567.615234,-2038.848877,-26340.152344,7972.287598,...,23134.189453,-7232.945801,7283.832031,-10198.464844,20487.828125,12480.43457,-8593.717773,-11392.19043,-6298.755371,13492.181641
3,-9487.986328,-21533.810547,24994.900391,19448.421875,-19460.894531,-10532.538086,7491.643066,-2692.489014,-32500.560547,10386.19043,...,29641.876953,-9823.654297,8988.515625,-13260.978516,26138.876953,16142.136719,-11179.166016,-14604.679688,-8382.214844,17354.947266
4,-11941.234375,-29637.691406,35033.589844,27598.150391,-26310.652344,-13651.644531,10284.6875,-3895.448486,-44680.09375,13856.203125,...,41469.59375,-14147.382812,12032.883789,-18577.267578,35908.304688,22637.023438,-15558.220703,-20085.800781,-11846.132812,23801.306641


In [18]:
classifier = setup(data = df ,
                   target = y ,
                   session_id = 100 ,
                   n_jobs=-1,
                   use_gpu=True,
                   fold=5)

Unnamed: 0,Description,Value
0,Session id,100
1,Target,Label
2,Target type,Binary
3,Target mapping,"N: 0, T: 1"
4,Original data shape,"(13832, 1001)"
5,Transformed data shape,"(13832, 1001)"
6,Transformed train set shape,"(9682, 1001)"
7,Transformed test set shape,"(4150, 1001)"
8,Numeric features,1000
9,Preprocess,True


In [21]:
top_5 = compare_models(n_select = 5);

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9663,0.9767,0.6685,0.9508,0.7849,0.7672,0.7814,0.564
lda,Linear Discriminant Analysis,0.9576,0.9507,0.6753,0.8315,0.7452,0.7223,0.727,0.476
ridge,Ridge Classifier,0.9541,0.0,0.564,0.8999,0.6933,0.6699,0.6914,0.382
rf,Random Forest Classifier,0.9498,0.9579,0.4719,0.9631,0.6331,0.6096,0.6547,0.628
et,Extra Trees Classifier,0.9493,0.9533,0.4708,0.9549,0.6297,0.6059,0.6503,0.648
gbc,Gradient Boosting Classifier,0.9396,0.9343,0.3831,0.9066,0.537,0.5104,0.5658,0.538
lr,Logistic Regression,0.9393,0.8988,0.4517,0.8033,0.5768,0.5469,0.574,0.48
dt,Decision Tree Classifier,0.9318,0.7963,0.6303,0.629,0.6294,0.5919,0.592,0.468
knn,K Neighbors Classifier,0.9279,0.8268,0.3135,0.7639,0.4442,0.4127,0.4602,1.192
ada,Ada Boost Classifier,0.9232,0.8834,0.2584,0.7303,0.3805,0.3496,0.4044,0.71
