# Import the necessary libraries

In [1]:
import gc
import os
import cv2 
import random
import warnings
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import *
from sklearn.preprocessing import StandardScaler


import torch 
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet152 # for feature extraction
from torch.utils.data import DataLoader # for batching data
from torch.utils.data import TensorDataset 
from torchvision.transforms import transforms 

warnings.filterwarnings('ignore')

# Load the Feature Extractor

In [2]:
Deep_features = []
root = 'C:\\Users\\Eurus\\Desktop\\Data\\images'
device = torch.device('cuda')

# Load the ResNet-152 model
resnet = resnet152(pretrained=False)

In [3]:
resnet.to(device)
resnet.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

# Prepare the Data

In [4]:
# Transformation for preprocessing
preprocess = transforms.Compose([
    transforms.Resize((540, 540)),
    transforms.ToTensor()
])

In [5]:
def feature_extractor(root):
    # Iterate over the images
    for idx in tqdm(os.listdir(root)):
        
        # Read the image
        path = os.path.join(root, idx)
        img = Image.open(path)
        
        # Preprocess the images (Normalize-> add a dummy dimention -> move it to cuda)
        img_tensor = preprocess(img).div(255.0).unsqueeze(0).to(device)
        
        # Extract features using ResNet-152
        with torch.no_grad():
            features = resnet(img_tensor)
            
        # Flatten the features
        features = features.flatten().cpu().numpy()
        
        # Store the features in the dictionary
        if "T" in idx:
            Deep_features.append({'Label':'T','Features':features})
        elif "N" in idx:
            Deep_features.append({'Label':'N','Features':features})
        else:
            print("error!")
            
    
    return Deep_features
        

In [6]:
Train_features = feature_extractor(root)

100%|████████████████████████████████████████████████████████████████████████████| 13832/13832 [19:48<00:00, 11.64it/s]


In [7]:
# shuffle the data
random.shuffle(Train_features)

In [8]:
"Cleaning feature titles to make the dataset easy to use"

df = pd.DataFrame()

# Iterate over the list of dictionaries
for idx, d in enumerate(Train_features):
    # Extract the label and features from each dictionary
    label = d['Label']
    features = d['Features']
    
    # Create a dictionary for the row data
    row_data = {'Label': label}
    
    # Add the features as columns to the row dictionary
    for i, value in enumerate(features):
        column_name = f'F_{i}'
        row_data[column_name] = value
    
    # Append the row to the DataFrame
    df = df.append(row_data, ignore_index=True)

    
# Now we have df, Delete the residual
del Train_features

# Observe the results
df.head(5)

Unnamed: 0,Label,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,...,F_990,F_991,F_992,F_993,F_994,F_995,F_996,F_997,F_998,F_999
0,N,-86358.3125,-592.58844,-5710.901367,49465.164062,-48005.507812,37615.105469,29600.255859,22371.427734,-12076.921875,...,953.743652,-5307.672852,10409.431641,3719.478271,36079.519531,-25277.724609,16164.134766,26493.423828,37378.191406,5075.222656
1,N,-100164.882812,402.488678,-7050.362793,58362.257812,-55502.164062,43868.769531,33861.671875,25561.644531,-14152.048828,...,946.102051,-5392.260742,12801.692383,4423.374512,40764.867188,-29081.923828,18600.822266,30330.238281,42400.34375,5599.158203
2,N,-68036.554688,-48.795006,-4249.842773,38660.34375,-38329.1875,29435.041016,23136.304688,17443.474609,-9410.007812,...,505.79248,-4341.439453,8105.806152,3140.482178,27746.773438,-19775.447266,12988.277344,21419.863281,30045.658203,3970.303955
3,N,-97685.640625,-630.680237,-6430.760742,55620.160156,-54095.453125,42730.726562,33483.761719,25371.103516,-14056.546875,...,1140.874512,-5827.972656,12226.415039,4020.497803,40441.429688,-28323.912109,18601.367188,29816.771484,41923.832031,5565.824707
4,N,-121133.789062,467.916412,-8485.570312,70088.90625,-65944.984375,51607.628906,40869.964844,31156.957031,-17059.806641,...,1466.643066,-6366.206055,14577.682617,5287.39209,50111.5625,-35235.535156,21954.982422,36479.433594,51752.140625,6641.978027


In [9]:
"Encoding the Labels"

for row in tqdm(range(len(df))):
    if df.loc[row,'Label'] == "N":
        df.loc[row,'Label'] = 0
        
    elif df.loc[row,'Label'] == "T":
        df.loc[row,'Label'] = 1

# Convert them all to float32        
df = df.astype('float32')
df.info()        

100%|█████████████████████████████████████████████████████████████████████████| 13832/13832 [00:00<00:00, 14322.87it/s]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13832 entries, 0 to 13831
Columns: 1001 entries, Label to F_999
dtypes: float32(1001)
memory usage: 52.8 MB





# Train Test Split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df['Label'], test_size=0.2, random_state=42, stratify=df['Label'])

# Delete the residual
del df 
gc.collect()

0

In [11]:
# Load them as PyTorch Tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

In [12]:
# Create DataLoader objects for batching the data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Define the Tabular Transformer module

In [13]:
class TabularTransformer(nn.Module):
    def __init__(
        self,
        input_dim,                   # Total number of Feature columns
        hidden_dim,                  # Neuron Density of MLP Layers
        output_dim,                  # Total number of Label columns
        num_attention_heads,         # Total Number of attention heads of Transformer
        num_transformer_layers,      # Total Number of transformer layers
        num_mlp_layers,              # Total number of MLP layers
        middle_activation='relu',    # MLP layer Activation Function
        last_activation="sigmoid",   # Last layer Activation Function
        dropout_prob=0.1,            # Dropout possibility of Dropout layers (float)
        pooling_type="global"          # Pooling Strategy : mean(Average) , max(Maximum), global(mean)
    ):
        super(TabularTransformer, self).__init__()
        
        # Embedding layer
        self.embedding = nn.Linear(input_dim, hidden_dim)
        
        # Transformer layers
        self.transformer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model = hidden_dim,
                nhead = num_attention_heads,
            ),
            num_layers = num_transformer_layers
        )
        
        # MLP Layers
        self.mlp = nn.Sequential()
        for i in range(num_mlp_layers):
            # Last FC Layer
            if i == num_mlp_layers-1:
                self.mlp.add_module(
                    f"FC_{i+1}",
                    nn.Linear(hidden_dim, output_dim)
                )
            else:
                self.mlp.add_module(
                    f"FC_{i+1}",
                    nn.Linear(hidden_dim,hidden_dim)
                )    
            # Activation Functions for Final FC Layer
            if i == num_mlp_layers-1:
                if last_activation == "sigmoid":
                    self.mlp.add_module(
                        f"Activation_{i+1}",
                        nn.Sigmoid()
                    )
                elif last_activation == "softmax":
                    self.mlp.add_module(
                        f"Activation_{i+1}",
                        nn.Softmax(dim=-1)
                    )
                else:
                    raise ValueError(f"Invalid activation function for the last layer: {last_activation}")
            
            # Activation Functions for other FC layers
            else:
                if middle_activation == "relu":
                    self.mlp.add_module(
                        f"Activation_{i+1}",
                        nn.ReLU()
                    )
                elif middle_activation == "leakyrelu":
                    self.mlp.add_module(
                        f"Activation_{i+1}",
                        nn.LeakyReLU()
                    )
                else:
                    raise ValueError(f"Invalid activation function for the middle layers : {last_activation} ")
            
            # Dropout Layer
            if i != num_mlp_layers-1:
                if dropout_prob > 0.0:
                    self.mlp.add_module(
                        f"Dropout_{i+1}",
                        nn.Dropout(dropout_prob)
                    )
       
                    
    def forward(self, x, memory=None): 
        # Apply linear transformation
        embedded = self.embedding(x)
        embedded = self.embedding.weight[embedded.long(), :].to_dense()
        """The line below rearranges the dimensions of the embedded tensor.
        Dimensions are rearranged to have the batch dimension (1), sequence dimension (0), and feature dimension (2).
        This is done to prepare the tensor for the subsequent transformer layer, which expects
        the sequence dimension to be in the second position."""
        embedded = embedded.permute(1, 0, 2)
        
        """This line below rearranged embedded tensor through the self.transformer layer,
        which is an instance of nn.TransformerEncoder. The transformer layer applies multi-head self-attention
        and position-wise fully connected layers to the input sequence. It captures the contextual dependencies within
        the sequence and learns representations that capture the relationships between different elements of the sequence."""
        transformer_output = self.transformer(embedded, memory)
        
#         # Reset the dimensions to original order for upcoming FC/Pooling Layers
        transformer_output = transformer_output.permute(1, 0, 2)
        
        # Pooling Layer
        if self.pooling_type == "mean":
            pooled_output = F.avg_pool1d(transformer_output, kernel_size=transformer_output.size(1)).squeeze(2)
        elif self.pooling_type == "max":
            pooled_output = F.max_pool1d(transformer_output, kernel_size=transformer_output.size(1)).squeeze(2)
        elif self.pooling_type == "global":
            pooled_output = torch.mean(transformer_output, dim=1)
        else:
            raise ValueError(f"Invalid pooling_type: {self.pooling_type}")
        
        
        mlp_output = self.mlp(pooled_output)          
        return mlp_output
                    

# Model Instantiation

In [14]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda")

In [15]:
model = TabularTransformer(
    input_dim = 1000,
    output_dim = 1,
    hidden_dim = 256,
    num_attention_heads = 4,
    num_transformer_layers = 4,
    num_mlp_layers = 4,
    middle_activation = 'relu',
    last_activation = 'sigmoid',
    dropout_prob = 0.2,
    pooling_type = 'global'

).to(device)

In [16]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [17]:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [18]:
num_epochs = 200
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    TP = 0
    FN = 0
    FP = 0
    
    for inputs, labels in train_loader:
        # All set to work on same device
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Reset the Gradients
        optimizer.zero_grad()
        
        "Debug"
        print("Input shape:", inputs.shape)
        
        # Forward Pass
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        
        "Debug"
        print("Embedded shape:", embedded.shape)
        """In your case, the input_dim is set to 1000, so the input tensor should have the shape (8, 1000)"""
        
        # Backpropagate
        loss.backward()
        
        # Update the weights and loss
        optimizer.step()
        running_loss += loss.item()
        
    # Metric Calculations
    if TP == 0 and FN == 0:  # (Recall is 0/0)
        recall = 0.0
        precision = 0.0
        f1_score = 0.0
        
    elif TP == 0 and FP == 0:  # (Precision is 0/0)
        recall = 0.0
        precision = 0.0
        f1_score = 0.0
    
    else:
        recall = TP/(TP+FN)
        precision = TP/(TP+FP)
        f1_score = (2*precision*recall)/(precision+recall)
        
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader):.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}, F1 Score: {f1_score:.4f}")
        

Input shape: torch.Size([8, 1000])


AssertionError: was expecting embedding dimension of 256, but got 1000