In [None]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
import logging
logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import glob
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# The Colab login is triggered before the application-default credentials are
# read (presumably required for them to exist -- TODO confirm).
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): the name `drive` is rebound by `from google.colab import drive`
# in the following cell, so this PyDrive client is not reachable afterwards.
drive = GoogleDrive(gauth)

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): this import rebinds the name `drive`, replacing the PyDrive
# client that the previous cell assigned to the same name.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install timm

In [None]:
!unzip /content/drive/MyDrive/Dataset/Kvasir/kvasir-dataset-v2.zip -d /content/drive/MyDrive/Dataset/Kvasir

In [None]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import os
from tqdm import tqdm

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Preprocessing applied to every image, in order: fixed 224x224 resize,
# conversion to a tensor, then per-channel normalisation.
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)

# Image dataset read from the unzipped Kvasir folder, with the preprocessing attached.
dataset = ImageFolder('/content/drive/MyDrive/Dataset/Kvasir/kvasir-dataset-v2', transform=transform)

# Folder that receives the extracted feature files; created when missing.
output_dir = '/content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw'
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Get the class names
# (read from the ImageFolder dataset; the bare expression on the last line
# makes the notebook display the list)
class_names = dataset.classes
class_names

['dyed-lifted-polyps',
 'dyed-resection-margins',
 'esophagitis',
 'normal-cecum',
 'normal-pylorus',
 'normal-z-line',
 'polyps',
 'ulcerative-colitis']

In [None]:
import timm

class MobileOneFeatureExtractor(torch.nn.Module):
    """Wrap a timm ``mobileone_s4`` model and return its pre-classifier feature maps.

    Args:
        num_classes: Number of outputs of the classification head that timm
            builds. It is forwarded to ``timm.create_model`` (previously the
            argument was ignored and 8 was always used).
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``True``,
            which is what this class always did before the flag existed.

    Attributes:
        model: The full timm model, kept so checkpoint keys that start with
            ``model.`` have a matching destination.
        features: Every child of ``model`` except the last one, chained in a
            ``Sequential``. These are the same module objects as in ``model``,
            so weights loaded into ``model`` are the weights ``features`` uses.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        self.model = timm.create_model('mobileone_s4', pretrained=pretrained, num_classes=num_classes)
        # Remove the final classification layer: keep all children but the last.
        self.features = torch.nn.Sequential(*list(self.model.children())[:-1])

    def forward(self, x):
        """Return the feature maps for a batch of images.

        For the 224x224 inputs used in this notebook the saved features have
        shape (N, 2048, 7, 7).
        """
        return self.features(x)


# Load the model state_dict from the checkpoint file
checkpoint_path = '/content/drive/MyDrive/Dataset/Kvasir/Model_checkpoint/mobileOne_s4-epoch=14-val_acc=0.9187.ckpt'
# map_location remaps the stored tensors onto the device chosen earlier in the
# notebook, so a checkpoint saved on a GPU can still be opened on a CPU runtime.
# torch.load unpickles the file: only open checkpoints you trust.
checkpoint = torch.load(checkpoint_path, map_location=device)
model_state_dict = checkpoint['state_dict']

# Initialize the feature extractor with the best checkpoint
feature_extractor = MobileOneFeatureExtractor(num_classes=8)
# strict=False tolerates key mismatches between checkpoint and wrapper, which
# also means a checkpoint whose keys match nothing would be accepted silently.
# Inspect the result so that case cannot go unnoticed.
load_result = feature_extractor.load_state_dict(model_state_dict, strict=False)
loaded_keys = set(model_state_dict) - set(load_result.unexpected_keys)
if not loaded_keys:
    raise RuntimeError(
        f"No parameter in {checkpoint_path} matched the feature extractor; "
        "the extracted features would not come from the fine-tuned weights."
    )
print(
    f"Checkpoint keys loaded: {len(loaded_keys)} | "
    f"missing in checkpoint: {len(load_result.missing_keys)} | "
    f"unexpected in checkpoint: {len(load_result.unexpected_keys)}"
)
feature_extractor.to(device)
# eval() switches layers such as batch norm to inference behaviour.
feature_extractor.eval()


Downloading model.safetensors:   0%|          | 0.00/60.4M [00:00<?, ?B/s]

MobileOneFeatureExtractor(
  (model): ByobNet(
    (stem): MobileOneBlock(
      (conv_kxk): ModuleList(
        (0): ConvNormAct(
          (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
        )
      )
      (conv_scale): ConvNormAct(
        (conv): Conv2d(3, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
      )
      (drop_path): Identity()
      (attn): Identity()
      (act): ReLU(inplace=True)
    )
    (stages): Sequential(
      (0): Sequential(
        (0): MobileOneBlock(
          (conv_kxk): ModuleList(
            (0): ConvNormAct(
              (conv): Conv2d(64, 64, kernel_s

In [None]:
import pickle

# Maximum number of samples stored in each chunk file
chunk_size = 1000

# Number of images sent through the network per forward pass. Batching replaces
# the previous one-image-at-a-time loop; results may differ from single-image
# inference only by floating-point rounding.
batch_size = 32

# Prefix shared by every chunk file name
filename_prefix = "rifat_mobileone_s4"

# Counter used to number the chunk files
chunk_counter = 0

# Walk over the dataset in consecutive index ranges and save one file per range
with tqdm(total=len(dataset), desc="Extracting Features", dynamic_ncols=True) as pbar_total:
    for start_idx in range(0, len(dataset), chunk_size):
        end_idx = min(start_idx + chunk_size, len(dataset))

        # Stream this index range in order (shuffle=False) instead of first
        # loading all of its images into a Python list.
        chunk_subset = torch.utils.data.Subset(dataset, range(start_idx, end_idx))
        chunk_loader = DataLoader(chunk_subset, batch_size=batch_size, shuffle=False)

        # Extract features for the current chunk
        features = []
        labels = []
        with torch.no_grad():
            for images, batch_labels in chunk_loader:
                batch_features = feature_extractor(images.to(device))
                # Move results off the GPU right away so only one batch lives there.
                features.append(batch_features.cpu())
                labels.append(batch_labels)
                # Advance the bar per batch so progress is visible within a chunk.
                pbar_total.update(images.size(0))

        # Concatenate the features and labels for the current chunk
        chunk_features = torch.cat(features, dim=0)
        chunk_labels = torch.cat(labels, dim=0)

        # Define the filename for the current chunk
        filename = f"{filename_prefix}_chunk{chunk_counter}_features.pkl"
        output_path = os.path.join(output_dir, filename)

        # Create a dictionary for the current chunk
        data_dict = {'features': chunk_features, 'labels': chunk_labels}

        # Save the current chunk as a pickle file
        with open(output_path, 'wb') as file:
            pickle.dump(data_dict, file)

        # Update the chunk counter
        chunk_counter += 1

print("All chunks saved.")

Extracting Features: 100%|██████████| 8000/8000 [04:45<00:00, 28.06it/s]

All chunks saved.





In [None]:
import os
# List the contents of the feature output directory to confirm which chunk
# files were written.
os.listdir("/content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw")

['rifat_mobileone_s4_chunk0_features.pkl',
 'rifat_mobileone_s4_chunk1_features.pkl',
 'rifat_mobileone_s4_chunk2_features.pkl',
 'rifat_mobileone_s4_chunk3_features.pkl',
 'rifat_mobileone_s4_chunk4_features.pkl',
 'rifat_mobileone_s4_chunk5_features.pkl',
 'rifat_mobileone_s4_chunk6_features.pkl',
 'rifat_mobileone_s4_chunk7_features.pkl']

In [None]:
import os
import pickle
import numpy as np
import torch


# Directory where the chunked pickle files are located
chunked_pickle_dir = '/content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw'

# Common prefix of the chunk files to merge
chunk_prefix = "rifat_mobileone_s4_chunk"


def _chunk_sort_key(name):
    """Order chunk files by their numeric index, so chunk2 comes before chunk10."""
    index_text = name[len(chunk_prefix):].split("_", 1)[0]
    if index_text.isdecimal():
        return (0, int(index_text), name)
    # Matching names without a numeric index are kept, sorted last by name.
    return (1, 0, name)


# os.listdir returns entries in arbitrary order; sort them so the combined
# file always holds the samples in the order they were extracted.
chunk_filenames = sorted(
    (
        name
        for name in os.listdir(chunked_pickle_dir)
        if name.startswith(chunk_prefix) and name.endswith(".pkl")
    ),
    key=_chunk_sort_key,
)
if not chunk_filenames:
    raise FileNotFoundError(
        f"No '{chunk_prefix}*.pkl' files found in {chunked_pickle_dir}"
    )

# Initialize empty lists to store features and labels as numpy arrays
all_features = []
all_labels = []

# Loop through the chunked pickle files in index order
for filename in chunk_filenames:
    file_path = os.path.join(chunked_pickle_dir, filename)
    # pickle.load can execute code stored in the file; these files are expected
    # to be the chunk files written earlier by this notebook.
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
    features = data['features'].numpy()
    labels = data['labels'].numpy()

    # Append the loaded features and labels to the respective lists
    all_features.append(features)
    all_labels.append(labels)

# Combine all loaded features and labels as NumPy arrays
combined_features = np.concatenate(all_features, axis=0)
combined_labels = np.concatenate(all_labels, axis=0)

# Get the feature length (size of the second axis of the feature array)
feature_length = combined_features.shape[1]

# Define the filename with feature length
combined_output_filename = f"many_ft_rifat_mobileOne_s4_{feature_length}.pkl"
combined_output_path = os.path.join(chunked_pickle_dir, combined_output_filename)

# Save the combined features and labels as a single pickle file
with open(combined_output_path, 'wb') as combined_file:
    pickle.dump({'features': combined_features, 'labels': combined_labels}, combined_file)


### Inspecting feature file

In [None]:
import pickle
import numpy as np
import torch

# Location of the combined feature file to inspect
feature_file = '/content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw/many_ft_rifat_mobileOne_s4_2048.pkl'

# Read back the dictionary that holds the combined features and labels
with open(feature_file, mode='rb') as file:
    data = pickle.load(file)

# Pull both arrays out of the dictionary in one step
features, labels = data['features'], data['labels']

# PyTorch tensor counterparts of the two arrays
features_tensor, labels_tensor = torch.from_numpy(features), torch.from_numpy(labels)



In [None]:
# Sanity check: the tensor built with torch.from_numpy and the NumPy array it
# was built from should report the same shape.
print(features_tensor.shape)
print(features.shape)

torch.Size([8000, 2048, 7, 7])
(8000, 2048, 7, 7)


- 8000: the number of samples (images); there is one feature tensor per image.
- 2048: the number of channels in the feature map produced for each image.
- 7 and 7: the height and width of each feature map. The network reduces every 224x224 input image to a 7x7 spatial grid, so these are feature-map positions, not image pixels.

In [None]:
# Average over the two spatial axes to get one vector per sample.
# The ndim guard makes this cell safe to re-run: once `features` is already
# pooled to 2-D, averaging over axes 2 and 3 again would raise an error.
if features.ndim == 4:
    features = np.mean(features, axis=(2, 3))
features.shape

(8000, 2048)

In [None]:
# Display the label array loaded from the combined feature file.
labels

array([0, 0, 0, ..., 7, 7, 7])

In [None]:
import pickle

# Destination of the final feature file
output_file = '/content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw/rifat_mobileOne_s4_2048.pkl'

# Bundle the current features and labels under the same keys used by the
# other feature files in this notebook
data = {
    'features': features,
    'labels': labels,
}

# Serialise the bundle to disk with pickle
with open(output_file, mode='wb') as file:
    pickle.dump(data, file)

print(f"Data saved to {output_file}")


Data saved to /content/drive/MyDrive/Dataset/Kvasir/Feature_file_bw/rifat_mobileOne_s4_2048.pkl
