In [1]:
# Import the necessary libraries
import os
import torch
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import csv

In [2]:
# Pick the compute device: the GPU when CUDA is usable, the CPU otherwise
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda


In [3]:
# Define a dataset that loads the images from a folder
class FolderDataset(Dataset):
    """Dataset that serves every entry of a folder as a resized image tensor.

    Each name returned by ``os.listdir(folder_path)`` is treated as an image
    file. ``self.files`` keeps the names in that listing order, so index
    ``idx`` always refers to ``self.files[idx]``.

    Args:
        folder_path: Directory containing the image files.
        transform: Optional callable applied to the resized tensor (for
            example a normalization step). Nothing beyond RGB conversion,
            ``ToTensor`` and the 224x224 resize is applied by default.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        self.files = os.listdir(folder_path)
        # Build the fixed preprocessing steps once instead of on every item.
        self._to_tensor = transforms.ToTensor()
        self._resize = transforms.Resize((224, 224))

    def __len__(self):
        """Return the number of entries found in the folder."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load, convert and resize the image stored at position ``idx``.

        Returns:
            The image as a 3-channel, 224x224 tensor, passed through
            ``transform`` when one was supplied.
        """
        img_path = os.path.join(self.folder_path, self.files[idx])
        # Open inside a context manager so the file handle is released right
        # away, and force RGB so grayscale, palette and RGBA files all yield
        # the same 3-channel layout the downstream model expects.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        # Convert the image to a PyTorch tensor
        image = self._to_tensor(image)
        # Resize the image to 224x224
        image = self._resize(image)
        if self.transform:
            image = self.transform(image)
        return image

In [4]:
# Create a dataset that loads the images from the "images" folder
image_dir = "data/images"
dataset = FolderDataset(image_dir)
# Reuse the dataset's own file list instead of listing the directory a second
# time: two independent os.listdir() calls are not guaranteed to agree on
# order, which would pair features with the wrong file name.
image_list = dataset.files

# Create a dataloader for the dataset.
# shuffle=False keeps batches in dataset order so they can be matched back to
# image_list; the worker count is capped at the number of available CPUs.
dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=min(10, os.cpu_count() or 1),
)

In [5]:
# Load the pre-trained DenseNet-121 model, switch it to evaluation mode and
# move it to the selected device in one chained expression (both eval() and
# to() return the module itself).
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of `weights=...` -- confirm the installed version before switching.
model = torchvision.models.densenet121(pretrained=True).eval().to(device)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /home/temsfrog/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


  0%|          | 0.00/30.8M [00:00<?, ?B/s]

In [6]:
# Open the CSV file that will receive one row of extracted features per image.
# The handle stays open across cells and is closed once extraction finishes.

header = ['image_id', "features"]
# create csv file; newline='' is required by the csv module so that the
# writer's own line terminators are not translated a second time (which
# produces blank rows on Windows)
f_features = open('data/features_densenet.csv', 'w', newline='')
# initialize writer for csv
writer_features = csv.writer(f_features)
# write header
writer_features.writerow(header)

19

In [7]:
import tqdm

# File names in the exact order the dataset indexes them. Taking them from the
# dataset itself guarantees each feature row is labelled with the file it was
# actually computed from.
file_names = dataloader.dataset.files
# Running position of the next sample; valid because the loader was created
# with shuffle=False and therefore yields samples in dataset order.
sample_idx = 0

# Extract features from the images in the dataset.
# Inference only: disabling autograd avoids building a graph for every batch.
with torch.no_grad():
    for inputs in tqdm.tqdm(dataloader):
        # Move the input images to the specified device
        inputs = inputs.to(device)

        # Run the convolutional feature extractor of the DenseNet-121 model
        features = model.features(inputs)

        # Convert the features to a NumPy array
        features = features.detach().cpu().numpy()
        # Flatten each sample's feature map to a 1D vector
        features = features.reshape(features.shape[0], -1)

        # Write one CSV row per sample so that no sample is dropped when the
        # loader is configured with a batch size larger than 1
        for sample_features in features:
            writer_features.writerow(
                [file_names[sample_idx], sample_features.tolist()]
            )
            sample_idx += 1

100%|██████████| 1491/1491 [01:49<00:00, 13.62it/s]


In [8]:
# Finish the CSV export: the rows were already written by the csv writer
# during feature extraction, so all that is left is releasing the handle.

# Close the file (this also flushes any rows still buffered in memory)
f_features.close()