# Benchmarking the performance of Pelican part2

## Dataset
[ImageNet](https://www.kaggle.com/c/imagenet-object-localization-challenge/overview)
Using this [script](https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh) to prepare the data first. Then train it using ResNet50.

## Hardware
Google Colab T4 GPU with high RAM

In [3]:
!pip install torchdata
!pip install pelicanfs fsspec

Collecting torchdata
  Downloading torchdata-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/4.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.7/4.7 MB[0m [31m23.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m4.6/4.7 MB[0m [31m91.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2->torchdata)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2->torchdata)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2->tor

In [2]:
import torch
print(torch.cuda.is_available())


True


In [1]:
import os
from PIL import Image
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
import fsspec
import numpy as np
import pandas as pd
from pelicanfs.core import PelicanFileSystem
import time


class RemoteImageFolder(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.fs = PelicanFileSystem()  # Initialize PelicanFileSystem
        self.samples = self.make_dataset()
        print(f"Found {len(self.samples)} samples in {self.root}")

    def make_dataset(self):
        samples = []
        for rootpath, _, dirnames in self.fs.walk(self.root):
          for dirctory in dirnames:
            dirpath = os.path.join(rootpath, dirctory)
        for subpath, _, filenames in self.fs.walk(dirpath):
            for image in filenames:
                if image.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif')):
                    samples.append((os.path.join(subpath, image), os.path.basename(dirctory)))
        return samples

    def default_loader(self, path):
        with self.fs.open(path, 'rb') as f:
            return Image.open(f).convert('RGB')

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, target = self.samples[idx]
        sample = self.default_loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, target

# Define transformations for training and validation
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

start_time = time.time()
# Define the Pelican paths
trainfile_path = "pelican://osg-htc.org/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/train"
valfile_path = "pelican://osg-htc.org/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/val"

# Load the datasets
train_dataset = RemoteImageFolder(root=trainfile_path, transform=train_transforms)
val_dataset = RemoteImageFolder(root=valfile_path, transform=val_transforms)
print(train_dataset[1])

# Create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)
end_time = time.time()
elapsed_time = end_time - start_time
print(f'Perparing data time: {elapsed_time:.2f} seconds')


Found 1300 samples in pelican://osg-htc.org/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/train
Found 50 samples in pelican://osg-htc.org/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/val
(tensor([[[ 1.5810,  1.5810,  1.5297,  ...,  1.9578,  1.9578,  1.9578],
         [ 1.5810,  1.5810,  1.5468,  ...,  1.9920,  2.0092,  1.9920],
         [ 1.5982,  1.5982,  1.5810,  ...,  1.9407,  1.9749,  1.9749],
         ...,
         [-1.2445, -1.1932, -1.1760,  ..., -1.8268, -1.8268, -1.8268],
         [-1.2274, -1.1589, -1.2103,  ..., -1.8268, -1.8268, -1.8268],
         [-1.1932, -1.1760, -1.1932,  ..., -1.8097, -1.8268, -1.8268]],

        [[ 2.3585,  2.3585,  2.3585,  ...,  2.4286,  2.4286,  2.4286],
         [ 2.3585,  2.3585,  2.3761,  ...,  2.4286,  2.4286,  2.4286],
         [ 2.3761,  2.3761,  2.3585,  ...,  2.4286,  2.4111,  2.4286],
         ...,
         [-0.9503, -0.9853, -1.0028,  ..., -1.7031, -1.7031, -1.7031],
         [-0.9678, -0.9503, -0.9503,  ..., -1.7031, -1.7031, -1.7031],
         [

In [8]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

import torch.multiprocessing as mp
import time

# Set multiprocessing start method to 'spawn'
mp.set_start_method('spawn', force=True)

# Load a pre-trained model (e.g., ResNet-50)
model = models.resnet50(pretrained=True)

# Modify the final layer to match the number of classes in ImageNet
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1000)  # ImageNet has 1000 classes

# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        start_time = time.time()
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                data_loader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                data_loader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
           #     labels = labels.tensor(labels, dtype=torch.long).to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                # Track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
        #            loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
 #                       loss.backward()
                        optimizer.step()

                # Statistics
  #              running_loss += loss.item() * inputs.size(0)
          #      running_corrects += torch.sum(preds == labels.data)

#            epoch_loss = running_loss / len(data_loader.dataset)
 #           epoch_acc = running_corrects.double() / len(data_loader.dataset)

 #           print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f'Elapsed time: {elapsed_time:.2f} seconds')

    return model

# Train the model
model = train_model(model, {'train': train_loader, 'val': val_loader}, criterion, optimizer, num_epochs=25)


Epoch 0/24
----------
Elapsed time: 17.36 seconds
Epoch 1/24
----------
Elapsed time: 17.12 seconds
Epoch 2/24
----------
Elapsed time: 16.90 seconds
Epoch 3/24
----------
Elapsed time: 16.84 seconds
Epoch 4/24
----------
Elapsed time: 16.75 seconds
Epoch 5/24
----------
Elapsed time: 16.79 seconds
Epoch 7/24
----------
Elapsed time: 16.90 seconds
Epoch 8/24
----------
Elapsed time: 16.82 seconds
Epoch 9/24
----------
Elapsed time: 16.78 seconds
Epoch 10/24
----------
Elapsed time: 16.64 seconds
Epoch 11/24
----------
Elapsed time: 16.91 seconds
Epoch 12/24
----------
Elapsed time: 16.98 seconds
Epoch 13/24
----------
Elapsed time: 17.22 seconds
Epoch 14/24
----------
Elapsed time: 17.07 seconds
Epoch 15/24
----------
Elapsed time: 17.18 seconds
Epoch 16/24
----------
Elapsed time: 17.27 seconds
Epoch 17/24
----------
Elapsed time: 17.34 seconds
Epoch 18/24
----------
Elapsed time: 17.12 seconds
Epoch 19/24
----------
Elapsed time: 16.79 seconds
Epoch 20/24
----------
Elapsed time: 16.

In [None]:
torch.save(model.state_dict(), 'resnet50_imagenet.pth')


## Using DataPipe

In [None]:
# Streaming version
import fsspec
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem
zipfilepath = "pelican://osg-htc.org/chtc/PUBLIC/hzhao292/imagenet-object-localization-challenge.zip"
trainfile_path = "/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/train"
valfile_path = "pelican://osg-htc.org/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/val"
fs = fsspec.filesystem('pelican')
pfs = PelicanFileSystem('pelican://osg-htc.org/')

# dp2 = IterableWrapper([zipfilepath])  \
#         .open_files_by_fsspec(mode="rb") \
#         .load_from_zip()
# for path, filestream in dp2:
#     print(path, filestream)
#     break

In [12]:
import fsspec
import torch
torch.utils.data.datapipes.utils.common.DILL_AVAILABLE = torch.utils._import_utils.dill_available()
# Need add this line if run in google colab, or it will cause error
# torch.utils.data.datapipes.utils.common.DILL_AVAILABLE = torch.utils._import_utils.dill_available()
from torchdata.datapipes.iter import IterableWrapper, FileOpener
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.io import read_image
from torchdata.datapipes.iter import IterDataPipe
from torchdata import dataloader2 as DataLoader2

class RemoteImageDataPipe(IterDataPipe):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.fs = fsspec.filesystem('pelican')
        self.files = self.fs.ls(root)

    def __iter__(self):
        for file in self.files:
            with self.fs.open(file, 'rb') as f:
                img = read_image(f).convert('RGB')
                if self.transform:
                    img = self.transform(img)
                yield img

# Define transformations for training and validation
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define the S3 paths
trainfile_path = "/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/train"
valfile_path = "/chtc/PUBLIC/hzhao292/ILSVRC/Data/CLS-LOC/val"

# Create DataPipes for training and validation datasets
train_datapipe = RemoteImageDataPipe(root=trainfile_path, transform=train_transforms)
val_datapipe = RemoteImageDataPipe(root=valfile_path, transform=val_transforms)
print(type(train_datapipe))

# Create DataLoaders
train_loader = DataLoader2(train_datapipe, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader2(val_datapipe, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

for i in train_loader:
    print(i)
    break

<class '__main__.RemoteImageDataPipe'>


TypeError: 'module' object is not callable