# Preparing the data set

In [None]:
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

class datacreator(Dataset):
    """Dataset that serves every regular file in a directory as an RGB image.

    Each item is an ``(image, label)`` pair. The label is either one value
    shared by all images or, when a list is given, the list entry at the
    same index as the image.
    """

    def __init__(self, img_dir, label, transform=None):
        """
        img_dir: Directory containing images.
        label: Label for the images or a list of labels for each image.
            List entries are matched to the file names in sorted order.
        transform: Optional transform to be applied on an image.
        """
        self.img_dir = img_dir
        self.label = label
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping (and therefore any per-image label list)
        # reproducible across runs and machines. Sub-directories are skipped.
        self.img_names = sorted(
            name
            for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
        )

    def __len__(self):
        # Return the total number of images
        return len(self.img_names)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_names[idx])

        # Open inside a context manager so the underlying file handle is
        # released as soon as the pixel data has been converted to RGB.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # If label is a single label, use it for all images; else, get the corresponding label
        if isinstance(self.label, list):
            label = self.label[idx]
        else:
            label = self.label

        # Apply transformations if any
        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing pipeline applied to each image by the dataset
transform = transforms.Compose(
    [
        # Bring every image to a fixed 224x224 size
        transforms.Resize(size=(224, 224)),
        # Convert the image to a PyTorch tensor
        transforms.ToTensor(),
        # Normalize each channel with the given mean / std
        # (these look like the usual ImageNet statistics - confirm if it matters)
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Working with zip files: upload the archive and extract it
from google.colab import files
import zipfile

# Ask for the archive through the Colab upload widget
uploaded_nethmi = files.upload()

# Archive to read and the folder it is unpacked into
zip_file_name, output_dir = "Nethmi.zip", "Nethmi"

# Unpack every member of the archive into the output folder
with zipfile.ZipFile(zip_file_name, mode='r') as archive:
    archive.extractall(path=output_dir)

# Report that the extraction step has finished
print("Extraction complete!")

Saving Nethmi.zip to Nethmi.zip
Extraction complete!


In [None]:
# Show what the archive was unpacked to
extracted_files = os.listdir(output_dir)
print("Files in the directory:", extracted_files)

# Use the first listed entry as the folder that holds the images
# NOTE(review): os.listdir order is arbitrary - confirm the archive has a single top-level entry
subdir_nethmi = os.path.join(output_dir, extracted_files[0])

img_dir = output_dir  # Extraction root; the dataset below reads from subdir_nethmi
label = 1

# Build the dataset: one shared label for every image in the folder
dataset = datacreator(img_dir=subdir_nethmi, label=label, transform=transform)

print("Number of images found:", len(dataset))

# Shuffling loader that yields batches of 32
dataloader_nethmi = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

Files in the directory: ['Nethmi']
Number of images found: 4622


In [None]:
# Ask for the second archive through the Colab upload widget
uploaded_none = files.upload()

# Archive to read and the folder it is unpacked into
zip_file_none, output_dir_none = "None.zip", "None"

# Unpack every member of the archive into the output folder
with zipfile.ZipFile(zip_file_none, mode='r') as archive:
    archive.extractall(path=output_dir_none)

# Report that the extraction step has finished
print("Extraction complete!")

In [None]:
# Show what the archive was unpacked to
extracted_none = os.listdir(output_dir_none)
print("Files in the directory:", extracted_none)

# Use the first listed entry as the folder that holds the images
# NOTE(review): os.listdir order is arbitrary - confirm the archive has a single top-level entry
subdir_none = os.path.join(output_dir_none, extracted_none[0])

img_dir = output_dir_none  # Extraction root; the dataset below reads from subdir_none
label = 0

# Build the dataset: one shared label for every image in the folder
dataset_none = datacreator(img_dir=subdir_none, label=label, transform=transform)

print("Number of images found:", len(dataset_none))

# Shuffling loader that yields batches of 32
dataloader_none = DataLoader(dataset=dataset_none, batch_size=32, shuffle=True)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Choose the compute device: GPU when CUDA is available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ResNet-18 initialised with pretrained weights
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer so it outputs num_classes scores
# NOTE(review): the cells shown here only create labels 0 and 1 - confirm 10 is intended
num_classes = 10  # Change this to match your dataset
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

# Place the model on the selected device
model = model.to(device)


In [None]:
# Merge the two datasets and serve them through one loader

from torch.utils.data import ConcatDataset

# Chain the datasets back to back (concatenation happens on datasets, not loaders)
combined_dataset = ConcatDataset(datasets=[dataset, dataset_none])

# Single shuffling loader over the merged dataset, 32 samples per batch
combined_dataloader = DataLoader(dataset=combined_dataset, batch_size=32, shuffle=True)

# Walk through every batch and print its image tensor shape and its labels
for images, labels in combined_dataloader:
    print(
        f"Batch of images shape: {images.shape}",
        f"Batch of labels: {labels}",
        sep="\n",
    )


In [None]:
from google.colab import files
# Upload and unpack the "None" archive again.
# The results go into the *_none names (the same ones the earlier None.zip
# cell uses) so this cell no longer overwrites uploaded_nethmi, zip_file_name
# and output_dir, which describe the Nethmi archive.
uploaded_none = files.upload()

zip_file_none = "None.zip"
output_dir_none = "None"

# Extract the zip file
with zipfile.ZipFile(zip_file_none, 'r') as zip_ref:
    zip_ref.extractall(output_dir_none)

# Verify extraction
print("Extraction complete!")

In [None]:

# Usage template: point img_dir at a real folder before running this cell
img_dir = 'path_to_images'  # Placeholder for the image directory
label = 0  # One label shared by every image in the folder
# For per-image labels, pass a list with one entry per image instead, e.g.:
# labels = [0, 1, 0, 1]

# Build the dataset from the folder, the label and the shared transform
dataset = datacreator(img_dir, label, transform)

# Shuffling loader that yields batches of 4
data_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

# Walk through every batch and print its image tensor shape and its labels
for images, labels in data_loader:
    print(
        f"Batch of images shape: {images.shape}",
        f"Batch of labels: {labels}",
        sep="\n",
    )