In [4]:
!pip install --upgrade augly
!pip install nlpaug



In [34]:
import os
import random
import shutil
import glob
from sklearn.model_selection import train_test_split
import augly.image as imaugs
import augly.utils as utils
from PIL import Image
import numpy as np

# Define dataset paths
DATASET_PATH = "dataset"
TRAIN_PATH = "Train"
TEST_PATH = "Test"
AUGMENTED_TRAIN_PATH = "Aug_train"

# Create train and test directories
os.makedirs(TRAIN_PATH, exist_ok=True)
os.makedirs(TEST_PATH, exist_ok=True)
os.makedirs(AUGMENTED_TRAIN_PATH, exist_ok=True)

# Load image paths
image_paths = glob.glob(os.path.join(DATASET_PATH, "*.jpg"))

# Extract labels from filenames
cats = [path for path in image_paths if "cat" in os.path.basename(path)]
dogs = [path for path in image_paths if "dog" in os.path.basename(path)]

# Ensure equal distribution in training set

cats_train, cats_test = train_test_split(cats, test_size=0.2, random_state=1)
dogs_train, dogs_test = train_test_split(dogs, test_size=0.2, random_state=1)

train_paths = cats_train + dogs_train
test_paths = cats_test + dogs_test
train_labels = ["cat"] * len(cats_train) + ["dog"] * len(dogs_train)
test_labels = ["cat"] * len(cats_test) + ["dog"] * len(dogs_test)

# Move images to respective directories
def move_files(file_paths, labels, destination):
    for file_path, label in zip(file_paths, labels):
        label_path = os.path.join(destination, label)
        os.makedirs(label_path, exist_ok=True)
        shutil.copy(file_path, os.path.join(label_path, os.path.basename(file_path)))

move_files(train_paths, train_labels, TRAIN_PATH)
move_files(train_paths, train_labels, AUGMENTED_TRAIN_PATH)
move_files(test_paths, test_labels, TEST_PATH)

print('total training samples are', len(train_paths))




total training samples are 112


In [35]:
# Define augmentation function
# Define augmentation function
def augment_image(image_path):
    image = Image.open(image_path).convert("RGB")
    
    augmentations = [
        imaugs.Rotate(degrees=random.uniform(-30, 30)),
        imaugs.Blur(radius=random.uniform(1, 3)),
        imaugs.Crop(random.uniform(0.1, 0.3)),
        imaugs.Sharpen(factor=random.uniform(1.5, 2.5)),
        imaugs.Brightness(factor=random.uniform(0.5, 1.5)),
        imaugs.Contrast(factor=random.uniform(0.5, 1.5)),
        imaugs.Saturation(factor=random.uniform(0.5, 1.5)),
        imaugs.Pixelization(ratio=random.uniform(0.1, 0.3)),
    ]
    
    augmented_images = []
    for _ in range(2):  # Generate twice the train set size
        random.shuffle(augmentations)  # Ensure different sequence
        aug_pipeline = imaugs.Compose(augmentations[:3])  # Select 3 random augmentations
        augmented_image = aug_pipeline(image)
        augmented_images.append(augmented_image)
    
    return augmented_images

# Augment the train set
train_images = glob.glob(os.path.join(AUGMENTED_TRAIN_PATH, "*/*.jpg"))

for img_path in train_images:
    label = "cat" if "cat" in os.path.basename(img_path) else "dog"
    augmented_images = augment_image(img_path)
    base_name = os.path.basename(img_path).split(".")[0]
    
    label_dir = os.path.join(AUGMENTED_TRAIN_PATH, label)  # Save augmented images in train set
    os.makedirs(label_dir, exist_ok=True)
    
    for idx, aug_img in enumerate(augmented_images):
        save_path = os.path.join(label_dir, f"{base_name}_aug{idx}.jpg")
        aug_img.save(save_path)

# Dataset statistics
original_train_count = len(train_images)
augmented_train_count = len(glob.glob(os.path.join(AUGMENTED_TRAIN_PATH, "*/*.jpg"))) - original_train_count
test_count = len(glob.glob(os.path.join(TEST_PATH, "*/*.jpg")))

dataset_stats = {
    "Original Train Set": original_train_count,
    "Augmented Train Set": augmented_train_count,
    "Final Train Set (Original + Augmented)": original_train_count + augmented_train_count,
    "Test Set": test_count
}

print("Dataset Statistics:")
for key, value in dataset_stats.items():
    print(f"{key}: {value}")

Dataset Statistics:
Original Train Set: 112
Augmented Train Set: 224
Final Train Set (Original + Augmented): 336
Test Set: 28


In [37]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
     ---------------------------------------- 0.0/44.4 kB ? eta -:--:--
     ---------------------------------------- 44.4/44.4 kB 1.1 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.2-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Downloading transformers-4.48.3-py3-none-any.whl (9.7 MB)
   ---------------------------------------- 0.0/9.7 MB ? eta -:--:--
   ---------------------------------------- 0.1/9.7 MB 2.2 MB/s eta 0:00:05
   - -------------------------------------- 0.4/9.7 MB 4.2 MB/s eta 0:00:03
   --- ------------------------------------ 0.9/9.7 MB 6.2 MB/s eta 0:00:02
   ----- ---------

In [41]:
from transformers import AutoModelForImageClassification, AutoFeatureExtractor
import torch

model_name = "microsoft/resnet-50"

# Load the pre-trained model with randomly initialized weights
model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True)
model_aug = AutoModelForImageClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True)

feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
You should probably TRAIN this model on a down-

In [40]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import glob

# Define transforms (resize images to match ResNet-50 input size)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Required for ResNet-50
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])

class DogsCatsDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = 0 if "cat" in self.image_paths[idx] else 1  # 0 for cats, 1 for dogs
        return image, label

# Load train and test images
train_images = glob.glob("Train/*/*.jpg")
aug_train_images = glob.glob("Aug_train/*/*.jpg")
test_images = glob.glob("Test/*/*.jpg")

train_labels = [0 if "cat" in img else 1 for img in train_images]
aug_train_labels = [0 if "cat" in img else 1 for img in aug_train_images]
test_labels = [0 if "cat" in img else 1 for img in test_images]

# Create datasets and dataloaders
train_dataset = DogsCatsDataset(train_images, train_labels, transform)
aug_train_dataset = DogsCatsDataset(aug_train_images, aug_train_labels, transform)
test_dataset = DogsCatsDataset(test_images, test_labels, transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
aug_train_loader = DataLoader(aug_train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [42]:
import torch.nn as nn
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")


Epoch 1/10, Loss: 0.6914
Epoch 2/10, Loss: 0.6617
Epoch 3/10, Loss: 0.6348
Epoch 4/10, Loss: 0.6162
Epoch 5/10, Loss: 0.5877
Epoch 6/10, Loss: 0.5702
Epoch 7/10, Loss: 0.5389
Epoch 8/10, Loss: 0.5068
Epoch 9/10, Loss: 0.4876
Epoch 10/10, Loss: 0.4677


In [43]:
import torch.nn as nn
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_aug.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_aug.parameters(), lr=0.0001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model_aug.train()
    running_loss = 0.0
    
    for images, labels in aug_train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")


Epoch 1/10, Loss: 1.4762
Epoch 2/10, Loss: 1.4647
Epoch 3/10, Loss: 1.4870
Epoch 4/10, Loss: 1.4700
Epoch 5/10, Loss: 1.4646
Epoch 6/10, Loss: 1.4756
Epoch 7/10, Loss: 1.4750
Epoch 8/10, Loss: 1.4716
Epoch 9/10, Loss: 1.4711
Epoch 10/10, Loss: 1.4884


In [44]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).logits
        preds = torch.argmax(outputs, dim=1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.8214
Precision: 1.0000
Recall: 0.6429
F1 Score: 0.7826


In [45]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

model_aug.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_aug(images).logits
        preds = torch.argmax(outputs, dim=1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.5000
Precision: 0.5000
Recall: 0.3571
F1 Score: 0.4167
