In [1]:
import pandas as pd
import os
from skimage import io
from skimage.transform import resize
import torch
import torchvision
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
df = pd.read_csv("dogcat\\cat.csv")
df2 = df.copy()
print(len(df))

12500


In [4]:
for index, row in df.iterrows():
    id_value = row["id"]
    label_value = row["label"]
    id_value -= 1
    id_str = str(id_value)
    value = f"cat.{id_str}.JPG"
    #print(value)
    #print(label_value)
    df["id"] = value
    df["label"] = label_value

In [5]:
for index, row in df2.iterrows():
    id_value = row["id"]
    label_value = row["label"]
    id_value -= 1
    id_str = str(id_value)
    value = f"dog.{id_str}.JPG"
    #print(value)
    #print(label_value)
    df2["id"] = value
    df2["label"] = label_value+1

In [6]:
result_df = pd.concat([df, df2], axis=0)
print(result_df)

                  id  label
0      cat.12499.JPG      0
1      cat.12499.JPG      0
2      cat.12499.JPG      0
3      cat.12499.JPG      0
4      cat.12499.JPG      0
...              ...    ...
12495  dog.12499.JPG      1
12496  dog.12499.JPG      1
12497  dog.12499.JPG      1
12498  dog.12499.JPG      1
12499  dog.12499.JPG      1

[25000 rows x 2 columns]


In [7]:
class catdogDataset(Dataset):
    def __init__(self, annotation, rootdic, transform = None, target_size=(224, 224)):
        self.annotation = annotation
        self.rootdic = rootdic
        self.transform = transform
        self.target_size = target_size
        
    def __len__(self):
        return len(self.annotation)
    def __getitem__(self, index):
        imgPath = os.path.join(self.rootdic, self.annotation.iloc[index, 0])
        image = io.imread(imgPath)
        image = resize(image, self.target_size, anti_aliasing=True) # convert the image into 224x224
        
        y = torch.tensor([float(self.annotation.iloc[index, 1])])
        
        if self.transform:
            image = self.transform(image)
        
        return (image, y)
        

In [8]:
dataset = catdogDataset(annotation = result_df, rootdic = "dogcat\\traindata", transform = transforms.ToTensor(), target_size=(224, 224))

In [9]:
train_set, test_set = torch.utils.data.random_split(dataset, [20000, 5000])

In [10]:
in_channel = 3
num_classes = 2
batch_size = 32
num_epochs = 5
learning_rate = 0.001

In [11]:
train_loader = DataLoader(dataset = train_set, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_set, batch_size = batch_size, shuffle = True)

In [12]:
for batch, (data, target) in enumerate(test_loader):
    print(data.shape)
    print(target.shape)
    break

torch.Size([32, 3, 224, 224])
torch.Size([32, 1])


In [13]:
from torchvision import models, transforms

In [20]:
vgg16 = models.vgg16(pretrained=True)

for param in vgg16.parameters():
    param.requires_grad = False

# Enable gradients for the final fully connected layer
vgg16.classifier[6].requires_grad = True

# Print the model summary to inspect the architecture
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [21]:
num_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_features, 1) 

In [22]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr=0.0001)

In [23]:
vgg16.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [24]:

# Modify the optimizer to only optimize parameters with gradients
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, vgg16.parameters()), lr=0.0001)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Training loop
for epoch in range(num_epochs):
    vgg16.train()

    for images, labels in train_loader:
        images = images.to(torch.float32)
        labels = labels.to(torch.float32) 

        outputs = vgg16(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Learning rate scheduling step
    scheduler.step()

    # Validation (optional) - evaluate on your validation set here if available

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [1/5], Loss: 0.0035
Epoch [2/5], Loss: 0.0014
Epoch [3/5], Loss: 0.0006
Epoch [4/5], Loss: 0.0003
Epoch [5/5], Loss: 0.0002


In [25]:
# Save the model and additional information
checkpoint = {
    'epoch': epoch,
    'model_state_dict': vgg16.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item(),
    # Add any other information you want to save
}

torch.save(checkpoint, 'saved_vgg16.pth')


In [28]:
vgg16 = models.vgg16(pretrained=True)
num_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_features, 1) 

In [29]:
# Create an instance of the model
#model = VGG_net(in_channels=3, num_classes=1)

# Load the saved model
checkpoint = torch.load('saved_vgg16.pth')

# Load the model's state dictionary
vgg16.load_state_dict(checkpoint['model_state_dict'])

# Load the optimizer's state dictionary (if needed)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Other information, e.g., epoch, loss
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Set the model to evaluation mode (for inference)
vgg16.eval()


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [30]:
import torch
from torchvision import transforms
from PIL import Image

def inference_single_image(model, image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    image = transform(image)
    image = image.unsqueeze(0)  # Add batch dimension

    model.eval()
    
    with torch.no_grad():
        image = image.to(device)
        output = model(image)
        print(output)
        probability = torch.sigmoid(output).item()
    
    return probability


In [35]:
image_path = "C:\\Users\\Admin\\dogcat\\54.jpg"
probability = inference_single_image(vgg16, image_path)
print(probability)

tensor([[-0.1555]])
0.4612075388431549


In [36]:
def predict_class(probability, threshold=0.5):
    if probability > threshold:
        return "Dog"
    else:
        return "Cat"

In [37]:
class_prediction = predict_class(probability)
print(f"The predicted class is: {class_prediction}")

The predicted class is: Cat
