In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import torch
import torchvision
from torch import optim
from torch import nn
import torch.utils.data
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torch.autograd import Variable
from matplotlib import image as img
from torchvision import datasets, transforms, models
import torch.utils.data as data
from torch.utils.data import Dataset
import os

In [2]:
# Define the data transformations for data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to a consistent size
    transforms.ToTensor(),  # Convert images to PyTorch tensors
])

# Set the path to your dataset
data_dir = './pellegrin_db'

# Create a dataset from the image folders
dataset = datasets.ImageFolder(data_dir, transform=transform)

# Define the ratio for train and test split
train_ratio = 0.8  # 80% of the data for training, 20% for testing

# Calculate the sizes of the train and test sets
num_data = len(dataset)
num_train = int(train_ratio * num_data)
num_test = num_data - num_train

# Split the dataset into train and test sets
train_set, test_set = random_split(dataset, [num_train, num_test])

# Define batch size and create data loaders for train and test sets
batch_size = 16
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# Check the length of train and test sets
print(f"Number of training examples: {len(train_set)}")
print(f"Number of testing examples: {len(test_set)}")

# You can now use train_loader and test_loader for training and testing your binary classification model.



Number of training examples: 156
Number of testing examples: 40


In [3]:
import torch.nn as nn

model=models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False 
model.fc = nn.Sequential( 
               nn.Linear(model.fc.in_features, 128),
               nn.ReLU(),
               nn.Linear(128, 2),
               nn.Softmax(dim=1) 
               )



In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print( torch.cuda.is_available())
model.to(device)

True


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
def train(model, train_loader, val_loader, criterion, optimizer, epochs):
    a_train_loss = []
    a_train_accuracy = []
    a_val_loss = []
    a_val_accuracy = []
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        train_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)  # Move inputs to the same device as the model
            labels = labels.to(device)  # Move labels to the same device as the model
    
            outputs = model(inputs)
            loss = criterion(outputs, labels) #calculate loss

            optimizer.zero_grad() #set gradients to zero
            loss.backward()       #compute gradients
            optimizer.step()     #Update parameters

            
            #add the loss
            #By default it takes the average of the batch, hence we multiply it with the batch size,
            #so that at the end we can get the total average of whole set
            train_loss += loss.item() * inputs.size(0)
            
            #get the index of class of predicted output
            pred = torch.argmax(outputs, 1)
            
            train_corrects += torch.sum(pred == labels.data).item()

        train_loss /= len(train_loader.dataset)
        train_acc = train_corrects/ len(train_loader.dataset)

        #Validation Loss and Accuracy
        model.eval()
        val_loss = 0.0
        val_corrects = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                
                inputs= inputs.to(device) 
                labels=labels.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                pred=torch.argmax(outputs, 1)
                val_corrects+= torch.sum(pred == labels.data).item()

        val_loss /= len(val_loader.dataset)
        val_acc = val_corrects/len(val_loader.dataset)
        
        a_train_loss.append(train_loss)
        a_train_accuracy.append(train_acc)
        a_val_loss.append(val_loss)
        a_val_accuracy.append(val_acc)

        print(f"Epoch {epoch+1}/{epochs} - Training Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}, "
              f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

    return model, a_train_loss, a_train_accuracy, a_val_loss, a_val_accuracy

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model, train_loss, train_accuracy, test_loss, test_accuracy = train(model, train_loader, test_loader, criterion, optimizer, epochs=10)

Epoch 1/10 - Training Loss: 0.6474, Train Accuracy: 0.6410, Validation Loss: 0.5857, Validation Accuracy: 0.7000
Epoch 2/10 - Training Loss: 0.6409, Train Accuracy: 0.6603, Validation Loss: 0.5940, Validation Accuracy: 0.7000
Epoch 3/10 - Training Loss: 0.5473, Train Accuracy: 0.7564, Validation Loss: 0.5294, Validation Accuracy: 0.8250
Epoch 4/10 - Training Loss: 0.5213, Train Accuracy: 0.8013, Validation Loss: 0.4896, Validation Accuracy: 0.8000
Epoch 5/10 - Training Loss: 0.4937, Train Accuracy: 0.8205, Validation Loss: 0.5415, Validation Accuracy: 0.8000
Epoch 6/10 - Training Loss: 0.5780, Train Accuracy: 0.7308, Validation Loss: 0.5858, Validation Accuracy: 0.7000
Epoch 7/10 - Training Loss: 0.5551, Train Accuracy: 0.7308, Validation Loss: 0.4622, Validation Accuracy: 0.8500
Epoch 8/10 - Training Loss: 0.4974, Train Accuracy: 0.7949, Validation Loss: 0.4691, Validation Accuracy: 0.8250
Epoch 9/10 - Training Loss: 0.4869, Train Accuracy: 0.8333, Validation Loss: 0.4852, Validation 

In [7]:
training_stats = pd.DataFrame({
    'train_loss': train_loss,
    'train_accuracy': train_accuracy,
    'test_loss': test_loss,
    'test_accuracy': test_accuracy
})
training_stats.to_csv('./training_stats/ua_bighorn_ResNet50_stats.csv', index=False)
torch.save(model,'./models/ua_bighorn_male-female_ResNet50_classification.pth' )

In [8]:
model = torch.load('./models/ua_bighorn_male-female_ResNet50_classification.pth', weights_only = False)

In [9]:
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

corr = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        corr += (predicted == labels).sum().item()

print('Accuracy of the model on all test Data: %d %%' % ( 100 * corr / total))

Accuracy of the model on all test Data: 87 %
