In [1]:

#!pip install torch 
#!pip install torchsummary
#!pip install torchvision

In [2]:
import torch
from datetime import datetime
import time
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [3]:
class CNN(nn.Module):
    def __init__(self, num_classes=2):
        super(CNN, self).__init__()
        # Input: 1x224x224
        self.features = nn.Sequential(
            # Conv1: (224+4-5)/1 + 1 = 224
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),  # Output: 6x224x224
            nn.ReLU(inplace=True),
            # MaxPool1: (224-2)/2 + 1 = 112
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 6x112x112
            
            # Conv2: (112+4-5)/1 + 1 = 112 
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=2),  # Output: 16x112x112
            nn.ReLU(inplace=True),
            # MaxPool2: (112-2)/2 + 1 = 56  
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 16x56x56

            # Conv3: (56+2-3)/2 + 1 = 28
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), # Output: 32x28x28
            nn.ReLU(inplace=True),
            # MaxPool3: (28-2)/2 + 1 = 14
            nn.MaxPool2d(kernel_size=2, stride=2), # Output: 32x14x14
            
            # Conv4: (14+2-3)/2 + 1 = 7
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), # Output: 64x7x7
            nn.ReLU(inplace=True),
            # MaxPool4: (7-2)/2 + 1 = 3
            nn.MaxPool2d(kernel_size=2, stride=2), # Output: 64x3x3
        )
        
        # Calculate flattened size: 64 * 3 * 3 = 576
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(64 * 3 * 3, 256),  # 576 -> 256
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, 64), # 256 -> 64
            nn.ReLU(inplace=True),
            nn.Linear(64, num_classes), # 64 -> 2
        )

    def forward(self, x):
        # Convolutional layers
        x = self.features(x)

        x = torch.flatten(x, 1)

        # Fully connected layers
        x = self.classifier(x)
        return x

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

if device.type == 'cuda':
    print(f'GPU Device: {torch.cuda.get_device_name(0)}')
    print(f'Memory Usage:')
    print(f'Allocated: {torch.cuda.memory_allocated(0)/1024**2:.2f}MB')
    print(f'Cached: {torch.cuda.memory_reserved(0)/1024**2:.2f}MB')


Using device: cuda
GPU Device: NVIDIA GeForce RTX 3050 Laptop GPU
Memory Usage:
Allocated: 0.00MB
Cached: 0.00MB


In [5]:
model = CNN().to(device)

In [6]:
summary(model, (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 6, 224, 224]             156
              ReLU-2          [-1, 6, 224, 224]               0
         MaxPool2d-3          [-1, 6, 112, 112]               0
            Conv2d-4         [-1, 16, 112, 112]           2,416
              ReLU-5         [-1, 16, 112, 112]               0
         MaxPool2d-6           [-1, 16, 56, 56]               0
            Conv2d-7           [-1, 32, 28, 28]           4,640
              ReLU-8           [-1, 32, 28, 28]               0
         MaxPool2d-9           [-1, 32, 14, 14]               0
           Conv2d-10             [-1, 64, 7, 7]          18,496
             ReLU-11             [-1, 64, 7, 7]               0
        MaxPool2d-12             [-1, 64, 3, 3]               0
          Dropout-13                  [-1, 576]               0
           Linear-14                  [

In [7]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Convert input picture to tensor
matrix_converter = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  
    transforms.Resize((224, 224)),                 
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor()
])

In [8]:
model_number = 1
# Set data set directory
data_dir = f'data_for_model_{model_number}/train/'
test_dir = f'data_for_model_{model_number}/test/'

# Load data set
dataset = datasets.ImageFolder(data_dir,transform=matrix_converter)
test_dataset = datasets.ImageFolder(test_dir,transform=matrix_converter)

batch_size = 32
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, shuffle=False, num_workers=4, pin_memory=True)
    

In [9]:
print(f"Noof classes: {len(dataset.classes)}")
print(f"Classes: {dataset.classes}")
print(f"Total samples: {len(dataset)}")

Noof classes: 2
Classes: ['cat', 'dog']
Total samples: 3000


In [10]:
num_epochs = 40

In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    correct = 0
    epoch_start_time = time.time()
    
    for batch_idx, (images, targets) in enumerate(train_loader):
        # Move data to the same device as the model
        images = images.to(device)
        targets = targets.to(device)
        
        # Forward pass
        predictions = model(images)  
        loss = loss_func(predictions, targets)
        
        # Calculate accuracy
        _, predicted = torch.max(predictions.data, 1)
        correct += (predicted == targets).sum().item()
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], "
                  f"Batch [{batch_idx + 1}/{len(train_loader)}], "
                  f"Loss: {loss.item():.4f}")
    
    print("-" * 30)
    
    avg_loss = total_loss / len(dataset)
    accuracy = 100 * correct / len(dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}]")
    print(f"Loss: {avg_loss:.4f}")
    print(f"Accuracy: {accuracy:.2f}%")
    print("-" * 30)
    
    # if device.type == 'cuda':
    #     torch.cuda.empty_cache()

Epoch [1/40], Batch [10/94], Loss: 0.6896
Epoch [1/40], Batch [20/94], Loss: 0.6849
Epoch [1/40], Batch [30/94], Loss: 0.6980
Epoch [1/40], Batch [40/94], Loss: 0.6820
Epoch [1/40], Batch [50/94], Loss: 0.6948
Epoch [1/40], Batch [60/94], Loss: 0.6932
Epoch [1/40], Batch [70/94], Loss: 0.6964
Epoch [1/40], Batch [80/94], Loss: 0.6934
Epoch [1/40], Batch [90/94], Loss: 0.6883
------------------------------
Epoch [1/40]
Loss: 0.0217
Accuracy: 50.60%
------------------------------
Epoch [2/40], Batch [10/94], Loss: 0.6858
Epoch [2/40], Batch [20/94], Loss: 0.6912
Epoch [2/40], Batch [30/94], Loss: 0.6977
Epoch [2/40], Batch [40/94], Loss: 0.6942
Epoch [2/40], Batch [50/94], Loss: 0.6897
Epoch [2/40], Batch [60/94], Loss: 0.6842
Epoch [2/40], Batch [70/94], Loss: 0.6867
Epoch [2/40], Batch [80/94], Loss: 0.6932
Epoch [2/40], Batch [90/94], Loss: 0.6922
------------------------------
Epoch [2/40]
Loss: 0.0217
Accuracy: 49.20%
------------------------------
Epoch [3/40], Batch [10/94], Loss:

In [12]:
model.eval()
with torch.no_grad():
    test_loss = 0.0
    test_correct = 0
    for test_images, test_targets in test_loader:
        # Move test data to device
        test_images = test_images.to(device)
        test_targets = test_targets.to(device)
        
        test_predictions = model(test_images)  
        loss = loss_func(test_predictions, test_targets)
        test_loss += loss.item()
        _, predicted = torch.max(test_predictions.data, 1)
        test_correct += (predicted == test_targets).sum().item()
    
    avg_test_loss = test_loss / len(test_dataset)
    test_accuracy = 100 * test_correct / len(test_dataset)
    print(f"Test Loss: {avg_test_loss:.4f}")
    print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.4164
Test Accuracy: 81.00%


In [None]:
times = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'final_accuracy': accuracy,
}, f'CNN_model_{model_number}_final_{times}.pth')

print(f"Best accuracy: {accuracy:.2f}%")

Best accuracy: 72.83%
