In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image

torch.manual_seed(0)

class ChessDataset(Dataset):
    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.images = []
        self.labels = []
        
        for label, subdir in enumerate(['insufficient_material', 'sufficient_material']):
            subdir_path = os.path.join(directory, subdir)
            for filename in os.listdir(subdir_path):
                if filename.endswith('.png'):
                    self.images.append(os.path.join(subdir_path, filename))
                    self.labels.append(label)
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# load a pretrained ResNet model
model = models.resnet18(weights='ResNet18_Weights.DEFAULT')

# create the dataset
train_dataset = ChessDataset('chess_dataset/train', transform=data_transforms['train'])
val_dataset = ChessDataset('chess_dataset/val', transform=data_transforms['val'])

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


###########################
##  Beginning of Part a  ##
###########################

# TODO: modify the fully connected layer for binary classification
# hint: it might be helpful to print(model) to see how the model is structured

model.fc = nn.Linear(model.fc.in_features, 2)



# use a GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)


batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


###########################
##  Beginning of Part b  ##
###########################


def train(epoch_num):
    # set the model to train mode
    model.train()

    running_loss = 0.0
    total_loss = 0.0
    running_count = 0
    total_count = 0      

    for batch_index, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        
        #TODO: complete the training loop. You will need to do the following:
        
        # zero the parameter gradients 
        optimizer.zero_grad()

        # run a forward pass
        outputs = model(inputs)

        # run a backwards pass and optimizer step
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        
        
        # update loss and count
        running_loss += loss.item() * inputs.size(0)
        total_loss += loss.item() * inputs.size(0)

        running_count += inputs.size(0)
        total_count += inputs.size(0)

        # print every 50 mini-batches
        if batch_index % 50 == 49:
            print('[%d, %5d] avg batch loss: %.3f avg epoch loss: %.3f' %
                (epoch_num + 1, batch_index + 1, running_loss / running_count, total_loss / total_count))
            running_loss = 0.0
            running_count = 0


def validate():
    # set the model to evaluation mode
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total_count = 0

    # no need to track gradients for validation
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # TODO: run the forward pass (same as part c)
            outputs = model(inputs)

            # TODO: from your output (which are probabilities for each class, find the predicted
            # class)
            _, predicted = torch.max(outputs, 1)
            correct_count = (predicted == labels).sum().item()
            loss = criterion(outputs, labels)

            # update loss and count
            
            total_loss += loss.item() * labels.size(0)
            total_correct += correct_count
            total_count += labels.size(0)

    accuracy = 100 * total_correct / total_count
    print()
    print(f"Evaluation loss: {total_loss / total_count :.3f}")
    print(f'Accuracy of the model on the validation images: {accuracy: .2f}%')
    print()


###########################
##  Beginning of Part c  ##
###########################


# TODO: validate your model before training
validate()

num_epochs = 1
# TODO: train and validate your model
for epoch in range(num_epochs):
    train(epoch)
    validate()


Evaluation loss: 0.664
Accuracy of the model on the validation images:  61.67%

[1,    50] avg batch loss: 0.648 avg epoch loss: 0.648
[1,   100] avg batch loss: 0.343 avg epoch loss: 0.495
[1,   150] avg batch loss: 0.182 avg epoch loss: 0.391
[1,   200] avg batch loss: 0.243 avg epoch loss: 0.354
[1,   250] avg batch loss: 0.151 avg epoch loss: 0.313

Evaluation loss: 0.020
Accuracy of the model on the validation images:  100.00%



In [9]:
import numpy as np

def convolution2d(image, kernel):
    """
    Perform 2D convolution on an input image with a given kernel.
    
    Arguments:
    image : numpy.ndarray
        2D array representing the input image.
    kernel : numpy.ndarray
        2D array representing the convolution kernel/filter.
    
    Returns:
    numpy.ndarray
        2D array representing the output convolved image.
    """
    image_height, image_width = image.shape
    kernel_height, kernel_width = kernel.shape
    
    # Calculate output dimensions
    output_height = image_height - kernel_height + 1
    output_width = image_width - kernel_width + 1
    
    # Initialize output image
    output_image = np.zeros((output_height, output_width))
    
    # Perform convolution
    for i in range(output_height):
        for j in range(output_width):
            # Extract the region of interest from the image
            image_patch = image[i:i+kernel_height, j:j+kernel_width]
            # Perform element-wise multiplication and sum
            output_image[i, j] = np.sum(image_patch * kernel)
    
    return output_image

# Example usage:
# Input image (5x5 matrix)
input_image = np.array([[-1, -1, 1, -1],
                        [-1, 1, -1, -1],
                        [1, 1, -1, -1],
                        [-1, -1, -1, -1]])

# Convolution kernel/filter (3x3 matrix)
filter_matrix = np.array([[-1, -1, -1],
                          [-1, -1, 1],
                          [-1, 1, 1]])

# Perform 2D convolution
output_image = convolution2d(input_image, filter_matrix)
print("Output Image:")
print(output_image)

Output Image:
[[-1. -3.]
 [-3. -1.]]
