## Binary Classifier

In [1]:
import os
import cv2
import numpy as np
import PIL.Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Resize, Compose


class ImageDataset(Dataset):
    """Binary image dataset read from ``<folder_path>/pos`` and ``<folder_path>/neg``.

    Every entry of the ``pos`` sub-folder is labelled 1 and every entry of the
    ``neg`` sub-folder is labelled 0. Only the paths are collected up front;
    images are decoded lazily in ``__getitem__``.

    Args:
        folder_path: Directory that contains the ``pos`` and ``neg`` sub-folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform

        self.img_paths = []
        self.labels = []

        for class_name in ['pos', 'neg']:
            class_folder = os.path.join(self.folder_path, class_name)
            class_label = 1 if class_name == 'pos' else 0
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping stable so index-based splits are
            # reproducible across runs and machines.
            for file_name in sorted(os.listdir(class_folder)):
                file_path = os.path.join(class_folder, file_name)
                self.img_paths.append(file_path)
                self.labels.append(class_label)

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.labels)

    def __getitem__(self, index):
        """Load the sample at ``index``.

        Returns:
            ``(img, label)`` where ``img`` is the (optionally transformed) RGB
            image and ``label`` is 1 for ``pos`` and 0 for ``neg``.

        Raises:
            OSError: If OpenCV cannot decode the file at the stored path.
        """
        img_path = self.img_paths[index]
        label = self.labels[index]

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('Could not read image file: {}'.format(img_path))

        # OpenCV decodes to BGR, while PIL and the downstream transforms
        # expect RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = PIL.Image.fromarray(img)

        if self.transform:
            img = self.transform(img)

        return img, label


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import torchvision.transforms as transforms

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is updated after every batch;
    otherwise gradients are disabled and the pass is evaluation only. The
    caller is responsible for switching ``model.train()`` / ``model.eval()``.
    """
    training = optimizer is not None
    total_loss = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            # Batches arrive on the loader's device; move them next to the
            # model's parameters so the forward pass does not mix devices.
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = inputs.size(0)
            # The criterion returns the batch mean, so weight by batch size
            # to get a correct per-sample average at the end.
            total_loss += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

    # Normalise by the samples actually processed (not len(loader.dataset)),
    # which stays correct with drop_last or a custom sampler; guard against
    # an empty loader.
    seen = max(seen, 1)
    return total_loss / seen, correct / seen


def train(model, train_loader, val_loader, num_epochs, lr):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    The model is updated in place and one summary line is printed per epoch.

    Args:
        model: Module whose output has one score per class along dim 1.
        train_loader: Iterable of ``(inputs, labels)`` batches used for training.
        val_loader: Iterable of ``(inputs, labels)`` batches used for validation.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to Adam.

    Returns:
        A list with one dict per epoch holding the float metrics
        ``train_loss``, ``train_acc``, ``val_loss`` and ``val_acc``.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Follow whatever device the caller placed the model on.
    device = next(model.parameters()).device

    history = []
    for epoch in range(num_epochs):
        model.train()
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)

        model.eval()
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.4f}'
              .format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))
        history.append({
            'train_loss': train_loss,
            'train_acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
        })

    return history

def k_fold_cross_validation(model, input_folder, num_epochs, lr, batch_size, num_workers, num_folds):
    """Run shuffled K-fold cross-validation of ``model`` on an image folder.

    Each fold restarts from the weights ``model`` had when this function was
    called, so a fold's validation images have never been trained on. The
    model is modified in place; when the function returns it holds the
    weights learned on the last fold's training split.

    Args:
        model: Module to train; its current weights are the starting point of
            every fold.
        input_folder: Directory passed to ``ImageDataset``.
        num_epochs: Epochs to train per fold.
        lr: Learning rate forwarded to ``train``.
        batch_size: Batch size for both loaders.
        num_workers: Worker processes for both loaders.
        num_folds: Number of folds.
    """
    import copy  # local import keeps this block independent of the import cell

    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])
    # Validation must be deterministic: same preprocessing, no colour jitter.
    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ])

    print("Loading dataset...")
    dataset = ImageDataset(input_folder, transform=train_transform)
    # A shallow copy shares the same path/label lists (so indices line up)
    # while allowing a different transform for validation.
    eval_dataset = copy.copy(dataset)
    eval_dataset.transform = eval_transform
    print("Dataset successfully loaded!")

    # Snapshot the starting weights; load_state_dict copies values into the
    # model's own tensors, so this snapshot can be reused for every fold.
    initial_state = copy.deepcopy(model.state_dict())

    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    sample_indices = np.arange(len(dataset))
    for fold, (train_idx, val_idx) in enumerate(kf.split(sample_indices), start=1):
        print('Fold {}/{}'.format(fold, num_folds))
        model.load_state_dict(initial_state)
        train_dataset = torch.utils.data.Subset(dataset, train_idx.tolist())
        val_dataset = torch.utils.data.Subset(eval_dataset, val_idx.tolist())
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        train(model, train_loader, val_loader, num_epochs, lr)


In [3]:
from torchvision import models
# Build a ResNet-18 with pretrained weights, then replace its final
# fully-connected layer with a two-output head for the pos/neg task.
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)



In [None]:
# Cross-validate the classifier on the "posneg_dataset" folder.
k_fold_cross_validation(
    model,
    "posneg_dataset",
    num_epochs=2,
    lr=0.001,
    batch_size=80,
    num_workers=4,
    num_folds=2,
)

Loading dataset...
Dataset successfully loaded!
Fold 1/2


In [5]:
# Write the model's state_dict (its weights, not the module object) to disk.
torch.save(obj=model.state_dict(), f='binary_classifier.pth')