In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim
import pandas as pd 
# Notebook-wide training configuration.
CFG = dict(
    batch_size=32,      # mini-batch size
    epoch=10,           # number of passes over the training data
    log_interval=100,   # print the training loss every N batches
)

In [2]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
# Side length (pixels) of the square images produced by both pipelines.
IMAGE_SIZE = 512

# Training pipeline: tensor conversion, normalization, then random augmentations.
preprocess_transform = transforms.Compose([
    transforms.ToTensor(), # turn image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    # augmentations
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(5),
    # Crop a random region and resize it directly to the final size. Cropping
    # to 128 px and then upsampling to 512 px discarded resolution and made the
    # training inputs systematically blurrier than the validation inputs.
    transforms.RandomResizedCrop(IMAGE_SIZE),
])

# Validation / inference pipeline: deterministic, no augmentation.
val_transform = transforms.Compose([
    transforms.ToTensor(), # turn image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE))
])
class CustomImageDataset(Dataset):
    """Labelled image dataset backed by a table of ids/labels and an image directory.

    Column 0 of ``df`` is used as the image file stem (".jpeg" is appended) and
    column 1 is returned as the label.

    Args:
        df: Table with the image id in column 0 and the label in column 1.
        img_dir: Directory containing the ``<id>.jpeg`` files.
        transform: Optional callable applied to the image array before it is returned.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, label)``."""
        img_path = os.path.join(self.img_dir, self.df.iloc[idx, 0] + ".jpeg")

        # Close the file handle deterministically and force 3 channels so that
        # grayscale/CMYK files still match the 3-channel normalization.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))

        label = self.df.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)

        return image, label

# Training metadata: image id in column 0, label in column 1 (as read by CustomImageDataset).
df = pd.read_csv('/kaggle/input/bau-ain2001-fall22-a3p1/train.csv')

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation; fixed seed keeps the split reproducible.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

train_set = CustomImageDataset(train_df, img_dir='/kaggle/input/bau-ain2001-fall22-a3p1/images/images',transform=preprocess_transform)
val_set = CustomImageDataset(val_df, img_dir='/kaggle/input/bau-ain2001-fall22-a3p1/images/images',transform=val_transform)

# Batch size comes from CFG so the loaders cannot drift from the configured value.
train_loader = DataLoader(train_set, batch_size=CFG["batch_size"], shuffle=True)
# Validation order does not affect the metrics, so keep it deterministic.
test_loader = DataLoader(val_set, batch_size=CFG["batch_size"], shuffle=False)

In [3]:
class Net(nn.Module):
    """Small CNN for binary classification of 3-channel images.

    Two unpadded 3x3 convolutions, two 2x2 max-pools and two linear layers,
    ending in a sigmoid so the output is one probability per sample.

    Args:
        input_size: Spatial size of the input images, either an int (square
            images) or a ``(height, width)`` pair. Defaults to 512, which
            yields 2 * 127 * 127 = 32258 flattened features.

    Raises:
        ValueError: If the input is too small to survive both pooling steps.
    """

    def __init__(self, input_size=512):
        super(Net, self).__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # input: 3 x H x W
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        # -> 32 x (H-2) x (W-2)
        self.conv2 = nn.Conv2d(32, 2, 3, 1)
        # -> 2 x (H-4) x (W-4)
        # NOTE: dropout is defined but not applied in forward(); the attribute
        # is kept so existing code that accesses it keeps working.
        self.dropout = nn.Dropout(0.25)

        # Each 2x2 max-pool halves the spatial size (rounding down).
        pooled_h = (height - 4) // 2 // 2
        pooled_w = (width - 4) // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                "input_size {} is too small for this network".format(input_size))

        # after both poolings and flattening: batch x (2 * pooled_h * pooled_w)
        self.fc1 = nn.Linear(2 * pooled_h * pooled_w, 128)
        self.fc2 = nn.Linear(128, 1)
        self.activation = nn.LeakyReLU()

    def forward(self, x):
        """Return a 1-D tensor of probabilities, one per sample in the batch."""
        x = self.conv1(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.activation(x)
        x = F.max_pool2d(x, 2)
        # LeakyReLU is applied a second time between the two pooling steps.
        x = self.activation(x)
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.activation(x)
        x = self.fc2(x)
        output = torch.sigmoid(x)
        # (batch, 1) -> (batch,) so the output lines up with a 1-D label tensor.
        output = torch.reshape(output, (x.shape[0],))
        return output
    
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

    
def train(model, train_loader, optimizer, epoch, criterion):
    """Run one optimization pass over ``train_loader``.

    Args:
        model: Network to update; switched to training mode first.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        epoch: Epoch number, used only in the log line.
        criterion: Loss function called as ``criterion(predictions, labels)``.
    """
    model.train()

    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        predictions = model(inputs)
        batch_loss = criterion(predictions.float(), labels.float())
        batch_loss.backward()
        optimizer.step()

        # Only log on every CFG['log_interval']-th batch.
        if step % CFG['log_interval'] != 0:
            continue
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, batch_loss.item()))

def test(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output.float(), target.float()).item()
            pred = output.float()
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    test_loss /= len(test_loader.dataset) / CFG['batch_size']
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [4]:
# Build the network and move it to the selected device.
model = Net()
model = model.to(device)

# Binary cross-entropy on the model's sigmoid outputs, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# One training pass followed by one validation pass per epoch.
for epoch in range(0, CFG["epoch"]):
    train(model, train_loader, optimizer, epoch, criterion)
    test(model, test_loader, criterion)

print('Finished Training')

Train Epoch: 0 	Loss: 0.694103
Test set: Average loss: 0.7377, Accuracy: 0/280 (0%)
Train Epoch: 1 	Loss: 0.688026
Test set: Average loss: 0.8594, Accuracy: 0/280 (0%)
Train Epoch: 2 	Loss: 0.710937
Test set: Average loss: 0.7642, Accuracy: 0/280 (0%)
Train Epoch: 3 	Loss: 0.632014
Test set: Average loss: 1.0266, Accuracy: 0/280 (0%)
Train Epoch: 4 	Loss: 0.635668
Test set: Average loss: 1.0663, Accuracy: 0/280 (0%)
Train Epoch: 5 	Loss: 0.623121
Test set: Average loss: 0.6031, Accuracy: 0/280 (0%)
Train Epoch: 6 	Loss: 0.615623
Test set: Average loss: 1.0554, Accuracy: 0/280 (0%)
Train Epoch: 7 	Loss: 0.494156
Test set: Average loss: 1.1762, Accuracy: 0/280 (0%)
Train Epoch: 8 	Loss: 0.486409
Test set: Average loss: 1.4536, Accuracy: 0/280 (0%)
Train Epoch: 9 	Loss: 0.511135
Test set: Average loss: 1.3880, Accuracy: 0/280 (0%)
Finished Training


In [5]:
class TestDataset(Dataset):
    """Unlabelled image dataset backed by a table of ids and an image directory.

    Column 0 of ``df`` is used as the image file stem (".jpeg" is appended).

    Args:
        df: Table with the image id in column 0.
        img_dir: Directory containing the ``<id>.jpeg`` files.
        transform: Optional callable applied to the image array before it is returned.
    """

    def __init__(self, df, img_dir, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in ``df``."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and return the (optionally transformed) image for row ``idx``."""
        img_path = os.path.join(self.img_dir, self.df.iloc[idx, 0] + ".jpeg")
        # Close the file handle deterministically and force 3 channels so that
        # grayscale/CMYK files still match the 3-channel normalization.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))
        if self.transform:
            image = self.transform(image)
        return image


    
test_df = pd.read_csv('/kaggle/input/bau-ain2001-fall22-a3p1/test.csv')
test_set = TestDataset(test_df, img_dir='/kaggle/input/bau-ain2001-fall22-a3p1/images/images',transform=val_transform)
# Use a dedicated name: rebinding `test_loader` here would replace the labelled
# validation loader, so re-running the training cell afterwards would fail on
# batches that carry no labels.
submission_loader = DataLoader(test_set, batch_size=CFG["batch_size"], shuffle=False)

# Collect one predicted probability per test image, in file order.
model.eval()
preds = []
with torch.no_grad():
    for data in submission_loader:
        data = data.to(device)
        output = model(data)
        preds.extend(output.cpu().numpy().tolist())

test_df['cancer_score'] = preds

# image id and cancer score
test_df = test_df[['img_id', 'cancer_score']]
test_df.to_csv('submission.csv', index=False)