In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score, f1_score, classification_report

import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [3]:
num_class = 3

new_label={
    0:0,
    1:1,
    2:1,
    3:2,
    4:2
        }

path = "/kaggle/input/kneeoa/"

image_path = []
labels = []


for label in [0,2,3,4]:
    
    
    image_list = os.listdir(f"{path}train/{label}")

    for p in image_list:
        image_path+= [ f"{path}train/{label}/"+ p ]

    labels += [new_label[label]] * len(image_list)
    
train_data=pd.DataFrame({"Filepath":image_path,"Labels":labels})
train_data.shape


(4732, 2)

In [4]:
image_path = []
labels = []


for label in [0,2,3,4]:
    
    
    image_list = os.listdir(f"{path}val/{label}")

    for p in image_list:
        image_path+= [ f"{path}val/{label}/"+ p ]

    labels += [new_label[label]] * len(image_list)
    
val_data=pd.DataFrame({"Filepath":image_path,"Labels":labels})
val_data.shape


(673, 2)

In [5]:
class custom_dataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx, 0]
        image = Image.open(img_name)
        label =  self.df.iloc[idx, 1]

        image = self.transform(image)

        return image, label


In [6]:
pre_processing = {
    'train': transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((256,256)),
        transforms.RandomResizedCrop((224,224)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'eval': transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((256,256)),
        transforms.CenterCrop((224,224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [7]:
custom_train_data=custom_dataset(dataframe=train_data,transform=pre_processing['train'])
custom_val_data=custom_dataset(dataframe=val_data,transform=pre_processing['eval'])

batch_size=32
train_loader = DataLoader(custom_train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(custom_val_data, batch_size=batch_size)



In [8]:
import torchvision.models as models
import torch.nn as nn

model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_class)

import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)  
scheduler = ReduceLROnPlateau(optimizer,mode ='min',patience =3,factor=0.1,verbose=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 136MB/s]


In [9]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = model.to(device)
print(device)

cuda


In [10]:
def calculate_validation_loss(model, val_loader, criterion):
    model.eval()
    total_loss = 0.0
    num_samples = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)  
            outputs = model(inputs)  
            loss = criterion(outputs, targets)  
            total_loss += loss.item() * inputs.size(0)
            num_samples += inputs.size(0)

    model.train()  
    return total_loss / num_samples

In [11]:
from tempfile import TemporaryDirectory

In [12]:
num_epochs = 40  # the number of training epochs 
i =0
with TemporaryDirectory() as tempdir:
    best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

    torch.save(model.state_dict(), best_model_params_path)
    best_acc = 0.0
    for epoch in range(num_epochs):
        k =0
        print("Epoch =",i,"\n")
        model.train()  
        for inputs, labels in train_loader:
            if k%20==0:
                print(k,"/",i)
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            k+=1

        i+=1
        print("=================================")
        model.eval()  
        with torch.no_grad():
            correct = 0
            total = 0
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()


        validation_accuracy = correct / total

        print(f'Validation Accuracy: {validation_accuracy * 100:.2f}%')
        
        if validation_accuracy>best_acc:
            best_acc=validation_accuracy
            torch.save(model.state_dict(), best_model_params_path)

        validation_loss = calculate_validation_loss(model, val_loader,criterion)
        scheduler.step(validation_loss)
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch}: Learning Rate = {current_lr}")
        if current_lr < 5e-8:
            break
    #     if validation_accuracy>=0.98:
    #         print("Accuracy reached")
    #         break
        print("=================================")

Epoch = 0 

0 / 0
20 / 0
40 / 0
60 / 0
80 / 0
100 / 0
120 / 0
140 / 0
Validation Accuracy: 61.37%
Epoch 0: Learning Rate = 0.001
Epoch = 1 

0 / 1
20 / 1
40 / 1
60 / 1
80 / 1
100 / 1
120 / 1
140 / 1
Validation Accuracy: 63.89%
Epoch 1: Learning Rate = 0.001
Epoch = 2 

0 / 2
20 / 2
40 / 2
60 / 2
80 / 2
100 / 2
120 / 2
140 / 2
Validation Accuracy: 65.08%
Epoch 2: Learning Rate = 0.001
Epoch = 3 

0 / 3
20 / 3
40 / 3
60 / 3
80 / 3
100 / 3
120 / 3
140 / 3
Validation Accuracy: 65.53%
Epoch 3: Learning Rate = 0.001
Epoch = 4 

0 / 4
20 / 4
40 / 4
60 / 4
80 / 4
100 / 4
120 / 4
140 / 4
Validation Accuracy: 71.62%
Epoch 4: Learning Rate = 0.001
Epoch = 5 

0 / 5
20 / 5
40 / 5
60 / 5
80 / 5
100 / 5
120 / 5
140 / 5
Validation Accuracy: 75.78%
Epoch 5: Learning Rate = 0.001
Epoch = 6 

0 / 6
20 / 6
40 / 6
60 / 6
80 / 6
100 / 6
120 / 6
140 / 6
Validation Accuracy: 74.44%
Epoch 6: Learning Rate = 0.001
Epoch = 7 

0 / 7
20 / 7
40 / 7
60 / 7
80 / 7
100 / 7
120 / 7
140 / 7
Validation Accuracy: 76.08%

In [13]:
image_path = []
labels = []


for label in [0,2,3,4]:
    
    
    image_list = os.listdir(f"{path}test/{label}")

    for p in image_list:
        image_path+= [ f"{path}test/{label}/"+ p ]

    labels += [new_label[label]] * len(image_list)
    
test_data=pd.DataFrame({"Filepath":image_path,"Labels":labels})

custom_test_data=custom_dataset(dataframe=test_data,transform=pre_processing['eval'])
test_loader=DataLoader(custom_test_data,batch_size=batch_size)


In [14]:
model.eval()  
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()


    test_accuracy = correct / total

    print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 83.16%
