In [None]:
import os

# Directory holding the resized, cropped training images that are counted below
folder_path = "/kaggle/input/diabetic-retinopathy-resized/resized_train_cropped/resized_train_cropped"

def count_images_in_folder(folder_path):
    """Count the entries of ``folder_path`` whose name looks like an image file.

    Only the entry name returned by ``os.listdir`` is examined (case-insensitive
    suffix match); entries are not opened, and sub-folders are not descended into.

    Args:
        folder_path: Directory to list.

    Returns:
        Number of entries whose lower-cased name ends with .jpg, .jpeg, .png,
        .gif, .bmp, .tiff or .webp.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    return sum(
        1
        for entry_name in os.listdir(folder_path)
        if entry_name.lower().endswith(image_suffixes)
    )

# Count the image-like entries in the training-image folder
num_images = count_images_in_folder(folder_path)

# Report the count
print(f"Number of images in the folder: {num_images}")


In [None]:
import csv

# Label file whose data rows are counted below
csv_file_path = '/kaggle/input/diabetic-retinopathy-resized/trainLabels_cropped.csv'

# The first row is consumed unconditionally (treated as the header); every
# remaining row counts as one data row. An empty file yields 0.
with open(csv_file_path, 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader, None)
    row_count = sum(1 for _ in csv_reader)

print(f'The CSV file "{csv_file_path}" contains {row_count} rows.')

In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Directory holding the resized, cropped training images
folder = '/kaggle/input/diabetic-retinopathy-resized/resized_train_cropped/resized_train_cropped'

# Raw directory listing (os.listdir makes no ordering guarantee); peek at the first names
image_files_display = os.listdir(folder)
print(image_files_display[:10])

# Display the first five listed files together with their array shape and name
for image_file in image_files_display[:5]:
    image_path = os.path.join(folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable / non-image entry by returning None
        # rather than raising; skip it instead of crashing in cvtColor below.
        print(f"Skipping unreadable image: {image_path}")
        continue
    plt.figure()
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB for correct display
    plt.title(f"Image Shape: {image.shape},{image_file}")
    plt.axis('off')
    plt.show()


In [None]:
import pandas as pd
# Label table for the cropped training images; later cells read its
# 'image' and 'level' columns.
csv_path = "/kaggle/input/diabetic-retinopathy-resized/trainLabels_cropped.csv"

df = pd.read_csv(csv_path)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Number of rows per 'level' value (value_counts orders by frequency, not by label)
label_counts = df['level'].value_counts()

# Bar chart of the class distribution
label_counts.plot.bar()
plt.title("Label Distribution")
plt.xlabel("Label")
plt.ylabel("Number of Data Points")
plt.grid(True)
plt.show()

In [None]:
# Peek at both ends of the label table
for preview in (df.head(), df.tail()):
    print(preview)

# File names (image id + extension) for the first five rows
images = df['image'].iloc[:5]
file_names = images + '.jpeg'
print(file_names.tolist())

In [None]:
# Single-column DataFrame of image ids (not referenced again in the cells visible here)
images = df[['image']]

import os
import cv2
import matplotlib.pyplot as plt

# Directory holding the resized, cropped training images
folder = '/kaggle/input/diabetic-retinopathy-resized/resized_train_cropped/resized_train_cropped'

# Maximum number of images to display
access = 10
index=0
# Slice to `access` entries so that exactly `access` images are shown when
# available (incrementing before an `index >= access` check stops one short).
for index, image_file in enumerate(file_names.tolist()[:access], start=1):
    image_path = os.path.join(folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for a missing or
        # unreadable file; skip it instead of crashing in cvtColor below.
        print(f"Skipping unreadable image: {image_path}")
        continue

    plt.figure()
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB for correct display
    plt.title(f"Image Shape: {image.shape},{image_file}")
    plt.axis('off')
    plt.show()

In [None]:
from sklearn.model_selection import train_test_split
# Drop the two "Unnamed" columns so that only the image id and its label remain
# (presumably left-over index columns from earlier CSV exports; verify against the file).
columns_to_drop = ["Unnamed: 0.1","Unnamed: 0"]
data = df.drop(columns=columns_to_drop)

# 60 % train / 20 % test / 20 % validation.
# Both splits are stratified on the 'level' label so that every subset keeps
# the class proportions of the full table; an unstratified split can leave
# the rarer grades under- or over-represented in a subset.
train_data, temp_data = train_test_split(
    data, test_size=0.4, random_state=42, stratify=data["level"]
)
test_data, validation_data = train_test_split(
    temp_data, test_size=0.5, random_state=42, stratify=temp_data["level"]
)


In [None]:
import torch
import albumentations as A 
from albumentations.pytorch import ToTensorV2  
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import numpy as np
import os

In [None]:

import torch
import albumentations as A 
from albumentations.pytorch import ToTensorV2  

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Optimizer step size
LEARNING_RATE = 1e-3
# Weight-decay coefficient handed to the optimizer
WEIGHT_DECAY = 5e-4
# Samples per mini-batch
BATCH_SIZE = 64
# Number of train + validate passes over the data
NUM_EPOCHS = 100
# Worker processes used by the DataLoaders
NUM_WORKERS = 4

# Directory holding the resized, cropped training images
image_folder = "/kaggle/input/diabetic-retinopathy-resized/resized_train_cropped/resized_train_cropped"
# Pipeline without normalisation: resize to 512x512, then convert to a tensor
std_transforms = A.Compose([A.Resize(512,512),ToTensorV2()])


# Training pipeline: resize, random geometric + colour augmentation, then
# normalise and convert to a tensor.
#
# Normalize is deliberately the LAST image operation before ToTensorV2:
#   * ColorJitter works on uint8 (or [0, 1] float) images; feeding it an
#     already standardised float image distorts or clips the values.
#   * Rotate fills the exposed border with 0, which only means "black" while
#     the image is still in pixel space.
# This also matches the evaluation pipeline, which ends with the same
# Normalize -> ToTensorV2 pair.
train_transforms = A.Compose(
    [
        A.Resize(224,224),
        A.RandomResizedCrop(224, 224, scale=(1/1.15, 1.15), ratio=(0.7, 1.3), interpolation=1, always_apply=False, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=30, p=0.5),
        A.ColorJitter(brightness=0.15, contrast=0.12, saturation=0.05, hue=0.04, always_apply=False, p=0.3),
        # NOTE(review): with max_pixel_value=255 the mean/std are expected as
        # fractions of 255, yet these means exceed 1 -- confirm how they were computed.
        A.Normalize(
            mean = [1.66043119 ,1.15943037 , 0.8302967 ],
            std =  [1.10998907, 0.81032822, 0.66805789],
            max_pixel_value = 255.0
        ),
        ToTensorV2()
    ]
)

# Evaluation pipeline (no random augmentation): resize, normalise, convert to tensor.
test_transforms = A.Compose([
    A.Resize(224, 224),
    A.Normalize(
        mean=[1.66043119, 1.15943037, 0.8302967],
        std=[1.10998907, 0.81032822, 0.66805789],
        max_pixel_value=255.0,
    ),
    ToTensorV2(),
])



## Creating the dataset with the PyTorch `Dataset` class

In [None]:

#import config
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from PIL import Image #python image library, allow to python deal with images
from tqdm import tqdm #progress bar



class DRDataset(Dataset):
    """Map-style dataset yielding ``(image, label, image_id)`` triples.

    Args:
        image_folder: Directory containing one ``<image_id>.jpeg`` file per row.
        data: DataFrame whose rows unpack to exactly ``(image_id, label)``,
            in that column order.
        train: Stored on the instance; it does not influence how samples are
            loaded (kept so existing call sites continue to work).
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry (the albumentations
            calling convention). When falsy, the raw array is returned.
    """

    def __init__(self, image_folder, data, train=True, transform=None):
        super().__init__()
        self.image_folder = image_folder
        self.data = data
        self.train = train
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self,index):
        """Load the sample at positional ``index``.

        Returns:
            Tuple ``(image, label, image_id)`` where ``image`` is the
            transformed image (or an H x W x 3 array when no transform is set).
        """
        # Rows are (image_id, label); the train flag never changed this lookup.
        image_file, label = self.data.iloc[index]
        image_path = os.path.join(self.image_folder, image_file + '.jpeg')

        # The context manager closes the file handle deterministically (matters
        # with many DataLoader workers); convert('RGB') guarantees three
        # channels even if a file happens to be stored as greyscale.
        with Image.open(image_path) as pil_image:
            image = np.array(pil_image.convert('RGB'))

        if self.transform:
            image = self.transform(image=image)['image']

        return image, label, image_file
    



In [None]:

import torch
import albumentations as A 
from albumentations.pytorch import ToTensorV2 

# Un-normalised pipeline: resize to 512x512 and convert to a tensor
std_transforms = A.Compose([
    A.Resize(512, 512),
    ToTensorV2(),
])

# Every labelled row of `data`, served in table order
dataset = DRDataset(
    "/kaggle/input/diabetic-retinopathy-resized/resized_train_cropped/resized_train_cropped",
    data,
    train=True,
    transform=std_transforms,
)

data_loader = DataLoader(dataset, batch_size=64, shuffle=False)



## Transforming the original dataset

In [None]:
# Datasets and loaders for the three splits. The folder and batch size come
# from the configuration constants (image_folder, BATCH_SIZE) instead of
# repeated literals, so changing the configuration changes every loader.

# Training split: augmented and reshuffled every epoch
train_dataset = DRDataset(
            image_folder = image_folder,
            data = train_data,
            train = True,
            transform = train_transforms)

train_loader = DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle=True)

# Validation split: no augmentation; shuffling brings nothing for evaluation,
# so the order is kept fixed. (DRDataset ignores the train flag; False is
# passed only to describe the split accurately.)
validation_dataset = DRDataset(
            image_folder = image_folder,
            data = validation_data,
            train = False,
            transform = test_transforms)

validation_loader = DataLoader(dataset = validation_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle=False)

# Test split: no augmentation, fixed order
test_dataset = DRDataset(
            image_folder = image_folder,
            data = test_data,
            train = False,
            transform = test_transforms)

test_loader = DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle=False)

print(len(train_dataset))
print(len(validation_dataset))
print(len(test_dataset))

# Sanity-check one sample without dumping the whole image tensor to the output
sample_image, sample_label, sample_id = train_dataset[1]
print(sample_image.shape, sample_image.dtype, sample_label, sample_id)




In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from efficientnet_pytorch import EfficientNet
from torch import nn, optim
from sklearn.metrics import cohen_kappa_score

# Per-epoch history, appended to by train_one_epoch() and validate()
train_losses = []
validation_losses = []
# Holds the quadratic-weighted kappa score per validation pass (a score, not a loss)
kappa_losses = []

def train_one_epoch(loader,model,optimizer,loss_fn,scaler,device):
    """Run one optimisation pass over ``loader`` with mixed-precision autocast.

    Args:
        loader: Iterable yielding ``(inputs, targets, extra)`` batches; the
            third element is ignored.
        model: Network being trained.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        loss_fn: Callable ``loss_fn(scores, targets)`` returning a scalar loss.
        scaler: Gradient scaler exposing ``scale`` / ``step`` / ``update``.
        device: Device the batch tensors are moved to.

    Side effects:
        Prints the mean batch loss, appends it to the module-level
        ``train_losses`` list and prints that list.
    """
    # validate() leaves the model in eval mode, so training mode has to be
    # re-enabled at the start of every epoch; otherwise every epoch after the
    # first would train with dropout/batch-norm in inference behaviour.
    model.train()

    losses = []
    loop = tqdm(loader)
    for data, targets, _ in loop:
        data = data.to(device)
        targets = targets.to(device)

        #forward
        with torch.cuda.amp.autocast():
            scores = model(data)
            loss = loss_fn(scores,targets)

        batch_loss = loss.item()
        losses.append(batch_loss)

        #backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # Show the current batch loss on the progress bar
        loop.set_postfix(loss=batch_loss)

    epoch_loss = sum(losses)/len(losses)
    print(f"Loss average over epoch: {epoch_loss}")
    train_losses.append(epoch_loss)
    print(train_losses)
    
    
def validate(loader, model, loss_fn, device):
    """Evaluate ``model`` on ``loader`` and record loss and quadratic kappa.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        loader: Iterable yielding ``(inputs, targets, extra)`` batches; the
            third element is ignored.
        model: Network to evaluate.
        loss_fn: Callable ``loss_fn(scores, targets)`` returning a scalar loss.
        device: Device the batch tensors are moved to.

    Side effects:
        Prints the mean batch loss and the quadratic-weighted Cohen's kappa,
        appends them to the module-level ``validation_losses`` and
        ``kappa_losses`` lists, and prints both lists.
    """
    model.eval()
    batch_losses, predictions, references = [], [], []

    with torch.no_grad():
        for images, labels, _ in loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            batch_losses.append(loss_fn(logits, labels).item())

            # Index of the largest score along dim 1 is the predicted class
            predictions.extend(logits.max(1)[1].cpu().numpy())
            references.extend(labels.cpu().numpy())

    average_loss = sum(batch_losses) / len(batch_losses)
    kappa = cohen_kappa_score(references, predictions, weights='quadratic')

    print(f"Validation Loss: {average_loss:.4f}, Quadratic Kappa: {kappa:.4f}")
    kappa_losses.append(kappa)
    validation_losses.append(average_loss)
    print(kappa_losses)
    print(validation_losses)


# ImageNet-pretrained ResNet-50 backbone.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is because the installed version is not visible here.
model = models.resnet50(pretrained=True)

# Freeze the whole backbone
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head with a 5-way layer. torchvision's ResNet
# names its head `fc` (`_fc` is the EfficientNet attribute and raises
# AttributeError here). A freshly built nn.Linear has requires_grad=True, so
# the new head is the only trainable part and needs no explicit unfreezing.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 5)
model = model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()
# Hand only the trainable (head) parameters to the optimizer
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.Adam(trainable_params,lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)
# Gradient scaler used for mixed-precision training (bound to the name `scalar`)
scalar = torch.cuda.amp.GradScaler()

# One training pass followed by one validation pass per epoch
for epoch in range(NUM_EPOCHS):
    print('epoch: ',epoch)
    train_one_epoch(train_loader,model,optimizer,loss_fn,scalar,DEVICE)
    validate(validation_loader, model, loss_fn, DEVICE)
    
    




In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

# One row, two panels: losses on the left, kappa on the right
fig, (loss_ax, kappa_ax) = plt.subplots(1, 2, figsize = (15, 5))

# Training vs. validation loss per epoch
loss_ax.plot(train_losses, 'red',   label = 'Training')
loss_ax.plot(validation_losses, 'green', label = 'Validation')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Quadratic kappa per epoch
kappa_ax.plot(kappa_losses, 'blue', label = 'Kappa')
kappa_ax.set_xlabel('Epoch')
kappa_ax.set_ylabel('Kappa')
kappa_ax.legend()

In [None]:

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

y_pred = []
y_true = []

# Inference only: eval mode fixes dropout/batch-norm behaviour and no_grad
# avoids building autograd graphs for every test batch.
model.eval()
with torch.no_grad():
    # iterate over test data
    for inputs, labels, _ in test_loader:
        inputs = inputs.to(DEVICE)
        output = model(inputs) # Feed Network

        # The index of the largest logit is the predicted class; exponentiating
        # first does not change it and can overflow for large logits.
        output = torch.max(output, 1)[1].cpu().numpy()
        y_pred.extend(output) # Save Prediction

        labels = labels.cpu().numpy()
        y_true.extend(labels) # Save Truth

# constant for classes
classes = ('0', '1', '2', '3', '4')
class_ids = list(range(len(classes)))

# Build confusion matrix. Passing `labels` pins the shape to 5x5 even when a
# class never occurs in y_true / y_pred, so it always lines up with `classes`.
cf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

# Row-normalise (share of each true class); rows with no samples stay 0
# instead of turning into NaN through a division by zero.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
df_cm = pd.DataFrame(cf_matrix / np.where(row_totals == 0, 1, row_totals),
                     index = list(classes),
                     columns = list(classes))
plt.figure(figsize = (12,7))
sn.heatmap(df_cm, annot=True)
plt.savefig('output.png')

In [None]:
from sklearn.metrics import classification_report

# Display names for the five grades, '0' through '4'
target_names = tuple(str(grade) for grade in range(5))

# Per-class precision / recall / F1 on the test predictions
report = classification_report(
    y_true,
    y_pred,
    target_names=target_names,
)
print(report)