In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import random

import torch
from torchvision.datasets import ImageFolder
from torchvision import datasets
from torchvision import  transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset

import os
from pathlib import Path
from sklearn.metrics import f1_score
import shutil

# Data structure

In [2]:
# Working-directory layout: one output folder per split.
# Keys are used throughout the notebook to look up the split folders.
path = dict(
    train=Path('/kaggle/working/new_dataset/train'),
    val=Path('/kaggle/working/new_dataset/val'),
    submission=Path('/kaggle/working/new_dataset/submission'),
)

In [3]:
# folders = [
#     path['train']/'bac',
#     path['train']/'vir',
#     path['train']/'norm',
#     path['val']/'bac',
#     path['val']/'vir',
#     path['val']/'norm',
#     path['submission'],
# ]
# for folder in folders:
#     os.makedirs(folder, exist_ok=True)

In [4]:
def move_file(path, file, destination, oversample=True):
    """Copy one image into the class sub-folder derived from its file name.

    Despite the name, the source file is copied (``shutil.copy``), not moved.

    The class folder is picked from case-insensitive substrings of ``file``,
    checked in this order:

    * ``'bacteria'`` -> ``BACTERIA``
    * ``'virus'``    -> ``VIRUS``
    * ``'sub'``      -> no sub-folder (file lands directly in ``destination``)
    * anything else  -> ``NORMAL``

    Args:
        path: Directory that contains ``file`` (``str`` or ``Path``).
        file: File name inside ``path``.
        destination: Root directory below which the class folder is created
            (``str`` or ``Path``). Missing folders are created.
        oversample: When True (the default, matching the original behaviour),
            VIRUS and NORMAL files get a second copy named ``copy_<file>``
            next to the first one. Pass False to write each file only once,
            e.g. for evaluation data.
    """
    lowered = file.lower()
    if 'bacteria' in lowered:
        target_folder = 'BACTERIA'
    elif 'virus' in lowered:
        target_folder = 'VIRUS'
    elif 'sub' in lowered:
        target_folder = ''
    else:
        target_folder = 'NORMAL'

    # Coerce so that plain string paths work as well as Path objects.
    source_path = Path(path) / file
    target_dir = Path(destination) / target_folder

    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy(source_path, target_dir / file)

    # Duplicate the two smaller classes (presumably to balance them against
    # BACTERIA -- confirm against the class counts).
    if oversample and target_folder in ('VIRUS', 'NORMAL'):
        shutil.copy(source_path, target_dir / f'copy_{file}')



In [5]:
# Read-only competition data, one folder per split.
path_in = dict(
    train=Path('/kaggle/input/buw-chest-x-ray-challange/challenge_data/train'),
    val=Path('/kaggle/input/buw-chest-x-ray-challange/challenge_data/val'),
    test=Path('/kaggle/input/buw-chest-x-ray-challange/challenge_data/test'),
    submission=Path('/kaggle/input/buw-chest-x-ray-challange/challenge_data/submission'),
)

# (source folder, destination root) pairs, processed in this order.
# Note that both the original "val" and "test" splits end up in path['val'].
copy_jobs = [
    (path_in["submission"], path['submission']),
    (path_in["train"]/'PNEUMONIA', path['train']),
    (path_in["train"]/'NORMAL', path['train']),
    (path_in["val"]/'PNEUMONIA', path['val']),
    (path_in["val"]/'NORMAL', path['val']),
    (path_in["test"]/'PNEUMONIA', path['val']),
    (path_in["test"]/'NORMAL', path['val']),
]

for source_dir, destination_root in copy_jobs:
    for i in os.listdir(source_dir):
        move_file(source_dir, i, destination_root)
    


# Filter etc.

In [6]:
# Print at most two example files per directory below the input root.
for dirname, _, filenames in os.walk('/kaggle/input'):
    preview = filenames[:2]
    for filename in preview:
        full_path = os.path.join(dirname, filename)
        print(full_path)

/kaggle/input/buw-chest-x-ray-challange/challenge_data/submission/SUB-137.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/submission/SUB-209.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/val/PNEUMONIA/person1947_bacteria_4876.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/val/PNEUMONIA/person1946_bacteria_4875.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/val/NORMAL/NORMAL2-IM-1431-0001.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/val/NORMAL/NORMAL2-IM-1440-0001.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/test/PNEUMONIA/person1676_virus_2892.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/test/PNEUMONIA/person1650_virus_2852.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/test/NORMAL/NORMAL2-IM-0336-0001.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/test/NORMAL/NORMAL2-IM-0337-0001.jpeg
/kaggle/input/buw-chest-x-ray-challange/challenge_data/train/PNEUMONIA/person118

In [7]:
seed = 42

# Seed every RNG the notebook uses: PyTorch, NumPy and Python's random module.
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(seed)

# Only relevant when CUDA is in use
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # in case several GPUs are used
    # Ask cuDNN for deterministic kernels and switch off its autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [8]:
def rgb_to_grayscale(image: torch.Tensor):
    """Collapse the channel axis (dim 1) into one weighted-sum channel.

    The three channels are weighted with 0.2989 / 0.5870 / 0.1140 and summed
    over dim 1, which is kept with size 1.
    """
    channel_weights = torch.tensor(
        [0.2989, 0.5870, 0.1140], device=image.device
    ).reshape(1, 3, 1, 1)
    weighted = image * channel_weights
    return torch.sum(weighted, dim=1, keepdim=True)

def grayscale_to_rgb(grayscale: torch.Tensor):
    """Broadcast dim 1 of a 4-D tensor to size 3; all other dims keep their size.

    Uses ``expand``, so the result is a view and no data is copied.
    """
    target_sizes = (-1, 3, -1, -1)  # -1 keeps the existing size of that dim
    return grayscale.expand(*target_sizes)


def filter_1(x: torch.Tensor):
    """Threshold filter: zero every element that is above the tensor's mean.

    Elements less than or equal to the mean are returned unchanged; the input
    tensor itself is not modified.
    """
    above_mean = x > torch.mean(x)
    return x.masked_fill(above_mean, 0)

def filter_2(x: torch.Tensor):
    """Threshold followed by a high-pass (Laplacian) filter.

    Steps:
      1. Zero every element above the tensor's mean (same rule as filter_1).
      2. Reduce the three channels to one with ``rgb_to_grayscale``.
      3. Convolve with a 3x3 Laplacian kernel (padding=1 keeps H and W).
      4. Expand the single channel back to three with ``grayscale_to_rgb``.

    Args:
        x: Image tensor with three channels, either batched (N, 3, H, W) or
            a single image (3, H, W).

    Returns:
        Tensor with the same number of dimensions as ``x``. A single
        (3, H, W) image comes back as (3, H, W), so the function can be used
        as a per-image transform.
    """
    unbatched = x.dim() == 3

    image = torch.where(x > torch.mean(x), torch.tensor(0, dtype=x.dtype, device=x.device), x)
    if unbatched:
        # conv2d and the grayscale helpers work on (N, C, H, W)
        image = image.unsqueeze(0)
    image = rgb_to_grayscale(image)

    # Build the kernel on the image's device/dtype; a fixed CPU float32 kernel
    # makes conv2d fail for CUDA or float64 inputs.
    laplacian_kernel = torch.tensor([[[[ 0, -1,  0],
                                       [-1,  4, -1],
                                       [ 0, -1,  0]]]],
                                    dtype=image.dtype, device=image.device)
    high_pass = F.conv2d(image, laplacian_kernel, padding=1)
    high_pass = grayscale_to_rgb(high_pass)

    # Drop the batch dim that was added for a single image.
    return high_pass.squeeze(0) if unbatched else high_pass


In [9]:
# Training-time preprocessing: tensor conversion, threshold filter, resize,
# then light augmentation (small random rotation and horizontal flip).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(filter_1),
    transforms.Resize(size=(224, 224)),
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(),
])

In [10]:
# Training data: labels come from the sub-folder names below path['train'].
dataset = ImageFolder(path['train'], transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [11]:
# 34, 152 (presumably the ResNet depths that were tried -- confirm)
model = models.resnet152(pretrained=True)

# Swap the pretrained classification head for a fresh 3-output layer.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=3)

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:01<00:00, 236MB/s]


In [12]:
# Pick the device and move the model first: the optimizer should be built
# over the parameters after they are on their final device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Second loss instance; the training loop uses `criterion`. Kept so that any
# code referring to `loss_fn` keeps working.
loss_fn = nn.CrossEntropyLoss()

## Training

10 epochs f1score 90640

In [13]:
epochs = 5

for epoch in range(epochs):
    model.train()

    # `sample` counts batches (not individual images) within the epoch.
    sample = 0
    for sample, (inputs, labels) in enumerate(dataloader, start=1):
        # Move the batch to the device and make sure the inputs are float
        inputs = inputs.to(device).float()
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # "\r" rewrites the same output line with the latest batch loss.
        print(f"\rEpoch [{epoch+1}/{epochs}], Sample[{sample}] Loss: {loss.item():.4f}",end="")

Epoch [5/5], Sample[247] Loss: 0.5478

# Validation


In [14]:
# Evaluation-time preprocessing: same as training but without the random
# rotation / flip augmentation.
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(filter_1),
    transforms.Resize(size=(224, 224)),
])

In [15]:

# NOTE(review): path['val'] also contains the `copy_*` duplicates written for
# VIRUS and NORMAL files, so those classes are counted twice here -- confirm
# that this is intended.
valid_dataset = datasets.ImageFolder(root=path['val'], transform=valid_transform)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

model.to(device)
model.eval()

all_labels = []
all_preds = []

with torch.no_grad():
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)

        # Index of the highest logit = predicted class
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Collapse the three classes to binary: 0 = NORMAL, 1 = anything else.
# Look the NORMAL index up instead of hard-coding it: ground-truth labels use
# the validation folder's mapping, model outputs use the training mapping.
normal_label_idx = valid_dataset.class_to_idx['NORMAL']
normal_pred_idx = dataset.class_to_idx['NORMAL']
all_labels = np.where(np.array(all_labels) != normal_label_idx, 1, 0)
all_preds = np.where(np.array(all_preds) != normal_pred_idx, 1, 0)

# 'weighted' is a sensible average for unbalanced classes
f1 = f1_score(all_labels, all_preds, average='weighted')
print(f'F1-Score: {f1:.4f}')


F1-Score: 0.6861


87,5  

In [16]:

class UnlabeledDataset(Dataset):
    """Dataset over every entry of one directory, yielding images without labels.

    Attributes are set in ``__init__``:
      root_dir    -- the directory that was listed
      transform   -- optional callable applied to each loaded image
      image_paths -- ``root_dir`` joined with each name from ``os.listdir``,
                     in the order ``os.listdir`` returned them
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        entries = os.listdir(root_dir)
        self.image_paths = [os.path.join(root_dir, entry) for entry in entries]

    def __len__(self):
        """Number of listed paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and apply the transform, if any."""
        # Convert to RGB so the image always has 3 channels
        rgb_image = Image.open(self.image_paths[idx]).convert("RGB")

        if not self.transform:
            return rgb_image
        return self.transform(rgb_image)



path_unlabeled = '/kaggle/input/buw-chest-x-ray-challange/challenge_data/submission'
unlabeled_dataset = UnlabeledDataset(root_dir=path_unlabeled, transform=valid_transform)
# shuffle=False: predictions stay in the order of unlabeled_dataset.image_paths
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=32, shuffle=False)

model.to(device)
model.eval()

all_preds = []

with torch.no_grad():
    for inputs in unlabeled_loader:
        inputs = inputs.to(device)

        outputs = model(inputs)

        # Index of the highest logit = predicted class
        _, predicted = torch.max(outputs, 1)

        all_preds.extend(predicted.cpu().numpy())

# Collapse the three classes to binary: 0 = NORMAL, 1 = anything else.
# The model's output indices follow the training dataset's class mapping, so
# look the NORMAL index up there instead of hard-coding it.
normal_idx = dataset.class_to_idx['NORMAL']
all_preds = np.where(np.array(all_preds) != normal_idx, 1, 0)

print(all_preds)

[0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0
 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1
 1 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1 1
 0 1 1 1 1 1 0 1 1 0 0 1 0 0 1 0 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 1 1 1 0 0 1 0 0 1 1
 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1
 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1
 1 0 0 0 0 1 1 1 1 1 0 1 1 0 1 1]


In [17]:
 # Take the file names from the dataset the predictions were computed on, so
 # that names and predictions are guaranteed to be in the same order. Listing
 # a different directory (the working copy) gives no such guarantee, because
 # directory listing order is arbitrary.
 filenames = [Path(image_path).name for image_path in unlabeled_dataset.image_paths]

In [18]:
# One row per submission image: file name and binary prediction.
submission = {
    "ID": filenames,
    "Label": all_preds,
}
df = pd.DataFrame(submission).sort_values(by="ID", ascending=True)

# index=False: do not write the DataFrame index as an extra column.
df.to_csv('submission.csv', index=False)

# Last expression of the cell so the preview is actually rendered.
df.head()

