In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install monai

In [None]:
import wandb
import os
from tqdm.autonotebook import tqdm
import cv2
from pathlib import Path
import pydicom as dicom
from fastai import *
from fastai.vision.all import *
import matplotlib.pyplot as plt
from monai.config import print_config
from monai.config import print_config , KeysCollection
from monai.utils import first , set_determinism
from monai.transforms import (
    Compose,
    LoadImage,
    LoadImaged,
    EnsureChannelFirst,
    EnsureChannelFirstd,
    ToTensor,
    ToTensord,
    ScaleIntensityRange,
    ScaleIntensityRanged,
    ThresholdIntensity,
    ThresholdIntensityd,
    SaveImaged,
    Spacingd,
    CropForegroundd,
    Orientationd,
    AsDiscrete,
    RandCropByPosNegLabeld,
    DivisiblePadd,
    Resized,
    RandFlipd,
    RandRotate90d,
    RandShiftIntensityd




)

print_config()

In [None]:
from kaggle_secrets import UserSecretsClient
# Fetch the value stored under the Kaggle secret named 'wandb' so the key is never written in the notebook.
user_secrets = UserSecretsClient()
my_secret = user_secrets.get_secret('wandb')
# Authenticate this session with Weights & Biases using that key.
wandb.login(key = my_secret)

In [None]:
# Load the stage-2 training label table; the trailing bare `df` renders it as the cell output.
df = pd.read_csv('/kaggle/input/rsnapneumoniadetectionchallenge/stage_2_train_labels.csv/stage_2_train_labels.csv')
df

In [None]:
# Distinct values of the Target column together with the number of rows carrying each value.
np.unique(df['Target'] , return_counts = True)

In [None]:
# Reciprocal-count weights for the two classes.
# NOTE(review): 20672 and 9555 are presumably the per-class row counts shown by the
# np.unique cell — confirm. Neither variable is referenced again in the visible cells.
nor_weight = 1/20672
pn_weight = 1/9555

In [None]:
# Keep a single row per patient (the first one encountered).
# drop_duplicates(inplace=True) called on `df['patientId']` works on the column Series that the
# indexing returns, not on the frame, so the rows of `df` were never de-duplicated.
# Rebuilding the frame and re-assigning it removes the repeated patients for real.
df = df.drop_duplicates(subset = 'patientId').reset_index(drop = True)
df.shape

In [None]:
# Working directory at the time this cell runs; not referenced again in the visible cells.
HOME = os.getcwd()

In [None]:
# Number of rows currently in the label frame.
len(df)

In [None]:
ROOT_PATH = Path('/kaggle/input/rsnapneumoniadetectionchallenge/stage_2_train_images')
train_path = Path('./train')
valid_path = Path('./valid')

N_IMAGES = 15000        # number of label rows that are converted
TRAIN_FRACTION = 0.8    # leading share of those rows written to the training split
IMG_SIZE = 224          # side length of the stored square images

# Integer count: `i < train_size` then selects exactly `train_size` training images, which is
# also the divisor used when the running sums are turned into mean / std afterwards.
train_size = int(N_IMAGES * TRAIN_FRACTION)
normalizer = IMG_SIZE * IMG_SIZE   # pixels per resized image

patient_ids = df['patientId'].iloc[:N_IMAGES]
targets = df['Target'].iloc[:N_IMAGES]

# One sub-folder per class label inside each split, created once instead of on every iteration.
train_path.mkdir(parents = True , exist_ok = True)
valid_path.mkdir(parents = True , exist_ok = True)
for label in targets.unique():
    (train_path/f'{str(label)}').mkdir(parents = True , exist_ok = True)
    (valid_path/f'{str(label)}').mkdir(parents = True , exist_ok = True)

# Running sums of the per-image mean pixel value and mean squared pixel value (training split only).
sums , sums_squared = 0.0 , 0.0

for i , (fn , label) in enumerate(tqdm(zip(patient_ids , targets) , total = len(patient_ids))):

    img_path = (ROOT_PATH/fn).with_suffix('.dcm')
    img = dicom.dcmread(img_path).pixel_array / 255
    img_r = cv2.resize(img , (IMG_SIZE , IMG_SIZE)).astype(np.float16)

    # The first `train_size` rows form the training split, the remainder the validation split.
    is_train = i < train_size
    split_dir = train_path if is_train else valid_path
    np.save(split_dir/f'{str(label)}'/fn , img_r)

    if is_train:
        # Accumulate in float64: float16 arithmetic is too coarse for sums over ~50k pixels
        # and for a running total over thousands of images.
        pixels = img_r.astype(np.float64)
        sums += pixels.sum() / normalizer
        sums_squared += (pixels ** 2).sum() / normalizer

In [None]:
# Training-set statistics from the running sums: mean = E[x], std = sqrt(E[x^2] - E[x]^2).
mean = sums / train_size
second_moment = sums_squared / train_size
std = np.sqrt(second_moment - mean**2)

print(f'train images mean = {mean}')
print(f'train images standard deviation = {std}')

In [None]:
import torch
from torch import nn , optim, Tensor, manual_seed, argmax
import torchvision
from torchvision import transforms
import torchmetrics
import torch.nn.functional as F
from glob import glob
torch.__version__

In [None]:
# Training pipeline: tensor conversion, standardisation with the training-set mean/std,
# then random affine jitter and a random resized crop back to 224x224.
train_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean = mean , std = std ),
    transforms.RandomAffine(degrees = (-5,5) , translate = (0, 0.05) , scale = (0.9 , 1.1) ),
    transforms.RandomResizedCrop(size = (224,224) , scale = (0.5 , 1.0), antialias = True),
]
train_transforms = transforms.Compose(train_steps)

# Validation pipeline: the same conversion and standardisation, without any augmentation.
val_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean = mean , std = std),
]
val_transforms = transforms.Compose(val_steps)

In [None]:
def load_file(path):
    """Read the ``.npy`` file at ``path`` and return its contents as a float32 array."""
    stored = np.load(path)
    return stored.astype(np.float32)

# Both splits use the same loader and file extension; only the root folder and the transform differ.
_folder_kwargs = dict(loader = load_file , extensions = 'npy')

train_dataset = torchvision.datasets.DatasetFolder('/kaggle/working/train/' ,
                                                   transform = train_transforms ,
                                                   **_folder_kwargs)

val_dataset = torchvision.datasets.DatasetFolder('/kaggle/working/valid/' ,
                                                 transform = val_transforms ,
                                                 **_folder_kwargs)

In [None]:
# 3x3 grid of randomly chosen training samples (index drawn from 0..299), each titled with its label.
fig , axis = plt.subplots(3,3,figsize = (9,10))
for panel in axis.flat:
    random_idx = np.random.randint(0 , 300)
    img , label = train_dataset[random_idx]
    panel.imshow(img[0] , cmap = 'bone')   # first channel of the transformed image
    panel.set_title(label)
plt.show();

In [None]:
from torch.cuda.amp import autocast, GradScaler

# Gradient scaler object from torch.cuda.amp.
# NOTE(review): neither `scaler` nor `autocast` is referenced again in the visible cells.
scaler = GradScaler()

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

In [None]:
model = torchvision.models.resnet18(pretrained = True)
# Freeze every parameter of the first three residual stages.
# Each stage is walked separately: zipping the three parameter iterators together stops as soon
# as the shortest one is exhausted, which leaves the remaining parameters of the longer stages trainable.
for stage in (model.layer1 , model.layer2 , model.layer3):
    for param in stage.parameters():
        param.requires_grad = False
    


In [None]:
# Skeleton of the W&B sweep definition; the search method is 'random'.
sweep_config = dict(method = 'random')

In [None]:
# Objective of the sweep: maximise the logged 'val_accuracy'.
metric = dict(name = 'val_accuracy' , goal = 'maximize')

sweep_config['metric'] = metric

In [None]:
# First search dimension: the optimiser name, chosen from a fixed list.
parameters = dict(
    optimizer = dict(values = ['adam', 'adamw'  ,'sgd']),
)

sweep_config['parameters'] = parameters

In [None]:
# Natural logarithms of 1e-4 and 1e-2, shown as the cell output
# (compare with the 'min' / 'max' given for learning_rate in the sweep parameters).
np.log(1e-4) , np.log(1e-2)

In [None]:
# Remaining search dimensions, added in place to the existing `parameters` dict.
parameters.update(
    learning_rate = dict(distribution = 'log_uniform' , min = -9.2 , max = -4.6),
    pos_weight = dict(values = [2,3]),
    batch_size = dict(values = [16 , 32 , 64]),
)

In [None]:
import pprint

# Pretty-print the assembled sweep configuration for a visual check.
pprint.pprint(sweep_config)

In [None]:
# Register the sweep under the W&B project 'xray_sweep_4' and keep the returned id.
sweep_id = wandb.sweep(sweep = sweep_config , project = 'xray_sweep_4')

In [None]:
#sweep model building

class PneumoniaModel(pl.LightningModule):
    """ResNet-18 binary classifier for single-channel images, configurable from a W&B sweep.

    The network emits one logit per image and is trained with ``BCEWithLogitsLoss``.

    NOTE(review): another class named ``PneumoniaModel`` is defined further down the notebook
    and replaces this one as soon as that cell is executed.

    Args:
        optimizer: optimiser name, one of 'adam', 'adamw' or 'sgd' (case-insensitive).
        lr: learning rate handed to the optimiser.
        pos_weight: weight of the positive class in the loss.

    Raises:
        ValueError: if ``optimizer`` is not one of the supported names.
    """

    # Optimiser classes selectable through the `optimizer` hyper-parameter.
    _OPTIMIZERS = {
        'adam': torch.optim.Adam,
        'adamw': torch.optim.AdamW,
        'sgd': torch.optim.SGD,
    }

    def __init__(self , optimizer = 'adam' , lr = 0.001 , pos_weight = 2 ):
        super().__init__()

        #Model: ResNet-18 with a 1-input-channel first convolution and a single-output head
        self.model = torchvision.models.resnet18()
        self.model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.model.fc = torch.nn.Linear(in_features=512, out_features=1, bias=True)

        self.save_hyperparameters()

        #optim: honour the requested optimiser instead of always building Adam
        optimizer_name = str(optimizer).lower()
        if optimizer_name not in self._OPTIMIZERS:
            raise ValueError(f'unsupported optimizer {optimizer!r}; expected one of {sorted(self._OPTIMIZERS)}')
        self.lr = lr
        self.optimizer = self._OPTIMIZERS[optimizer_name](self.model.parameters() , lr = lr)

        #loss: float tensor so the weight has a floating-point dtype like the logits
        self.pos_weight = pos_weight
        self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.tensor(float(pos_weight)))

        #Metrics: one instance per stage so their accumulated state never mixes
        self.train_acc = torchmetrics.Accuracy(task = 'binary' )
        self.val_acc = torchmetrics.Accuracy(task = 'binary')
        self.test_acc = torchmetrics.Accuracy(task = 'binary')
        # Named val_precision / val_recall rather than precision / recall.
        # NOTE(review): some Lightning releases manage a `precision` attribute on the module — confirm
        # for the installed version; the prefixed names cannot collide either way.
        self.val_precision = torchmetrics.classification.BinaryPrecision()
        self.val_recall = torchmetrics.classification.BinaryRecall()

    def forward(self , data):
        """Return the raw network output for ``data``."""
        pred = self.model(data)
        return pred

    def _shared_step(self , batch):
        """Run the model on one batch and return ``(loss, probabilities, integer targets)``."""
        x_ray, label = batch
        label = label.float()
        pred = self(x_ray)[:,0]          # one logit per image
        loss = self.loss(pred, label)
        # Explicit sigmoid: the metrics then always receive probabilities, whatever the logit range.
        return loss , torch.sigmoid(pred) , label.long()

    def training_step(self, batch , batch_idx , config = None):
        """Compute the training loss and log epoch-level loss and accuracy. ``config`` is unused."""
        loss , probs , target = self._shared_step(batch)
        self.train_acc(probs , target)
        # Logging the metric object lets Lightning compute and reset it once per epoch.
        self.log_dict({'train_accuracy': self.train_acc , 'train_loss': loss}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def validation_step(self, batch , batch_idx):
        """Compute the validation loss and log epoch-level accuracy, precision and recall."""
        loss , probs , target = self._shared_step(batch)
        self.val_acc(probs , target)
        self.val_precision(probs , target)
        self.val_recall(probs , target)
        self.log_dict({'val_loss': loss ,'val_accuracy': self.val_acc,
                       'precision': self.val_precision , 'recall': self.val_recall}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def test_step(self, batch , batch_idx):
        """Log the test loss and accuracy."""
        loss , probs , target = self._shared_step(batch)
        self.test_acc(probs , target)
        self.log('test_loss', loss , on_epoch = True)
        self.log('test_accuracy', self.test_acc , on_epoch = True)

    def configure_optimizers(self):
        """Return the optimiser built in ``__init__``."""
        return [self.optimizer]
        

In [None]:
from pytorch_lightning.loggers import WandbLogger

In [None]:
#sweep building

def sweep_iteration():
    """Train and validate one model using the hyper-parameters W&B assigned to this run."""
    wandb.init()
    wandb_logger = WandbLogger()
    cfg = wandb.config

    model = PneumoniaModel(optimizer = cfg.optimizer,
                           lr = cfg.learning_rate,
                           pos_weight = cfg.pos_weight)

    # Loaders: batch size comes from the sweep, worker count is fixed.
    loader_kwargs = dict(batch_size = cfg.batch_size , num_workers = 6)
    train_loader = torch.utils.data.DataLoader(train_dataset , shuffle = True , **loader_kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset , **loader_kwargs)

    trainer = pl.Trainer(logger = wandb_logger , max_epochs = 15)

    trainer.fit(model , train_loader , val_loader)
    trainer.validate(model , val_loader)

In [None]:
# Cap the number of runs: without `count` a 'random' sweep keeps launching runs until it is
# stopped by hand, so this cell would never return and the cells below would never execute.
SWEEP_RUNS = 20
wandb.agent(sweep_id , function = sweep_iteration , count = SWEEP_RUNS)

In [None]:
# Loader settings for the stand-alone (non-sweep) training runs.
batch_size = 32
num_workers = 4

_loader_kwargs = dict(batch_size = batch_size , num_workers = num_workers)
train_loader = torch.utils.data.DataLoader(train_dataset , shuffle = True , **_loader_kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset , **_loader_kwargs)

In [None]:
#model building

class PneumoniaModel(pl.LightningModule):
    """ResNet-18 binary classifier for single-channel images, trained with SGD (lr 0.0033).

    The backbone is created with ``pretrained = True``; its first convolution is replaced by a
    1-input-channel layer and its head by a single-output linear layer. The model emits one
    logit per image and is trained with ``BCEWithLogitsLoss`` (``pos_weight`` 1).
    """

    def __init__(self):
        super().__init__()
        self.model = torchvision.models.resnet18(pretrained = True)
        self.model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.model.fc = torch.nn.Linear(in_features=512, out_features=1, bias=True)
        self.save_hyperparameters()

        self.optimizer = torch.optim.SGD(self.model.parameters() , lr = 0.0033)
        pos_weights = 1
        # Float tensor so the weight has a floating-point dtype like the logits.
        self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.tensor(float(pos_weights)))

        # One accuracy metric per stage; f1_score is used for validation only.
        self.train_acc = torchmetrics.Accuracy(task = 'binary' )
        self.val_acc = torchmetrics.Accuracy(task = 'binary')
        self.test_acc = torchmetrics.Accuracy(task = 'binary')
        self.f1_score = torchmetrics.F1Score(task = 'binary')
        # Kept as an attribute; it is not updated during training or validation.
        self.conf_m = torchmetrics.ConfusionMatrix(task = 'binary')

    def forward(self , data):
        """Return the raw network output for ``data``."""
        pred = self.model(data)
        return pred

    def _shared_step(self , batch):
        """Run the model on one batch and return ``(loss, probabilities, integer targets)``."""
        x_ray, label = batch
        label = label.float()
        pred = self(x_ray)[:,0]          # one logit per image
        loss = self.loss(pred, label)
        # Explicit sigmoid: the metrics then always receive probabilities, whatever the logit range.
        return loss , torch.sigmoid(pred) , label.long()

    def training_step(self, batch , batch_idx , config = None):
        """Compute the training loss and log epoch-level loss and accuracy. ``config`` is unused."""
        loss , probs , target = self._shared_step(batch)
        self.train_acc(probs , target)
        # Logging the metric object lets Lightning compute and reset it once per epoch.
        self.log_dict({'train_accuracy': self.train_acc , 'train_loss': loss}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def validation_step(self, batch , batch_idx):
        """Compute the validation loss and log epoch-level accuracy and F1."""
        loss , probs , target = self._shared_step(batch)
        self.val_acc(probs , target)
        # F1 is not an average of per-batch F1 values, so the metric object itself is logged
        # and evaluated over the whole validation epoch.
        self.f1_score(probs , target)
        self.log_dict({'val_loss': loss ,'val_accuracy': self.val_acc,'val_F1score': self.f1_score}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def test_step(self, batch , batch_idx):
        """Log the test loss and accuracy."""
        loss , probs , target = self._shared_step(batch)
        self.test_acc(probs , target)
        self.log('test_loss', loss , on_epoch = True)
        self.log('test_accuracy', self.test_acc , on_epoch = True)

    def configure_optimizers(self):
        """Return the optimiser built in ``__init__``."""
        return [self.optimizer]
        

In [None]:
# Instantiate the LightningModule currently bound to the name PneumoniaModel.
model = PneumoniaModel()

In [None]:
# wandb.init(project = 'Pneumonia_exp')
# Checkpointing: rank checkpoints by 'val_accuracy' (higher is better) and keep the ten best.
checkpoint_callback = ModelCheckpoint(
    filename = 'sample-xray-{epoch:02d}-{val_loss:.2f}',
    monitor = 'val_accuracy',
    mode = 'max',
    save_top_k = 10,
)

wandb_logger = WandbLogger(save_dir = './L06ogs' , log_model = "all")

# Trainer limits: at least 20 and at most 100 epochs, logging on every step.
_trainer_options = dict(min_epochs = 20 , max_epochs = 100 , log_every_n_steps = 1 ,
                        fast_dev_run = False , enable_model_summary = True)
trainer = pl.Trainer(logger = wandb_logger , callbacks = checkpoint_callback , **_trainer_options)

In [None]:
# Train on the training loader while validating on the validation loader,
# then run one more stand-alone validation pass with the resulting model.
trainer.fit( model , train_loader, val_loader )
trainer.validate(model , val_loader)

In [None]:
# Build an EfficientNet-B3 with default arguments so its layer structure is shown as the cell output.
torchvision.models.efficientnet_b3()

In [None]:
#model building

class PneumoniaModeleff(pl.LightningModule):
    """EfficientNet-B3 binary classifier for single-channel images, trained with Adam (lr 0.001).

    The first convolution is replaced by a 1-input-channel layer (followed by a fresh
    BatchNorm2d) and the last classifier layer by a single-output linear layer. The model
    emits one logit per image and is trained with ``BCEWithLogitsLoss`` (``pos_weight`` 2).
    """

    def __init__(self):
        super().__init__()
        self.model  = torchvision.models.efficientnet_b3(pretrained=False)
        self.model.features[0][0] = torch.nn.Conv2d(1, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.model.classifier[-1] = torch.nn.Linear(in_features=1536, out_features=1, bias=True)
        self.model.features[0][1] = torch.nn.BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.save_hyperparameters()

        self.optimizer = torch.optim.Adam(self.model.parameters() , lr = 0.001)
        pos_weights = 2
        # Float tensor so the weight has a floating-point dtype like the logits.
        self.loss = torch.nn.BCEWithLogitsLoss(pos_weight = torch.tensor(float(pos_weights)))

        # One accuracy metric per stage; f1_score is used for validation only.
        self.train_acc = torchmetrics.Accuracy(task = 'binary' )
        self.val_acc = torchmetrics.Accuracy(task = 'binary')
        self.test_acc = torchmetrics.Accuracy(task = 'binary')
        self.f1_score = torchmetrics.F1Score(task = 'binary')
        # Kept as an attribute; it is not updated during training or validation.
        self.conf_m = torchmetrics.ConfusionMatrix(task = 'binary')

    def forward(self , data):
        """Return the raw network output for ``data``."""
        pred = self.model(data)
        return pred

    def _shared_step(self , batch):
        """Run the model on one batch and return ``(loss, probabilities, integer targets)``."""
        x_ray, label = batch
        label = label.float()
        pred = self(x_ray)[:,0]          # one logit per image
        loss = self.loss(pred, label)
        # Explicit sigmoid: the metrics then always receive probabilities, whatever the logit range.
        return loss , torch.sigmoid(pred) , label.long()

    def training_step(self, batch , batch_idx):
        """Compute the training loss and log epoch-level loss and accuracy."""
        loss , probs , target = self._shared_step(batch)
        self.train_acc(probs , target)
        # Logging the metric object lets Lightning compute and reset it once per epoch.
        self.log_dict({'train_accuracy': self.train_acc , 'train_loss': loss}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def validation_step(self, batch , batch_idx):
        """Compute the validation loss and log epoch-level accuracy and F1."""
        loss , probs , target = self._shared_step(batch)
        self.val_acc(probs , target)
        # F1 is not an average of per-batch F1 values, so the metric object itself is logged
        # and evaluated over the whole validation epoch.
        self.f1_score(probs , target)
        self.log_dict({'val_loss': loss ,'val_accuracy': self.val_acc,'val_F1score': self.f1_score}
                      ,on_step = False ,on_epoch = True , prog_bar = True)
        return loss

    def test_step(self, batch , batch_idx):
        """Log the test loss and accuracy."""
        loss , probs , target = self._shared_step(batch)
        self.test_acc(probs , target)
        self.log('test_loss', loss , on_epoch = True)
        self.log('test_accuracy', self.test_acc , on_epoch = True)

    def configure_optimizers(self):
        """Return the optimiser built in ``__init__``."""
        return [self.optimizer]
        

In [None]:
# Checkpointing: rank checkpoints by 'val_accuracy' (higher is better) and keep the ten best.
checkpoint_callback = ModelCheckpoint(
    filename = 'xray-{epoch:02d}-{val_loss:.2f}',
    monitor = 'val_accuracy',
    mode = 'max',
    save_top_k = 10,
)

wandb_logger = WandbLogger(save_dir = './L02ogs' , log_model = "all")

# Trainer limits: at least 20 and at most 50 epochs, logging on every step.
_trainer_options = dict(min_epochs = 20 , max_epochs = 50 , log_every_n_steps = 1 ,
                        fast_dev_run = False , enable_model_summary = True)
trainer = pl.Trainer(logger = wandb_logger , callbacks = checkpoint_callback , **_trainer_options)

In [None]:
# !wget 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth'

In [None]:
# Instantiate the EfficientNet-B3 LightningModule.
model_2 = PneumoniaModeleff()

In [None]:
# Train on the training loader while validating on the validation loader,
# then run one more stand-alone validation pass with the resulting model.
trainer.fit( model_2 , train_loader, val_loader )
trainer.validate(model_2 , val_loader)

In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Restore the EfficientNet LightningModule from a Lightning checkpoint file.
# Note that this rebinds `model`, which previously held the ResNet module.
model = PneumoniaModeleff.load_from_checkpoint('/kaggle/input/eff-model/eff_model.ckpt')
# Export two artefacts into the working directory: the state dict and the whole pickled module.
# Both are written before the eval()/to(device) calls below.
torch.save(model.state_dict() , 'eff_state_dict.pth')
torch.save(model , 'effmodel.pth' )
# Switch to evaluation mode and move the model to the selected device.
model.eval()
model.to(device)

In [None]:
# Rebuild the EfficientNet model from the exported state dict.
# The path used before ('/kaggle/working/effmodel.pt') matches neither exported file
# ('eff_state_dict.pth', 'effmodel.pth'), so the load could not succeed. Loading the state dict
# also avoids unpickling a whole module, and map_location places the weights on `device`.
model = PneumoniaModeleff()
model.load_state_dict(torch.load('eff_state_dict.pth' , map_location = device))
model.to(device)
model.eval()

In [None]:
# Score the validation set one image at a time, collecting a probability and a label per sample.
preds , labels = [] , []

with torch.no_grad():
    for data , label in tqdm(val_dataset):
        batch = data.to(device).float().unsqueeze(0)   # add the batch axis
        logit = model(batch)[0].cpu()
        preds.append(torch.sigmoid(logit))
        labels.append(label)

preds = torch.tensor(preds)
labels = torch.tensor(labels)

In [None]:
# Binary-task metrics computed once over all collected predictions and labels.
_binary = dict(task = 'binary')
acc = torchmetrics.Accuracy(**_binary)(preds,labels)
precision = torchmetrics.Precision(**_binary)(preds,labels)
recall = torchmetrics.Recall(**_binary)(preds,labels)
matrix = torchmetrics.ConfusionMatrix(num_classes = 2 , **_binary)(preds,labels)

In [None]:
# Report the metrics in a fixed order, one per line.
for caption , value in (('precision' , precision) , ('recall' , recall) ,
                        ('accuracy' , acc) , ('matrix' , matrix)):
    print(f'{caption} = {value}')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Draw the confusion matrix as an annotated heat map with tick labels on both axes.
sns.heatmap(matrix , annot = True , fmt=".1f" , xticklabels = True  , yticklabels = True )


In [None]:
# Replace `model` with the ResNet-18 module restored from its Lightning checkpoint.
model = PneumoniaModel.load_from_checkpoint('/kaggle/input/resmodel/res_model.ckpt')
# Put the module in evaluation mode before scoring, as is done for the EfficientNet model:
# otherwise BatchNorm layers normalise with the statistics of each single-image batch.
model.eval()
model.to(device)

In [None]:
# Score the validation set one image at a time, collecting a probability and a label per sample.
preds , labels = [] , []

with torch.no_grad():
    for data , label in tqdm(val_dataset):
        single = data.to(device).float().unsqueeze(0)   # add the batch axis
        prob = torch.sigmoid(model(single)[0].cpu())
        preds.append(prob)
        labels.append(label)

preds = torch.tensor(preds)
labels = torch.tensor(labels)

In [None]:
# Binary-task metrics computed once over all collected predictions and labels.
_binary = dict(task = 'binary')
acc = torchmetrics.Accuracy(**_binary)(preds,labels)
precision = torchmetrics.Precision(**_binary)(preds,labels)
recall = torchmetrics.Recall(**_binary)(preds,labels)
matrix = torchmetrics.ConfusionMatrix(num_classes = 2 , **_binary)(preds,labels)

In [None]:
# Report the metrics in a fixed order, one per line.
for caption , value in (('precision' , precision) , ('recall' , recall) ,
                        ('accuracy' , acc) , ('matrix' , matrix)):
    print(f'{caption} = {value}')

In [None]:
# Draw the confusion matrix as an annotated heat map.
sns.heatmap(matrix , annot = True , fmt=".1f" )

In [None]:
# Debug variant of the scoring loop: it prints the probability of the first validation
# sample and then stops, so `preds` and `labels` end up holding exactly one element each.
# NOTE(review): looks like a leftover experiment — confirm whether this cell is still needed.
preds = []
labels = []

with torch.no_grad():
    for data,label in tqdm(val_dataset):
        data = data.to(device).float().unsqueeze(0)
        pred = torch.sigmoid(model(data)[0].cpu())
        print(pred)
        preds.append(pred)
        labels.append(label)
        # Leave the loop after the first sample.
        break

preds = torch.tensor(preds)
labels = torch.tensor(labels)

In [None]:
# Binary-task metrics over whatever `preds` / `labels` currently hold, followed by a report.
_binary = dict(task = 'binary')
acc = torchmetrics.Accuracy(**_binary)(preds,labels)
precision = torchmetrics.Precision(**_binary)(preds,labels)
recall = torchmetrics.Recall(**_binary)(preds,labels)
matrix = torchmetrics.ConfusionMatrix(num_classes = 2 , **_binary)(preds,labels)

for caption , value in (('precision' , precision) , ('recall' , recall) ,
                        ('accuracy' , acc) , ('matrix' , matrix)):
    print(f'{caption} = {value}')

In [None]:
# Draw the confusion matrix as an annotated heat map.
sns.heatmap(matrix , annot = True , fmt=".1f" )

In [None]:
class PneumoniaFetModel(pl.LightningModule):
    """ResNet-18 classifier that returns its last convolutional feature map next to the logit.

    The layer layout mirrors the ResNet ``PneumoniaModel`` (1-input-channel first convolution,
    single-output head). ``feature_map`` reuses the same layer objects as ``model``, minus the
    final average pooling and the fully connected layer.
    """

    def __init__(self):
        super().__init__()
        self.model = torchvision.models.resnet18(pretrained = True)
        self.model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.model.fc = torch.nn.Linear(in_features=512, out_features=1, bias=True)

        # [:-2] drops the last two children: the average pooling and the fc layer.
        self.feature_map = torch.nn.Sequential(*list(self.model.children())[:-2])

    def forward(self,data):
        """Return ``(pred, feature_map)`` for a batch of images.

        ``pred`` holds one logit per image (shape ``(batch,)``); ``feature_map`` is the output
        of the convolutional trunk before pooling.
        """
        # Feature extraction: the network without its last two layers.
        feature_map = self.feature_map(data)
        av_pool_out = torch.nn.functional.adaptive_avg_pool2d(input = feature_map , output_size = (1,1))
        # Flatten per sample and keep the batch axis, so batches larger than one image work too.
        av_pool_flattened = torch.flatten(av_pool_out , start_dim = 1)

        # Regular prediction through the fc head; [:, 0] yields one logit per image.
        pred = self.model.fc(av_pool_flattened)[:, 0]

        return pred , feature_map

In [None]:
# Restore the feature-map model from the ResNet checkpoint. strict = False is passed so that
# key differences between the checkpoint and this class do not abort the load.
model_feat = PneumoniaFetModel.load_from_checkpoint('/kaggle/input/resmodel/res_model.ckpt' , strict = False)
# Evaluation mode; the trailing semicolon suppresses the module printout.
model_feat.eval();

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the feature-map model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_feat.to(device)

In [None]:
def cam(model , img , map_device = None):
    """Compute a class activation map (CAM) for a single image.

    Args:
        model: module whose forward pass returns ``(logit, feature_map)`` and which exposes
            its final linear layer as ``model.model.fc``.
        img: one image tensor without a batch axis.
        map_device: device to run on; when omitted, the notebook-level ``device`` is used.

    Returns:
        ``(cam_img, prob)``: the activation map on the CPU, shaped like the spatial grid of
        the feature map, and the sigmoid of the model's logit.
    """
    target_device = device if map_device is None else map_device

    # Tensor.to returns the moved tensor, so the result has to be kept.
    img = img.to(target_device)
    model.to(target_device)
    with torch.inference_mode():
        pred, features = model(img.unsqueeze(0))

    # Drop the batch axis and flatten the spatial grid: (channels, h, w) -> (channels, h*w).
    fmap = features[0]
    n_channels , height , width = fmap.shape
    fmap = fmap.reshape(n_channels , height * width)

    # Weights of the single output unit of the fc layer (biases are not used).
    weight = model.model.fc.weight[0].detach()

    # Weighted sum over channels gives one activation value per spatial position.
    heat = torch.matmul(weight , fmap)
    cam_img = heat.reshape(height , width).cpu()

    return cam_img , torch.sigmoid(pred)

In [None]:
%matplotlib inline
def visualize(img , cam , pred):
    """Show the image next to the same image overlaid with its activation map, then save the figure.

    Args:
        img: image tensor whose first axis is the channel axis.
        cam: 2-D activation map; it is resized to 224x224 for the overlay.
        pred: predicted probability; the title shows the result of ``pred > 0.5``.
    """
    image_2d = img[0]    # drop the channel axis
    heatmap = transforms.functional.resize(cam.unsqueeze(0), (224,224))[0]

    fig , (left , right) = plt.subplots(1,2 , figsize = (10,8))
    left.imshow(image_2d , cmap = 'gray')
    right.imshow(image_2d , cmap = 'gray')
    right.imshow(heatmap , alpha = 0.5 , cmap = 'inferno_r' )

    plt.title(pred > 0.5)
    plt.savefig('x_ray_feat')

In [None]:
img = val_dataset[-13][0] #only image without label

# Move the image to the device selected earlier instead of assuming that CUDA is present.
act_map ,pred = cam(model_feat , img.to(device))

In [None]:
# Plot the selected image with its activation map and write the figure to disk.
visualize(img , act_map , pred)