In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torchvision import transforms
from scifAI.dl.dataset import DatasetGenerator
from scifAI.dl.utils import get_statistics
from torch.utils.data import DataLoader
from lightning.pytorch.callbacks import LearningRateMonitor
import neptune
from sklearn.metrics import matthews_corrcoef, classification_report,confusion_matrix, accuracy_score, balanced_accuracy_score, cohen_kappa_score, f1_score,  precision_score, recall_score
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
import os
import random
import lightning.pytorch as pl

In [57]:

# Single seed shared by every random number generator used in this notebook.
seed_value = 42

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# influences processes spawned after this point, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# Seed the stdlib and NumPy global generators with the same value.
for seed_fn in (random.seed, np.random.seed):
    seed_fn(seed_value)

# Seed PyTorch last; the returned Generator is the cell's displayed value.
torch.manual_seed(seed_value)

<torch._C.Generator at 0x7fa5d8b86570>

In [41]:
# Location of the metadata table. The default is the original author's local
# path; set the METADATA_CSV environment variable to run the notebook on a
# different machine without editing this cell.
metadata_path = os.environ.get(
    "METADATA_CSV",
    "/home/jedrzej/projects/image_flow_cytometry_fine_tune/data/jedrzej/metadata_subset.csv.gz",
)

# One row per imaged object: file path, experiment, donor, condition,
# object number, split ("set") and label.
metadata = pd.read_csv(metadata_path)
metadata

  metadata = pd.read_csv("/home/jedrzej/projects/image_flow_cytometry_fine_tune/data/jedrzej/metadata_subset.csv.gz")


Unnamed: 0,file,experiment,donor,condition,object_number,set,label
0,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,53764,unlabeled,-1
1,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,38075,unlabeled,-1
2,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,39302,unlabeled,-1
3,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,50406,unlabeled,-1
4,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,29629,train,No_cell_cell_interaction
...,...,...,...,...,...,...,...
1065905,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,76910,unlabeled,-1
1065906,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,89427,unlabeled,-1
1065907,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,80928,unlabeled,-1
1065908,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,83923,unlabeled,-1


In [42]:
# Which split names occur in the "set" column (bracket access avoids any
# confusion with the built-in name `set`).
metadata["set"].unique()

array(['unlabeled', 'train', 'test', 'labeled', 'validation'],
      dtype=object)

In [43]:
# Keep only rows recorded under the "-SEA" / "+SEA" conditions and renumber
# the rows from zero so later index-based selections are contiguous.
indx = metadata["condition"].isin(["-SEA", "+SEA"])
metadata = metadata[indx].reset_index(drop=True)

In [44]:
# Class names kept for supervised training; their order defines the integer
# ids assigned when the label map is built from this list.
set_of_interesting_classes = [
    'B_cell',
    'T_cell',
    'T_cell_with_signaling',
    'T_cell_with_B_cell_fragments',
    'B_T_cell_in_one_layer',
    'Synapses_without_signaling',
    'Synapses_with_signaling',
    'No_cell_cell_interaction',
    'Multiplets',
]

# True for rows whose label is one of the classes listed above.
has_class_of_interest = metadata["label"].isin(set_of_interesting_classes)

# Rows that belong to any of the three annotated splits and carry a kept label.
indx = metadata["set"].isin(["train", "validation", "test"]) & has_class_of_interest


def _split_index(split_name):
    """Return the index labels of rows in ``split_name`` that carry a kept label."""
    in_split = (metadata["set"] == split_name) & has_class_of_interest
    return in_split[in_split].index


train_index = _split_index("train")
validation_index = _split_index("validation")
test_index = _split_index("test")

In [45]:
# Re-check the split names that remain after the condition filter.
metadata.loc[:, "set"].unique()

array(['unlabeled', 'train', 'test', 'labeled', 'validation'],
      dtype=object)

In [46]:


# Class name -> integer id, numbered by position in set_of_interesting_classes.
label_map = {
    class_name: class_id
    for class_id, class_name in enumerate(set_of_interesting_classes)
}

# The "no label" marker is accepted both as the string '-1' and as the
# integer -1 (presumably because the label column can hold either form --
# TODO confirm); both map to -1.
label_map.update({'-1': -1, -1: -1})


In [47]:
# Display the finished label -> integer id mapping for inspection.
label_map

{'B_cell': 0,
 'T_cell': 1,
 'T_cell_with_signaling': 2,
 'T_cell_with_B_cell_fragments': 3,
 'B_T_cell_in_one_layer': 4,
 'Synapses_without_signaling': 5,
 'Synapses_with_signaling': 6,
 'No_cell_cell_interaction': 7,
 'Multiplets': 8,
 '-1': -1,
 -1: -1}

In [48]:
# Channel id -> (colour-map name, stain/marker name).
# The first tuple element looks like a matplotlib colormap name and the second
# like the imaged marker -- presumably used for plotting; verify against usage.
channels = dict(
    Ch1=("Greys", "BF"),
    Ch2=("Greens", "Antibody"),
    Ch3=("Reds", "CD18"),
    Ch4=("Oranges", "F-Actin"),
    Ch6=("RdPu", "MHCII"),
    Ch7=("Purples", "CD3/CD4"),
    Ch11=("Blues", "P-CD3zeta"),
    Ch12=("Greens", "Live-Dead"),
)

In [50]:
# Positions of the image channels handed to the dataset and statistics helper.
selected_channels = [0, 3, 4, 5, 6]

# Output directories for checkpoints and logs.
model_dir, log_dir = "models", "logs"

# Divisor passed to the dataset as `scaling_factor` (4095 = 2**12 - 1,
# presumably the 12-bit intensity maximum -- TODO confirm).
scaling_factor = 4095.0

# Side length passed to the dataset as `reshape_size`.
reshape_size = 160

# Plain lists of transforms; they are wrapped in transforms.Compose at use.
train_transform = [
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),
]
test_transform = []

In [51]:
# Dataset over the training rows only; it feeds the statistics pass below.
# NOTE(review): `train_transform` is the list of random flips/rotation from the
# configuration cell, so the statistics are computed on augmented images --
# confirm this is intended. A later cell rebinds `train_transform` to a
# Compose object, so this cell must run before that one.
train_dataset = DatasetGenerator(
    metadata=metadata.loc[train_index, :],
    label_map=label_map,
    selected_channels=selected_channels,
    scaling_factor=scaling_factor,
    reshape_size=reshape_size,
    transform=transforms.Compose(train_transform),
)

In [52]:
# Unshuffled pass over the training dataset in batches of 128 using 6 worker
# processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=6,
)




In [53]:
# Per-channel summary of the training data. The printed result is a dict with
# keys 'min', 'p01', 'p05', 'p25', 'p50', 'p75', 'p95', 'p99', 'max', 'mean'
# and 'std', each holding one value per selected channel.
statistics = get_statistics(
    train_loader,
    selected_channels=selected_channels,
)


100%|██████████| 23/23 [02:30<00:00,  6.55s/it]

statistics used: {'min': tensor([0., 0., 0., 0., 0.]), 'p01': tensor([0., 0., 0., 0., 0.]), 'p05': tensor([0., 0., 0., 0., 0.]), 'p25': tensor([0.1940, 0.0081, 0.0092, 0.0126, 0.0094]), 'p50': tensor([0.1951, 0.0153, 0.0118, 0.0186, 0.0096]), 'p75': tensor([0.1956, 0.0198, 0.0184, 0.0254, 0.0097]), 'p95': tensor([0.1970, 0.0402, 0.0329, 0.0364, 0.0102]), 'p99': tensor([0.2068, 0.0834, 0.0690, 0.0905, 0.0109]), 'max': tensor([0.3288, 0.7867, 0.4370, 0.3362, 0.1153]), 'mean': tensor([0.1723, 0.0172, 0.0148, 0.0199, 0.0085]), 'std': tensor([0.0627, 0.0192, 0.0138, 0.0154, 0.0032])}





In [21]:
class AddGaussianNoise(object):
    """Transform that adds element-wise Gaussian noise to a tensor.

    Calling the transform returns ``tensor + z * std + mean`` where ``z`` is a
    fresh standard-normal sample with the same shape as ``tensor``.

    Args:
        mean: constant offset added to every element.
        std: factor the standard-normal sample is multiplied by.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """Return a noisy copy of ``tensor``; the input is not modified."""
        # Sample on the input's own device so the transform also works for
        # tensors that do not live on the CPU (a CPU-only sample would raise
        # a device-mismatch error when added to such a tensor).
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [22]:
class MinMaxScaler(object):
    """Per-channel linear rescaling with clipping to the output range.

    Each value is mapped with
    ``(x - min_in) / (max_in - min_in) * (max_out - min_out) + min_out``
    and the result is clipped to ``[min_out, max_out]``.

    Args:
        min_in: per-channel lower input bound; reshaped to ``(-1, 1, 1)`` so
            it broadcasts over the two trailing dimensions of the input.
        max_in: per-channel upper input bound; reshaped like ``min_in``.
        min_out: lower bound of the output range.
        max_out: upper bound of the output range.
    """

    def __init__(self, min_in , max_in, min_out, max_out):
        self.min_in = min_in.reshape(-1,1,1)
        self.max_in = max_in.reshape(-1,1,1)
        self.min_out = min_out
        self.max_out = max_out

    def __call__(self, tensor):
        """Return the rescaled and clipped copy of ``tensor``."""
        span = self.max_in - self.min_in
        # A channel whose bounds coincide has zero span; dividing by it would
        # produce NaN/inf, and NaN is not removed by clipping. Adding the
        # boolean mask turns every zero span into 1 and leaves the others
        # untouched, so such a channel is only shifted and clipped.
        span = span + (span == 0)

        scaled = (tensor - self.min_in) / span
        scaled = scaled * (self.max_out - self.min_out) + self.min_out
        return torch.clamp(scaled, self.min_out, self.max_out)

    def __repr__(self):
        return self.__class__.__name__ + '(min_out={0}, max_out={1})'.format(self.min_out, self.max_out)

In [None]:
def _percentile_scaler():
    """Build a fresh MinMaxScaler mapping the [p05, p95] statistics onto [0, 1]."""
    return MinMaxScaler(
        min_in=statistics["p05"],
        max_in=statistics["p95"],
        min_out=0.,
        max_out=1.,
    )


# Training pipeline: scale, random crop/resize back to `reshape_size`, random
# flips, then light Gaussian noise.
# NOTE: this rebinds `train_transform`, which the configuration cell defined
# as a plain list of transforms.
train_transform = transforms.Compose([
    _percentile_scaler(),
    transforms.RandomResizedCrop(reshape_size, scale=(0.6, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    AddGaussianNoise(mean=0., std=0.01),
])

# Validation pipeline: scale, random flips and Gaussian noise (no random crop).
# NOTE(review): the random flips and noise make validation scores vary from
# run to run -- confirm that augmenting the validation data is intended.
validation_transforms = transforms.Compose([
    _percentile_scaler(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    AddGaussianNoise(mean=0., std=0.01),
])

# Test pipeline: scaling only, no random components.
test_transforms = transforms.Compose([
    _percentile_scaler(),
])


In [None]:


# #resnet18_modified.load_state_dict(torch.load('supervised_learning_synapse_model.pth')) 

# #lr_scheduler = LRScheduler(policy='StepLR', step_size=5, gamma=0.6)
# lr_scheduler = LRScheduler(policy='ReduceLROnPlateau', factor=0.1, patience=10)
# #checkpoint = Checkpoint(f_params='resnet_18_imagenet_pretraiend_supervised_learning.pth', monitor='valid_acc_best')


# epoch_scoring = EpochScoring("f1_macro", 
#                              name =  "valid_f1_macro", 
#                              on_train = False,
#                              lower_is_better = False)

# early_stopping = EarlyStopping(monitor='valid_f1_macro', 
#                                patience=100, 
#                                threshold=0.0001, 
#                                threshold_mode='rel', 
#                                lower_is_better=False)

# model = NeuralNetClassifier(    
#     swin, 
#     criterion=nn.CrossEntropyLoss,
#     lr=0.01,
#     batch_size=128,
#     max_epochs=1000,
#     optimizer=optim.Adam,
#     iterator_train__shuffle=True,
#     iterator_train__num_workers=4,
#     iterator_valid__shuffle=False,
#     iterator_valid__num_workers=2,
#     callbacks=[lr_scheduler,epoch_scoring, early_stopping],
#     train_split=predefined_split(validation_dataset_resnet_18),
#     device="cuda",
#     warm_start=True)

In [None]:
# Start a Neptune run for experiment tracking.
# The API token is a secret and must not live in the notebook: it is read from
# the NEPTUNE_API_TOKEN environment variable instead. The token that used to
# be hard-coded in this cell should be treated as leaked and revoked.
run = neptune.init_run(
    project="appsilon/image-flow-cytometry-finetune",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)

# Callback that logs the learning rate at every optimisation step.
lr_monitor = LearningRateMonitor(logging_interval='step')

In [None]:
trainer = pl.Trainer(
    max_epochs=1000,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    logger=pl.loggers.NeptuneLogger(run=run),  # Neptune integration
    callbacks=[
        # F1 is a higher-is-better score. EarlyStopping defaults to
        # mode="min", which would treat a rising F1 as "no improvement", so
        # the direction is stated explicitly.
        pl.callbacks.EarlyStopping(monitor="val_f1", mode="max", patience=100),
        # Register the learning-rate monitor created alongside the Neptune
        # run; it was previously constructed but never passed to the trainer.
        lr_monitor,
    ],
)

# NOTE(review): `model` and `data_module` are not defined in the cells visible
# here -- make sure they are created before this cell runs on a fresh kernel.

# Fit the model
trainer.fit(model, datamodule=data_module)

# Test the model
trainer.test(model, datamodule=data_module)

In [55]:

def classification_complete_report(y_true, y_pred, labels=None):
    """Print a full set of classification metrics and plot the confusion matrix.

    Args:
        y_true: ground-truth class labels.
        y_pred: predicted class labels.
        labels: optional label list forwarded to the per-class report and to
            the confusion matrix (also used as its display labels).

    Returns:
        None. Everything is printed or plotted.
    """
    separator = 15 * "----"

    # Per-class precision / recall / F1 table.
    print(classification_report(y_true, y_pred, labels=labels))
    print(separator)

    # Chance-corrected agreement scores.
    mcc = matthews_corrcoef(y_true, y_pred)
    print("matthews correlation coeff: %.4f" % mcc)
    kappa = cohen_kappa_score(y_true, y_pred)
    print("Cohen Kappa score: %.4f" % kappa)

    # Plain and class-balanced accuracy.
    accuracy = accuracy_score(y_true, y_pred)
    balanced = balanced_accuracy_score(y_true, y_pred)
    print("Accuracy: %.4f & balanced Accuracy: %.4f" % (accuracy, balanced))

    # Macro- and micro-averaged F1, precision and recall.
    f1_macro = f1_score(y_true, y_pred, average="macro")
    f1_micro = f1_score(y_true, y_pred, average="micro")
    print("macro F1 score: %.4f & micro F1 score: %.4f" % (f1_macro, f1_micro))

    precision_macro = precision_score(y_true, y_pred, average="macro")
    precision_micro = precision_score(y_true, y_pred, average="micro")
    print("macro Precision score: %.4f & micro Precision score: %.4f" % (precision_macro, precision_micro))

    recall_macro = recall_score(y_true, y_pred, average="macro")
    recall_micro = recall_score(y_true, y_pred, average="micro")
    print("macro Recall score: %.4f & micro Recall score: %.4f" % (recall_macro, recall_micro))

    # Confusion matrix rendered as a blue heat map.
    matrix = confusion_matrix(y_true, y_pred, labels=labels)
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=labels).plot(cmap=plt.cm.Blues)
    plt.show()
    print(separator)