In [46]:
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from scifAI.dl.dataset import DatasetGenerator
from scifAI.dl.utils import get_statistics
from torch.utils.data import DataLoader
import neptune
import os
import random
import lightning.pytorch as pl
import sys
sys.path.append('..')
from utils import data_module, resnet
from lightning.pytorch.callbacks import LearningRateMonitor, EarlyStopping

In [25]:

# Seed every random number generator used by this notebook with one shared
# value so that repeated runs draw the same random numbers.
seed_value = 42

# NOTE(review): the hash seed of the already-running interpreter is presumably
# fixed at start-up, so this assignment likely only reaches processes started
# afterwards - confirm.
os.environ["PYTHONHASHSEED"] = f"{seed_value}"

# Standard-library and NumPy generators.
np.random.seed(seed_value)
random.seed(seed_value)

# Kept as the final expression: the cell displays the torch generator object.
torch.manual_seed(seed_value)

<torch._C.Generator at 0x7f9e167e8390>

In [26]:
# Location of the gzipped metadata table. It can be overridden with the
# METADATA_CSV environment variable so the notebook is not tied to a single
# user's home directory; when the variable is unset the original path is used.
metadata_path = os.environ.get(
    "METADATA_CSV",
    "/home/jedrzej/projects/image_flow_cytometry_fine_tune/data/jedrzej/metadata_subset.csv.gz",
)

# One row per imaged object; the displayed columns include file, experiment,
# donor, condition, object_number, set and label.
metadata = pd.read_csv(metadata_path)
metadata

  metadata = pd.read_csv("/home/jedrzej/projects/image_flow_cytometry_fine_tune/data/jedrzej/metadata_subset.csv.gz")


Unnamed: 0,file,experiment,donor,condition,object_number,set,label
0,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,53764,unlabeled,-1
1,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,38075,unlabeled,-1
2,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,39302,unlabeled,-1
3,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,50406,unlabeled,-1
4,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_1,Donor_1,+SEA,29629,train,No_cell_cell_interaction
...,...,...,...,...,...,...,...
1065905,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,76910,unlabeled,-1
1065906,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,89427,unlabeled,-1
1065907,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,80928,unlabeled,-1
1065908,/home/jedrzej/projects/image_flow_cytometry_fi...,Experiment_4,Donor_9,DIG-TCB,83923,unlabeled,-1


In [27]:
# Distinct values of the "set" column, i.e. which partitions the table contains.
metadata["set"].unique()

array(['unlabeled', 'train', 'test', 'labeled', 'validation'],
      dtype=object)

In [28]:
# Restrict the table to rows whose condition is "-SEA" or "+SEA" and rebuild the
# index from zero, discarding the old row labels.
indx = metadata["condition"].isin(["-SEA", "+SEA"])
metadata = metadata[indx].reset_index(drop=True)

In [29]:
# Values of the "label" column that are kept for supervised training. The list
# order is significant: later cells enumerate it to assign integer class ids.
set_of_interesting_classes = [
    'B_cell',
    'T_cell',
    'T_cell_with_signaling',
    'T_cell_with_B_cell_fragments',
    'B_T_cell_in_one_layer',
    'Synapses_without_signaling',
    'Synapses_with_signaling',
    'No_cell_cell_interaction',
    'Multiplets',
]


def _split_index(split_name):
    """Return the index labels of the ``metadata`` rows belonging to one split.

    A row is selected when its ``set`` column equals ``split_name`` and its
    ``label`` column is one of ``set_of_interesting_classes``.

    Parameters
    ----------
    split_name : str
        Value of the ``set`` column to select, e.g. ``"train"``.

    Returns
    -------
    pandas.Index
        Index labels of the selected rows, in table order.
    """
    in_split = metadata["set"] == split_name
    has_known_label = metadata["label"].isin(set_of_interesting_classes)
    selected = in_split & has_known_label
    # Indexing a boolean Series by itself keeps only the True entries.
    return selected[selected].index


# Boolean mask over all rows that are in any supervised split and carry one of
# the classes of interest.
indx = metadata["set"].isin(["train", "validation", "test"])
indx = indx & metadata["label"].isin(set_of_interesting_classes)

# The three splits share one selection rule, so they go through one helper
# instead of three copies of the same statements.
train_index = _split_index("train")
validation_index = _split_index("validation")
test_index = _split_index("test")

In [30]:
# Partitions still present in the "set" column at this point of the notebook.
metadata.loc[:, "set"].unique()

array(['unlabeled', 'train', 'test', 'labeled', 'validation'],
      dtype=object)

In [31]:
# Integer id of each class = its position in set_of_interesting_classes.
label_map = {
    class_name: class_id
    for class_id, class_name in enumerate(set_of_interesting_classes)
}

# The value -1 is registered under both its string and its integer spelling and
# maps to -1 (presumably because the label column can hold either form for
# unlabeled rows - confirm against the data).
label_map.update({'-1': -1, -1: -1})


In [32]:
# Display the complete label -> integer id mapping for inspection.
label_map

{'B_cell': 0,
 'T_cell': 1,
 'T_cell_with_signaling': 2,
 'T_cell_with_B_cell_fragments': 3,
 'B_T_cell_in_one_layer': 4,
 'Synapses_without_signaling': 5,
 'Synapses_with_signaling': 6,
 'No_cell_cell_interaction': 7,
 'Multiplets': 8,
 '-1': -1,
 -1: -1}

In [33]:
# Channel registry: channel id -> (colour name, marker name). The colour names
# look like matplotlib colormap names - presumably used for plotting; confirm.
channels = dict(
    Ch1=("Greys", "BF"),
    Ch2=("Greens", "Antibody"),
    Ch3=("Reds", "CD18"),
    Ch4=("Oranges", "F-Actin"),
    Ch6=("RdPu", "MHCII"),
    Ch7=("Purples", "CD3/CD4"),
    Ch11=("Blues", "P-CD3zeta"),
    Ch12=("Greens", "Live-Dead"),
)

In [34]:
# ---- Experiment configuration -------------------------------------------

# Channel positions handed to the dataset, the statistics helper and the model.
selected_channels = [0, 3, 4, 5, 6]

# Names of the output folders for models and logs.
model_dir, log_dir = "models", "logs"

# Passed to DatasetGenerator as `scaling_factor` (4095 = 2**12 - 1, presumably
# the maximum raw pixel intensity - confirm).
scaling_factor = 4095.0

# Passed to DatasetGenerator as `reshape_size` and reused as the crop size of
# the training augmentation.
reshape_size = 256

# Preliminary augmentation list; it is wrapped in transforms.Compose when the
# dataset for the statistics pass is built.
train_transform = [
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),
]
test_transform = []

In [35]:
# Dataset over the training rows only; it feeds the statistics pass below.
# NOTE(review): the random flip/rotation list is active here, so the statistics
# are computed on augmented images - confirm this is intended.
train_dataset = DatasetGenerator(
    metadata=metadata.loc[train_index],
    label_map=label_map,
    selected_channels=selected_channels,
    scaling_factor=scaling_factor,
    reshape_size=reshape_size,
    transform=transforms.Compose(train_transform),
)

In [36]:
# Sequential (unshuffled) loader over the training dataset, in batches of 128
# read by 6 worker processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=6,
)




In [37]:
# Per-channel intensity statistics of the training data. The recorded output
# shows the keys min, p01, p05, p25, p50, p75, p95, p99, max, mean and std, and
# a run time of about six and a half minutes for the 23 batches.
statistics = get_statistics(
    train_loader,
    selected_channels=selected_channels,
)


100%|██████████| 23/23 [06:32<00:00, 17.05s/it]

statistics used: {'min': tensor([0., 0., 0., 0., 0.]), 'p01': tensor([0., 0., 0., 0., 0.]), 'p05': tensor([0., 0., 0., 0., 0.]), 'p25': tensor([0.1943, 0.0128, 0.0092, 0.0129, 0.0094]), 'p50': tensor([0.1950, 0.0162, 0.0136, 0.0201, 0.0096]), 'p75': tensor([0.1956, 0.0205, 0.0193, 0.0257, 0.0097]), 'p95': tensor([0.1962, 0.0349, 0.0296, 0.0345, 0.0101]), 'p99': tensor([0.1989, 0.0588, 0.0474, 0.0478, 0.0107]), 'max': tensor([0.3288, 0.7866, 0.4370, 0.3362, 0.1153]), 'mean': tensor([0.1723, 0.0172, 0.0148, 0.0199, 0.0085]), 'std': tensor([0.0627, 0.0141, 0.0109, 0.0122, 0.0031])}





In [38]:
class AddGaussianNoise(object):
    """Transform that adds Gaussian noise to a tensor.

    Every call draws fresh standard-normal noise with the shape of the input,
    scales it by ``std``, and adds it together with ``mean`` to the input.

    Parameters
    ----------
    mean : float
        Constant offset added to every element.
    std : float
        Factor applied to the standard-normal noise.
    """

    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """Return ``tensor`` plus freshly drawn noise; the input is not modified."""
        # Create the noise on the same device as the input. The previous
        # torch.randn(tensor.size()) always allocated on the CPU, which fails
        # as soon as the transform is applied to a tensor on another device.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [39]:
class MinMaxScaler(object):
    """Per-channel linear rescaling with clipping.

    Values are mapped so that ``min_in`` lands on ``min_out`` and ``max_in``
    lands on ``max_out``; results outside ``[min_out, max_out]`` are clipped
    to that interval.

    Parameters
    ----------
    min_in, max_in : torch.Tensor
        Input bounds. Both are reshaped to ``(-1, 1, 1)`` so that they
        broadcast against the trailing two dimensions of the input.
    min_out, max_out : float
        Bounds of the output interval.
    """

    def __init__(self, min_in , max_in, min_out, max_out):
        self.min_in = min_in.reshape(-1,1,1)
        self.max_in = max_in.reshape(-1,1,1)
        self.min_out = min_out
        self.max_out = max_out

    def __call__(self, tensor):
        """Return a rescaled and clipped copy of ``tensor``."""
        offset = tensor - self.min_in
        span = self.max_in - self.min_in

        tensor_ = offset / span
        # When a channel's bounds coincide (span == 0) a value equal to the
        # bound evaluates to 0/0 = NaN, and NaN passes through both clipping
        # comparisons below unchanged. Treat that case as "at the lower bound".
        # Every other result, including +/-inf from x/0, is left as it was and
        # is handled by the clipping.
        at_degenerate_bound = (span == 0) & (offset == 0)
        tensor_ = torch.where(at_degenerate_bound, torch.zeros_like(tensor_), tensor_)

        tensor_ = tensor_*(self.max_out - self.min_out) + self.min_out
        tensor_[tensor_<self.min_out]= self.min_out
        tensor_[tensor_>self.max_out]= self.max_out
        return tensor_

    def __repr__(self):
        return self.__class__.__name__ + '(min_out={0}, max_out={1})'.format(self.min_out, self.max_out)

In [40]:
def _make_input_scaler():
    """Build the intensity scaler shared by every pipeline.

    Each channel is mapped from its [p05, p95] range in ``statistics`` to
    [0, 1]; values outside that range are clipped by ``MinMaxScaler``.
    """
    return MinMaxScaler(min_in=statistics["p05"],
                        max_in=statistics["p95"],
                        min_out=0.,
                        max_out=1.)


# Training pipeline: scaling followed by random augmentation (crop/resize,
# flips, additive Gaussian noise).
train_transform = transforms.Compose([
    _make_input_scaler(),
    transforms.RandomResizedCrop(reshape_size, scale=(0.6, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    AddGaussianNoise(mean=0., std=0.01),
])

# Evaluation pipelines apply the scaling only. The validation pipeline used to
# repeat the random flips and the noise of the training pipeline, which made
# the validation metrics change from epoch to epoch for an unchanged model.
validation_transform = transforms.Compose([
    _make_input_scaler(),
])

test_transform = transforms.Compose([
    _make_input_scaler(),
])


In [41]:
# Classifier from the project's `resnet` module, sized by the number of classes
# of interest and the number of selected channels (presumably the output and
# input sizes respectively - confirm against ResnetModel's signature).
model = resnet.ResnetModel(
    len(set_of_interesting_classes),
    len(selected_channels),
)



In [42]:
# Training hyper-parameters: learning rate, batch size, epoch budget.
# NOTE(review): of these, only batch_size is visibly used later (it is passed to
# the data module); the Trainer cell sets its own max_epochs - confirm which
# values are meant to apply.
lr, batch_size, max_epochs = 0.01, 128, 1000

In [43]:
# Data module bundling the metadata table, the three split indices, the label
# mapping, the channel selection, the intensity statistics, the per-split
# transforms, the batch size and the image size. Arguments are positional, so
# their order must match SynapseFormationDataModule's signature.
module = data_module.SynapseFormationDataModule(
    metadata,
    train_index,
    validation_index,
    test_index,
    label_map,
    selected_channels,
    statistics,
    train_transform,
    validation_transform,
    test_transform,
    batch_size,
    reshape_size,
)

In [44]:
# Start a Neptune run for experiment tracking.
# SECURITY: the API token must not live in the notebook. It is read from the
# NEPTUNE_API_TOKEN environment variable instead; when the variable is unset,
# None is passed and Neptune performs its own credential lookup / error
# reporting. The token that was previously embedded here has been exposed and
# should be revoked in the Neptune account settings.
run = neptune.init_run(
    project="appsilon/image-flow-cytometry-finetune",
    api_token=os.getenv("NEPTUNE_API_TOKEN"),
)

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/appsilon/image-flow-cytometry-finetune/e/IM-5


In [47]:
# Log the learning rate at every optimisation step.
lr_monitor = LearningRateMonitor(logging_interval='step')

# NOTE(review): the epoch budget here (500) differs from the `max_epochs`
# variable defined in the hyper-parameter cell (1000) - confirm which one is
# intended.
trainer = pl.Trainer(
    max_epochs=500,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    logger=pl.loggers.NeptuneLogger(run=run, log_model_checkpoints=False),
    callbacks=[lr_monitor])

# Always close the Neptune run, even when training or testing raises (for
# example the CUDA out-of-memory error recorded below); otherwise the run is
# left open.
try:
    trainer.fit(model, datamodule=module)

    trainer.test(model, datamodule=module)
finally:
    run.stop()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | ResNet             | 11.2 M | train
1 | train_acc | MulticlassAccuracy | 0      | train
2 | val_acc   | MulticlassAccuracy | 0      | train
3 | f1_score  | MulticlassF1Score  | 0      | train
---------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.750    Total estimated model params size (MB)
71        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacity of 14.57 GiB of which 432.75 MiB is free. Process 1875067 has 192.00 MiB memory in use. Including non-PyTorch memory, this process has 364.00 MiB memory in use. Process 1983349 has 13.60 GiB memory in use. Of the allocated memory 202.74 MiB is allocated by PyTorch, and 21.26 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:


# #resnet18_modified.load_state_dict(torch.load('supervised_learning_synapse_model.pth')) 

# #lr_scheduler = LRScheduler(policy='StepLR', step_size=5, gamma=0.6)
# lr_scheduler = LRScheduler(policy='ReduceLROnPlateau', factor=0.1, patience=10)
# #checkpoint = Checkpoint(f_params='resnet_18_imagenet_pretraiend_supervised_learning.pth', monitor='valid_acc_best')


# epoch_scoring = EpochScoring("f1_macro", 
#                              name =  "valid_f1_macro", 
#                              on_train = False,
#                              lower_is_better = False)

# early_stopping = EarlyStopping(monitor='valid_f1_macro', 
#                                patience=100, 
#                                threshold=0.0001, 
#                                threshold_mode='rel', 
#                                lower_is_better=False)

# model = NeuralNetClassifier(    
#     swin, 
#     criterion=nn.CrossEntropyLoss,
#     lr=0.01,
#     batch_size=128,
#     max_epochs=1000,
#     optimizer=optim.Adam,
#     iterator_train__shuffle=True,
#     iterator_train__num_workers=4,
#     iterator_valid__shuffle=False,
#     iterator_valid__num_workers=2,
#     callbacks=[lr_scheduler,epoch_scoring, early_stopping],
#     train_split=predefined_split(validation_dataset_resnet_18),
#     device="cuda",
#     warm_start=True)