## Setup

## Run this if running on colab

In [None]:
from IPython.display import clear_output; token = input(); clear_output()

In [None]:
! git clone https://$token@github.com/SzymonLukasik/Deep4Life.git

Cloning into 'Deep4Life'...
remote: Enumerating objects: 680, done.[K
remote: Counting objects: 100% (412/412), done.[K
remote: Compressing objects: 100% (213/213), done.[K
remote: Total 680 (delta 244), reused 298 (delta 186), pack-reused 268[K
Receiving objects: 100% (680/680), 49.99 MiB | 7.37 MiB/s, done.
Resolving deltas: 100% (385/385), done.


In [None]:
%cd /content/Deep4Life

/content/Deep4Life


In [None]:
!pip install anndata

Collecting anndata
  Downloading anndata-0.10.7-py3-none-any.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting array-api-compat!=1.5,>1.4 (from anndata)
  Downloading array_api_compat-1.6-py3-none-any.whl (36 kB)
Installing collected packages: array-api-compat, anndata
Successfully installed anndata-0.10.7 array-api-compat-1.6


In [None]:
! pip install pyometiff

Collecting pyometiff
  Downloading pyometiff-1.0.0-py3-none-any.whl (37 kB)
Collecting imagecodecs (from pyometiff)
  Downloading imagecodecs-2024.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.6/39.6 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: imagecodecs, pyometiff
Successfully installed imagecodecs-2024.1.1 pyometiff-1.0.0


## Imports

In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
import pyometiff
import os
import gdown
import json
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.model_selection import GridSearchCV

from typing import List
from src.datasets import load_d4ls
from sklearn.model_selection import train_test_split

pd.set_option('display.max_columns', None)

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
!mkdir data

In [None]:
!gdown 1-0YOHE1VoTRWqfBJLHQorGcHmkhCYvqW

Downloading...
From (original): https://drive.google.com/uc?id=1-0YOHE1VoTRWqfBJLHQorGcHmkhCYvqW
From (redirected): https://drive.google.com/uc?id=1-0YOHE1VoTRWqfBJLHQorGcHmkhCYvqW&confirm=t&uuid=c30ccd15-8c19-47d8-a204-1ad95489127f
To: /content/Deep4Life/train.zip
100% 843M/843M [00:15<00:00, 54.2MB/s]


In [None]:
load_d4ls.DATA_PATH

PosixPath('/content/Deep4Life/data')

In [None]:
!unzip train.zip -d $load_d4ls.DATA_PATH

Archive:  train.zip
   creating: /content/Deep4Life/data/train/
  inflating: /content/Deep4Life/data/train/cell_data.h5ad  
   creating: /content/Deep4Life/data/train/images_masks/
   creating: /content/Deep4Life/data/train/images_masks/img/
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_Batch20210701_LUNG_10041507-LUNG-VAR-TIS-01-IMC-01_003.tiff  
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_Batch20201113_10062587-SPECT-VAR-TIS-01-IMC-01_003.tiff  
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_Batch20210701_LUNG_10019062-LUNG-VAR-TIS-01-IMC-01_002.tiff  
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_2022_WFLOW_10071582-SPECT-VAR-TIS-01-IMC-01_006.tiff  
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_Batch20201113_10042701-GU-VAR-TIS-01-IMC-01_001.tiff  
  inflating: /content/Deep4Life/data/train/images_masks/img/IMMUcan_Batch20211215_10068187-SPECT-VAR-TIS-UNST-03_004.tiff  
  

## Load anndata

In [None]:
train_anndata = load_d4ls.load_full_anndata()

# Training from the command line

In [None]:
! git checkout lukass/svm_baseline

Branch 'lukass/svm_baseline' set up to track remote branch 'lukass/svm_baseline' from 'origin'.
Switched to a new branch 'lukass/svm_baseline'


In [7]:
! git pull origin

remote: Enumerating objects: 109, done.[K
remote: Counting objects:   1% (1/90)[Kremote: Counting objects:   2% (2/90)[Kremote: Counting objects:   3% (3/90)[Kremote: Counting objects:   4% (4/90)[Kremote: Counting objects:   5% (5/90)[Kremote: Counting objects:   6% (6/90)[Kremote: Counting objects:   7% (7/90)[Kremote: Counting objects:   8% (8/90)[Kremote: Counting objects:  10% (9/90)[Kremote: Counting objects:  11% (10/90)[Kremote: Counting objects:  12% (11/90)[Kremote: Counting objects:  13% (12/90)[Kremote: Counting objects:  14% (13/90)[Kremote: Counting objects:  15% (14/90)[Kremote: Counting objects:  16% (15/90)[Kremote: Counting objects:  17% (16/90)[Kremote: Counting objects:  18% (17/90)[Kremote: Counting objects:  20% (18/90)[Kremote: Counting objects:  21% (19/90)[Kremote: Counting objects:  22% (20/90)[Kremote: Counting objects:  23% (21/90)[Kremote: Counting objects:  24% (22/90)[Kremote: Counting objects:  25% (23/90)[K

In [19]:
! git status

On branch lukass/svm_baseline
Your branch is up to date with 'origin/lukass/svm_baseline'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mtrain.zip[m

nothing added to commit but untracked files present (use "git add" to track)


In [15]:
! git checkout -- src/models/sklearn_svm.py

In [20]:
! git branch

* [32mlukass/svm_baseline[m
  master[m


In [None]:
! pip install scanpy

Collecting scanpy
  Downloading scanpy-1.10.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting legacy-api-wrap>=1.4 (from scanpy)
  Downloading legacy_api_wrap-1.4-py3-none-any.whl (15 kB)
Collecting pynndescent>=0.5 (from scanpy)
  Downloading pynndescent-0.5.12-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting session-info (from scanpy)
  Downloading session_info-1.0.0.tar.gz (24 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting umap-learn!=0.5.0,>=0.5 (from scanpy)
  Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Collecting stdlib_list (from session-info->scanpy)
  Downloading stdlib_list-0.10.0-py3-none-any.whl (79 kB)
[2K     [

In [None]:
! pip install scikit_learn==1.4.2

Collecting scikit_learn==1.4.2
  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit_learn
  Attempting uninstall: scikit_learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit_learn-1.4.2


## SVM Baseline

In [18]:
!python3 src/train_and_validate.py --method sklearn_svm/svc --config linear

Validation accuracy of 0 fold: 0.9403075405720434
Validation accuracy of 1 fold: 0.9252561247216036
Validation accuracy of 2 fold: 0.9370896391152503
Validation accuracy of 3 fold: 0.954309122420041
Validation accuracy of 4 fold: 0.9147199970123056
Config saved to: /content/Deep4Life/results/sklearn_svm/svc/linear_2024-05-08_20-33-44_seed_42_folds_5/config.yaml
Metrics saved to: /content/Deep4Life/results/sklearn_svm/svc/linear_2024-05-08_20-33-44_seed_42_folds_5/metrics.json
Retraining model...
Model saved to: /content/Deep4Life/results/sklearn_svm/svc/linear_2024-05-08_20-33-44_seed_42_folds_5/saved_model.joblib


In [None]:
!python3 src/train_and_validate.py --method sklearn_svm/svc --config linear

##  Other methods

In [274]:
! git fetch origin master

From https://github.com/SzymonLukasik/Deep4Life
 * branch            master     -> FETCH_HEAD


In [24]:
!python3 src/train_and_validate.py --method xgboost --config standard

Validation accuracy of 0 fold: 0.9465614632834215
Validation accuracy of 1 fold: 0.9298663697104677
Validation accuracy of 2 fold: 0.949499417927823
Validation accuracy of 3 fold: 0.9634866866236017
Validation accuracy of 4 fold: 0.9142905159374825
Config saved to: /content/Deep4Life/results/xgboost/standard_2024-05-08_21-15-37_seed_42_folds_5/config.yaml
Metrics saved to: /content/Deep4Life/results/xgboost/standard_2024-05-08_21-15-37_seed_42_folds_5/metrics.json
Retraining model...
Model saved to: /content/Deep4Life/results/xgboost/standard_2024-05-08_21-15-37_seed_42_folds_5/saved_model.json


In [25]:
!python3 src/train_and_validate.py --method sklearn_mlp --config standard

Iteration 1, loss = 0.31067227
Validation score: 0.931054
Iteration 2, loss = 0.18925858
Validation score: 0.941773
Iteration 3, loss = 0.16341107
Validation score: 0.944271
Iteration 4, loss = 0.14912965
Validation score: 0.949579
Iteration 5, loss = 0.13604193
Validation score: 0.954782
Validation accuracy of 0 fold: 0.9371693714695598
Iteration 1, loss = 0.29911168
Validation score: 0.932986
Iteration 2, loss = 0.18063588
Validation score: 0.936686
Iteration 3, loss = 0.15690637
Validation score: 0.944971
Iteration 4, loss = 0.14170221
Validation score: 0.950130
Iteration 5, loss = 0.13136569
Validation score: 0.948775
Validation accuracy of 1 fold: 0.9191314031180401
Iteration 1, loss = 0.30200358
Validation score: 0.930720
Iteration 2, loss = 0.18364342
Validation score: 0.937684
Iteration 3, loss = 0.15999960
Validation score: 0.937942
Iteration 4, loss = 0.14541076
Validation score: 0.949342
Iteration 5, loss = 0.13417762
Validation score: 0.948001
Validation accuracy of 2 fold:

In [26]:
!python3 src/train_and_validate.py --method torch_mlp --config standard

   0m: epoch 1 [################### ] 99% train loss = 0.3690642094 Validation score: 0.928713
   0m: epoch 2 [################### ] 99% train loss = 0.1922636195 Validation score: 0.939120
   0m: epoch 3 [################### ] 99% train loss = 0.1660849914 Validation score: 0.940785
   0m: epoch 4 [################### ] 99% train loss = 0.1519366081 Validation score: 0.941565
   0m: epoch 5 [################### ] 99% train loss = 0.1391868441 Validation score: 0.947757
   0m: epoch 6 [################### ] 99% train loss = 0.1322276416 Validation score: 0.951192
   0m: epoch 7 [################### ] 99% train loss = 0.1241243638 Validation score: 0.953429
   1m: epoch 8 [################### ] 99% train loss = 0.1194663133 Validation score: 0.955875
   1m: epoch 9 [################### ] 99% train loss = 0.1152636276 Validation score: 0.951764
   1m: epoch 10 [################### ] 99% train loss = 0.1106805969 Validation score: 0.957488
   1m: epoch 11 [################### ] 99% train 

In [27]:
!python3 src/train_and_validate.py --method stellar --config standard

Training - epoch 0: 100% 100/100 [00:02<00:00, 36.37it/s, Loss=1.19, Accuracy=0.664]
Training - epoch 1: 100% 100/100 [00:02<00:00, 39.72it/s, Loss=0.483, Accuracy=0.848]
Training - epoch 2: 100% 100/100 [00:02<00:00, 40.13it/s, Loss=0.366, Accuracy=0.883]
Training - epoch 3: 100% 100/100 [00:02<00:00, 38.32it/s, Loss=0.345, Accuracy=0.889]
Training - epoch 4: 100% 100/100 [00:03<00:00, 25.21it/s, Loss=0.297, Accuracy=0.902]
Training - epoch 5: 100% 100/100 [00:03<00:00, 32.56it/s, Loss=0.285, Accuracy=0.904]
Training - epoch 6: 100% 100/100 [00:02<00:00, 39.81it/s, Loss=0.251, Accuracy=0.916]
Training - epoch 7: 100% 100/100 [00:02<00:00, 39.29it/s, Loss=0.269, Accuracy=0.91]
Training - epoch 8: 100% 100/100 [00:02<00:00, 37.82it/s, Loss=0.262, Accuracy=0.911]
Training - epoch 9: 100% 100/100 [00:03<00:00, 26.76it/s, Loss=0.246, Accuracy=0.916]
Training - epoch 10: 100% 100/100 [00:03<00:00, 32.73it/s, Loss=0.235, Accuracy=0.92]
Training - epoch 11: 100% 100/100 [00:02<00:00, 36.60it/

In [28]:
!python3 src/train_and_validate.py --method stellar --config custom

Training - epoch 0: 100% 100/100 [00:06<00:00, 14.86it/s, Loss=0.855, Accuracy=0.751]
Training - epoch 1: 100% 100/100 [00:08<00:00, 11.90it/s, Loss=0.408, Accuracy=0.874]
Training - epoch 2: 100% 100/100 [00:06<00:00, 14.52it/s, Loss=0.334, Accuracy=0.895]
Training - epoch 3: 100% 100/100 [00:08<00:00, 11.96it/s, Loss=0.324, Accuracy=0.897]
Training - epoch 4: 100% 100/100 [00:06<00:00, 15.19it/s, Loss=0.288, Accuracy=0.904]
Training - epoch 5: 100% 100/100 [00:08<00:00, 12.36it/s, Loss=0.279, Accuracy=0.907]
Training - epoch 6: 100% 100/100 [00:07<00:00, 13.67it/s, Loss=0.292, Accuracy=0.903]
Training - epoch 7: 100% 100/100 [00:07<00:00, 12.91it/s, Loss=0.247, Accuracy=0.916]
Training - epoch 8: 100% 100/100 [00:07<00:00, 13.68it/s, Loss=0.267, Accuracy=0.912]
Training - epoch 9: 100% 100/100 [00:09<00:00, 10.94it/s, Loss=0.23, Accuracy=0.924]
Training - epoch 10: 100% 100/100 [00:07<00:00, 13.49it/s, Loss=0.21, Accuracy=0.929]
Training - epoch 11: 100% 100/100 [00:07<00:00, 12.69it

In [29]:
!python3 src/train_and_validate.py --method stellar --config custom_random_nodes

Training - epoch 0: 100% 961/961 [00:15<00:00, 63.30it/s, Loss=0.26, Accuracy=0.914]
Training - epoch 1: 100% 961/961 [00:15<00:00, 61.85it/s, Loss=0.167, Accuracy=0.94]
Training - epoch 2: 100% 961/961 [00:17<00:00, 53.40it/s, Loss=0.144, Accuracy=0.947]
Training - epoch 3: 100% 961/961 [00:19<00:00, 49.82it/s, Loss=0.131, Accuracy=0.951]
Training - epoch 4:   8% 80/961 [00:02<00:27, 32.59it/s, Loss=0.121, Accuracy=0.956]
Traceback (most recent call last):
  File "/content/Deep4Life/src/train_and_validate.py", line 255, in <module>
    main()
  File "/content/Deep4Life/src/train_and_validate.py", line 60, in main
    cross_validation_metrics = cross_validation(
  File "/content/Deep4Life/src/train_and_validate.py", line 143, in cross_validation
    model.train(train_data)
  File "/content/Deep4Life/src/models/custom_stellar.py", line 232, in train
    self._train_graph_batch(train_data_loader, self.cfg.epochs)
  File "/content/Deep4Life/src/models/custom_stellar.py", line 295, in _tra

In [34]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.2.1+cu121.html

Looking in links: https://data.pyg.org/whl/torch-2.2.1+cu121.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/pyg_lib-0.4.0%2Bpt22cu121-cp310-cp310-linux_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/torch_scatter-2.1.2%2Bpt22cu121-cp310-cp310-linux_x86_64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/torch_sparse-0.6.18%2Bpt22cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m70.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcu121/torch_cluster-1.6.3%2Bp

In [35]:
!python3 src/train_and_validate.py --method stellar --config custom_neighbors

Training - epoch 0: 100% 961/961 [00:33<00:00, 28.83it/s, Loss=0.26, Accuracy=0.917]
Training - epoch 1: 100% 961/961 [01:54<00:00,  8.38it/s, Loss=0.13, Accuracy=0.952]
Training - epoch 2:  80% 766/961 [04:47<01:13,  2.67it/s, Loss=0.111, Accuracy=0.958]
Traceback (most recent call last):
  File "/content/Deep4Life/src/train_and_validate.py", line 255, in <module>
    main()
  File "/content/Deep4Life/src/train_and_validate.py", line 60, in main
    cross_validation_metrics = cross_validation(
  File "/content/Deep4Life/src/train_and_validate.py", line 143, in cross_validation
    model.train(train_data)
  File "/content/Deep4Life/src/models/custom_stellar.py", line 226, in train
    self._train_graph_batch(train_data_loader, self.cfg.epochs)
  File "/content/Deep4Life/src/models/custom_stellar.py", line 290, in _train_graph_batch
    output, _ = self.model(batch)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return s

# Test

In [277]:
!git fetch origin

In [278]:
!git pull origin

Updating ad5b3b6..9c109bc
Fast-forward
 src/models/sklearn_svm.py |  2 [32m+[m[31m-[m
 src/train_and_validate.py | 90 [32m+++++++++++++++++++++++++++++++++[m[31m----------------------------------[m
 2 files changed, 46 insertions(+), 46 deletions(-)


In [54]:
!git checkout lukass/introduce_test_mode

Switched to branch 'lukass/introduce_test_mode'
Your branch is behind 'origin/lukass/introduce_test_mode' by 2 commits, and can be fast-forwarded.
  (use "git pull" to update your local branch)


## Creating a dummy test set

In [None]:
from src.datasets.data_utils import load_full_anndata
test_anndata = load_full_anndata(False)

False


In [265]:
import scanpy

sampled_anndata = scanpy.pp.subsample(test_anndata, n_obs=1000, copy=True, random_state=42)
sampled_anndata.write_h5ad("/content/Deep4Life/data/test/cell_data.h5ad")
len(sampled_anndata.obs["cell_labels"].cat.categories)

14

In [None]:
filenames_list = sampled_anndata.obs["image"].unique()
len(filenames_list)

124

In [None]:
filenames = "\n".join(filenames_list)
with open("filenames", "w") as f:
  f.write(filenames)

In [254]:
! ls data/train/images_masks/img | wc -l

125


In [256]:
! mkdir -p data/test/images_masks/img

In [257]:
! mkdir -p data/test/images_masks/masks

In [259]:
!cat filenames|wc -l

123


In [260]:
!cat filenames | xargs -I {} cp data/train/images_masks/img/{} data/test/images_masks/img

In [261]:
!cat filenames | xargs -I {} cp data/train/images_masks/masks/{} data/test/images_masks/masks

In [262]:
! ls data/test/images_masks/img | wc -l

124


In [263]:
! ls data/test/images_masks/masks | wc -l

124


## Running inference on a trained svm

In [272]:
!python3 src/train_and_validate.py --method sklearn_svm/svc --config linear test linear_2024-05-08_20-33-44_seed_42_folds_5

1000


## Running on best results from the drive (I created a shurtcut to my drive to the shared folder)

In [270]:
!cp -r /content/drive/MyDrive/best_results/* ./results/

In [279]:
!python3 src/train_and_validate.py --method stellar --config custom test custom_2024-05-08_22-23-22_seed_42_folds_5

In [281]:
!python3 src/train_and_validate.py --method xgboost --config standard test standard_2024-05-08_21-15-37_seed_42_folds_5