<p align="center">بسم الله الرحمن الرحیم</p>

# Libraries

In [None]:
!pip install spams
!pip install staintools

In [None]:
import os
import re

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import PIL
import staintools
from PIL import Image
from tqdm import tqdm

In [None]:
# Mount Google Drive at /content/drive; the input, output and target-image
# paths configured in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Hyperparameters

In [None]:
# Output root: normalized patches are written to
# <normalized_base_path>/<folder>/<image_size>/<patch>.png
normalized_base_path = "/content/drive/MyDrive/normalized3"
# Input root: source patches are read from <histo_base_path>/<folder>/<image_size>/
histo_base_path = "/content/drive/MyDrive/histo3"
# Reference image the stain normalizer is fitted on.
target_image_path = "/content/drive/MyDrive/color-normalization/colorstandard_brca.png"
# Name of the per-folder sub-directory processed by the normalization cell.
# Kept as a string because it is only ever interpolated into paths.
# The quality-check cells look at the "3072" and "1024" sub-directories.
image_size = "3072"

# Prepare normalization tool

In [None]:
# List the slide folders under the input root.
# os.listdir() returns entries in arbitrary order, but the normalization cell
# selects folders by position (folders[from_index:to_index]), so the list is
# sorted to make those indices mean the same thing in every session.
# Jupyter's ".ipynb_checkpoints" directory is not a slide folder and is skipped.
folders = sorted(
    name for name in os.listdir(histo_base_path) if name != ".ipynb_checkpoints"
)
print(f"Number of folders: {len(folders)}")

In [None]:
# Read the reference image, then fit a Vahadane stain normalizer on it.
# The fitted `normalizer` is what the normalization cells below call
# `.transform()` on for every patch.
target_image = staintools.read_image(target_image_path)
normalizer = staintools.StainNormalizer(method="vahadane")
normalizer.fit(target_image)

# Normalization

In [None]:
# Half-open slice of `folders` to process in this run: folders[from_index:to_index].
from_index = 13
to_index = 14


def normalize_patch(source_path, dest_path):
    """Stain-normalize one patch and save the result as a PNG.

    Uses the module-level ``normalizer`` that was fitted on the target image.

    Args:
        source_path: Path of the patch image to read.
        dest_path: Path the normalized PNG is written to.

    Raises:
        Exception: Whatever PIL or staintools raise for an unreadable or
            un-normalizable patch; the caller decides how to report it.
    """
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(source_path) as patch:
        patched_image = np.array(patch)
    normalized_image = Image.fromarray(normalizer.transform(patched_image))
    normalized_image.save(dest_path, "PNG")


folder_update = folders[from_index:to_index]
for idx, f in enumerate(folder_update):
    print(f"WSI[{idx+from_index:02d}/{len(folders)}]:\t{f}")
    source_dir = f"{histo_base_path}/{f}/{image_size}"
    dest_dir = f"{normalized_base_path}/{f}/{image_size}"
    # List the source first so a missing input folder fails before any
    # (empty) output directory is created.
    image_names = os.listdir(source_dir)
    # Creates every missing level, including the output root itself.
    os.makedirs(dest_dir, exist_ok=True)
    for i, name in enumerate(tqdm(image_names)):
        image_id = os.path.splitext(name)[0]
        try:
            normalize_patch(f"{source_dir}/{name}", f"{dest_dir}/{image_id}.png")
        except Exception as exc:
            # Best effort: report the failing patch and carry on. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
            # tqdm.write prints without breaking the progress bar.
            tqdm.write(f"Patch {i} is not saved ({name}): {exc!r}")

# Quality check

In [None]:
def report_patch_counts(size):
    """Print per-folder patch counts for one patch-size sub-directory.

    For every entry of ``folders`` prints
    ``index, source - normalized, source, normalized, folder`` when both the
    source and the normalized ``<folder>/<size>/`` directories exist, and
    ``index, "<NOT FOUND>", folder`` otherwise.

    Args:
        size: Name of the size sub-directory to inspect, e.g. ``"3072"``.
    """
    print("="*10, f" {size} ", "="*10)
    for idx, f in enumerate(folders):
        normal_path = f"{normalized_base_path}/{f}/{size}/"
        base_path = f"{histo_base_path}/{f}/{size}/"
        if os.path.exists(normal_path) and os.path.exists(base_path):
            norm = len(os.listdir(normal_path))
            base = len(os.listdir(base_path))
            print(idx, base-norm, base, norm, f)
        else:
            print(idx, "<NOT FOUND>", f)


# check number of images for each patch size
for size in ("3072", "1024"):
    report_patch_counts(size)

In [None]:
# check for duplicate images. e.g. "28304_2394 (1).png"
# Matches any "(N)" copy marker, so "(2)", "(3)", ... are reported as well as "(1)".
DUPLICATE_MARKER = re.compile(r"\(\d+\)")


def report_duplicate_patches(size):
    """Print every normalized patch whose file name carries a "(N)" copy marker.

    Folders without a normalized ``<folder>/<size>/`` directory are skipped.
    Each hit is printed as ``index, folder, file name``.

    Args:
        size: Name of the size sub-directory to inspect, e.g. ``"3072"``.
    """
    print("="*10, f" {size} ", "="*10)
    for idx, f in enumerate(folders):
        normal_path = f"{normalized_base_path}/{f}/{size}/"
        if not os.path.exists(normal_path):
            continue
        for n in os.listdir(normal_path):
            if DUPLICATE_MARKER.search(n):
                print(idx, f, n)


for size in ("3072", "1024"):
    report_duplicate_patches(size)



In [None]:
# find missing images
# Collects [source_path, destination_path] pairs for every patch that exists in
# the source "1024" directory of a folder but not in its normalized counterpart.
# Only the "1024" sub-directory is inspected here.
missed_images = []
for f in folders:
    normal_path = f"{normalized_base_path}/{f}/1024/"
    base_path = f"{histo_base_path}/{f}/1024/"
    if os.path.exists(normal_path) and os.path.exists(base_path):
        # A set gives constant-time membership tests; the list made this quadratic.
        normalized_names = set(os.listdir(normal_path))
        for b in os.listdir(base_path):
            if b not in normalized_names:
                missed_images.append([f"{base_path}{b}", f"{normal_path}{b}"])

In [None]:
# fix missing images
# Re-runs normalization for every [source, destination] pair in `missed_images`.
for base, dest in tqdm(missed_images):
    try:
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(base) as patch:
            patched_image = np.array(patch)
        normalized_image = Image.fromarray(normalizer.transform(patched_image))
        normalized_image.save(dest, "PNG")
    except Exception as exc:
        # Best effort: say which patch failed and why, then continue. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        tqdm.write(f"Error: {base} was not normalized ({exc!r})")

# Upload to Google Cloud

In [None]:
# Install apt prerequisites, then download Google's package-signing key and
# add it to the /usr/share/keyrings/cloud.google.gpg keyring.
# NOTE(review): apt-get is called without `-y`, and `apt-key` is deprecated on
# newer Ubuntu releases — confirm this still runs unattended on the current image.
# NOTE(review): no cell in this notebook adds the Cloud SDK apt source or installs
# it afterwards; the cells below call `gcloud` directly (presumably preinstalled)
# — confirm whether this cell is still needed.
!sudo apt-get install apt-transport-https ca-certificates gnupg
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ca-certificates is already the newest version (20211016ubuntu0.20.04.1).
gnupg is already the newest version (2.2.19-3ubuntu2.2).
gnupg set to manually installed.
The following package was automatically installed and is no longer required:
  libnvidia-common-510
Use 'sudo apt autoremove' to remove it.
The following NEW packages will be installed:
  apt-transport-https
0 upgraded, 1 newly installed, 0 to remove and 21 not upgraded.
Need to get 1,704 B of archives.
After this operation, 162 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 apt-transport-https all 2.0.9 [1,704 B]
Fetched 1,704 B in 0s (18.3 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: fal

In [None]:
# Interactive login: prints a URL and waits for the authorization code to be
# pasted back. Clear this cell's output before sharing the notebook — it can
# contain the pasted authorization code and the account e-mail address.
!gcloud auth login

Go to the following link in your browser:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=loULaOPeeuNFGNKKKs9pRUrShVa8Vb&prompt=consent&access_type=offline&code_challenge=27UNzNXSyTV4C8K_Ju5-pWtOB_67AHRQ_1sW8IgmPpY&code_challenge_method=S256

Enter authorization code: <REDACTED>

You are now logged in as [irguard.dev1@gmail.com].
Your current project is [None].  You can change this setting by running:
  $ gcloud config set project PROJECT_ID


In [None]:
!gcloud storage cp -r /content/drive/MyDrive/normalized3  gs://nlp-fall1401-images

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file:///content/drive/MyDrive/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_38388.png to gs://nlp-fall1401-images/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_38388.png
Copying file:///content/drive/MyDrive/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_39412.png to gs://nlp-fall1401-images/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_39412.png
Copying file:///content/drive/MyDrive/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_40436.png to gs://nlp-fall1401-images/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_40436.png
Copying file:///content/drive/MyDrive/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_41460.png to gs://nlp-fall1401-images/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_41460.png
Copying file:///content/drive/MyDrive/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_42484.png to gs://nlp-fall1401-images/normalized3/TCGA-BH-A0E9-01Z-00-DX1/1024/34292_42484.png
Copying file:///content/drive/MyDri