# Setup environment

In [1]:
# install openslide dependencies
!sudo apt-get install openslide-tools
!sudo apt-get install python-openslide
!pip install openslide-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libopenslide0
Suggested packages:
  libtiff-tools
The following NEW packages will be installed:
  libopenslide0 openslide-tools
0 upgraded, 2 newly installed, 0 to remove and 38 not upgraded.
Need to get 104 kB of archives.
After this operation, 297 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopenslide0 amd64 3.4.1+dfsg-5build1 [89.8 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 openslide-tools amd64 3.4.1+dfsg-5build1 [13.8 kB]
Fetched 104 kB in 1s (72.1 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 2.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readl

In [2]:
# install flamingo and histogpt
!pip install flamingo-pytorch --no-deps
!pip install git+https://github.com/marrlab/HistoGPT

Collecting flamingo-pytorch
  Downloading flamingo_pytorch-0.1.2-py3-none-any.whl (7.8 kB)
Installing collected packages: flamingo-pytorch
Successfully installed flamingo-pytorch-0.1.2
Collecting git+https://github.com/marrlab/HistoGPT
  Cloning https://github.com/marrlab/HistoGPT to /tmp/pip-req-build-4pebi0ej
  Running command git clone --filter=blob:none --quiet https://github.com/marrlab/HistoGPT /tmp/pip-req-build-4pebi0ej
  Resolved https://github.com/marrlab/HistoGPT to commit 41b7306efcf596e085d9c58c1e7ec39484c66558
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting einops>=0.4 (from histogpt==0.1.0)
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops-exts (from histogpt==0.1.0)
  Downloading einops_exts-0.0.4-py3-none-any.whl (3.9 kB)
Collecting sacremoses>=0.1.1 (from histogpt==0.1.0)
  Downloading sacremoses-0.1.1-py3-no

In [5]:
# pick the compute device: CUDA when PyTorch reports one is available, otherwise the CPU
import torch

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


# Simple examples

In [4]:
# make a forward pass through the model
from transformers import BioGptConfig
from histogpt.models import HistoGPTForCausalLM, PerceiverResamplerConfig

# keep a handle on the language-model config so the dummy token ids below are drawn
# from the configured vocabulary size instead of a hard-coded number
lm_config = BioGptConfig()
histogpt = HistoGPTForCausalLM(lm_config, PerceiverResamplerConfig())
histogpt = histogpt.to(device)

# random stand-ins for real inputs: a (1, 256) tensor of token ids and a (1, 1024, 768) float tensor
text = torch.randint(0, lm_config.vocab_size, (1, 256)).to(device)
image = torch.rand(1, 1024, 768).to(device)

# only the output shape is inspected, so gradient tracking is switched off
with torch.no_grad():
    print(histogpt(text, image).logits.size())

torch.Size([1, 256, 42384])


In [39]:
# generate text autoregressively
from histogpt.helpers.inference import generate

# The dummy prompt and image are moved to the same device as the model before the call,
# matching how the real prompt and features are prepared later in this notebook.
# NOTE(review): generate() may already move its inputs itself; .to(device) is then a no-op.
out = generate(
    model=histogpt,
    prompt=torch.randint(0, 42384, (1, 2)).to(device),
    image=torch.rand(1, 2, 768).to(device),
    length=256,
    top_k=40,
    top_p=0.95,
    temp=0.7,
    device=device
)



# Generate reports from features

In [7]:
# download model weights
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-1b-6k-pruned.pth?download=true

--2024-03-15 12:42:05--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/histogpt-1b-6k-pruned.pth?download=true
Resolving huggingface.co (huggingface.co)... 13.35.7.57, 13.35.7.5, 13.35.7.81, ...
Connecting to huggingface.co (huggingface.co)|13.35.7.57|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/16835f1069ffcfb5b379f3d1423fbf3d99a679d1b426e7b28c4604c8e1cd6956?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27histogpt-1b-6k-pruned.pth%3B+filename%3D%22histogpt-1b-6k-pruned.pth%22%3B&Expires=1710765725&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc2NTcyNX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvMTY4MzVmMTA2OWZmY2ZiNWIzNzlmM2QxNDIzZmJmM2Q5OWE

In [8]:
# download example features
!wget https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true

--2024-03-15 12:45:46--  https://huggingface.co/marr-peng-lab/histogpt/resolve/main/2023-03-06%2023.51.44.h5?download=true
Resolving huggingface.co (huggingface.co)... 13.35.7.38, 13.35.7.5, 13.35.7.57, ...
Connecting to huggingface.co (huggingface.co)|13.35.7.38|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/f6/8f/f68faf0906e39e8c3590cdbdd523457dc01bcea2a52d9de48cd7b06821eaac6a/72aaa4f690facfa0b02ffcec2b327e480933e134faf8633307097273294f6b49?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%272023-03-06%252023.51.44.h5%3B+filename%3D%222023-03-06+23.51.44.h5%22%3B&Expires=1710765946&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxMDc2NTk0Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Y2LzhmL2Y2OGZhZjA5MDZlMzllOGMzNTkwY2RiZGQ1MjM0NTdkYzAxYmNlYTJhNTJkOWRlNDhjZDdiMDY4MjFlYWFjNmEvNzJhYWE0ZjY5MGZhY2ZhMGIwMmZmY2VjMmIzMjdlNDgwOTMzZT

In [13]:
# load model weights
PATH = '/content/histogpt-1b-6k-pruned.pth?download=true'
# weights_only=True restricts unpickling to tensors and plain containers, so the
# downloaded checkpoint cannot run arbitrary code while it is being loaded
state_dict = torch.load(PATH, map_location=device, weights_only=True)
# strict=True: fail if the checkpoint's keys do not match the model's keys exactly
histogpt.load_state_dict(state_dict, strict=True)

<All keys matched successfully>

In [35]:
# build the tokenized text prompt and load the stored image features
import h5py
from transformers import BioGptTokenizer

tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")

# encode the prompt string, then add a leading axis and move it to the device
prompt = 'Final diagnosis:'
token_ids = tokenizer.encode(prompt)
prompt = torch.tensor(token_ids).unsqueeze(0).to(device)

# read the full 'feats' dataset into memory while the file is open
with h5py.File('/content/2023-03-06 23.51.44.h5?download=true', 'r') as f:
    features = f['feats'][:]

# convert to a tensor, add a leading axis and move it to the device
features = torch.tensor(features).unsqueeze(0).to(device)

In [40]:
# sample a report from the model, conditioned on the prompt tokens and the loaded features
sampling = dict(length=256, top_k=40, top_p=0.95, temp=0.7)
out = generate(model=histogpt, prompt=prompt, image=features, device=device, **sampling)

# decode the first sequence in the batch, leaving out its first token
decoded = tokenizer.decode(out[0, 1:])
print(decoded)

                                                

Final diagnosis: Basal cell carcinoma. Microscopic findings: In the epidermis, endophytic proliferations of basaloid cell strands are observed in solid nest and strand-shaped formations, featuring palisade-like core positions in the peripheral zones. There is a cellular inflammatory stroma reaction. Critical findings: The diagnosis is solid basal cell carcinoma with a tumor thickness of 0. 7 mm. The specimen is accurately and completely described.




# TODO: Generate reports from images

In [None]:
# call get_models with the 'cpu' device string and keep the result as `ctranspath`
from histogpt.helpers.patching import get_models

patching_device = 'cpu'
ctranspath = get_models(patching_device)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
