In [7]:
import torch
import os
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

# Make the project root the working directory; the relative paths used by later cells
# (data\..., output\..., model\...) are resolved against it.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
os.chdir('F:\\workspace\\pathology\\gigapath')

In [36]:
# Synthetic metadata for the training cohort: one row per slide, 200 rows.
n_train = 200
train_numbers = range(1, n_train + 1)
tumour_classes = ['glioma', 'ependymoma', 'glioblastoma', 'meningioma', 'normal']
mutation_genes = ['IDH1', 'TP53', 'ATRX', 'PTEN', 'EGFR', 'TERT']

df = pd.DataFrame({
    'id': [f"sample_{i:04d}" for i in train_numbers],
    # NOTE(review): the slide path is zero-padded to 3 digits while `id` uses 4 — confirm this is intended.
    'slide_addr': [f"data/wsi/sample_{i:03d}.svs" for i in train_numbers],
    'tumour_type': np.random.choice(tumour_classes, size=n_train),
    # Gamma draw (shape 1.5, scale 24) truncated to whole numbers
    'duration': np.random.gamma(1.5, 24, size=n_train).astype(int),
    # One independent random 0/1 flag column per gene, drawn in the listed order
    **{gene: np.random.choice([0, 1], size=n_train) for gene in mutation_genes},
    'created_date': pd.date_range('2014-01-01', periods=n_train, freq='D'),
    'quality_score': np.random.uniform(0.5, 1.0, size=n_train),
})

# Persist as a tab-separated table without the index column, then display the frame.
df.to_csv('data\\metadata.tbl', sep='\t', index=False)
df

Unnamed: 0,id,slide_addr,tumour_type,duration,IDH1,TP53,ATRX,PTEN,EGFR,TERT,created_date,quality_score
0,sample_0001,data/wsi/sample_001.svs,glioma,39,1,1,0,0,1,1,2014-01-01,0.661797
1,sample_0002,data/wsi/sample_002.svs,glioma,19,1,1,0,0,1,0,2014-01-02,0.911148
2,sample_0003,data/wsi/sample_003.svs,meningioma,47,0,0,1,1,0,1,2014-01-03,0.506143
3,sample_0004,data/wsi/sample_004.svs,ependymoma,34,0,1,1,1,0,1,2014-01-04,0.913237
4,sample_0005,data/wsi/sample_005.svs,normal,77,0,1,1,0,1,0,2014-01-05,0.629299
...,...,...,...,...,...,...,...,...,...,...,...,...
195,sample_0196,data/wsi/sample_196.svs,meningioma,33,0,0,1,1,0,1,2014-07-15,0.791321
196,sample_0197,data/wsi/sample_197.svs,meningioma,21,1,1,1,1,0,0,2014-07-16,0.967144
197,sample_0198,data/wsi/sample_198.svs,ependymoma,25,0,1,1,0,1,1,2014-07-17,0.500449
198,sample_0199,data/wsi/sample_199.svs,meningioma,20,0,0,1,0,0,1,2014-07-18,0.870751


In [None]:
# Synthetic metadata for the held-out test cohort: 50 rows, sample numbers 1001-1050.
n_test = 50
test_numbers = range(1001, 1001 + n_test)
tumour_classes = ['glioma', 'ependymoma', 'glioblastoma', 'meningioma', 'normal']
mutation_genes = ['IDH1', 'TP53', 'ATRX', 'PTEN', 'EGFR', 'TERT']

df_test = pd.DataFrame({
    'id': [f"sample_{i:04d}" for i in test_numbers],
    'slide_addr': [f"data/wsi/sample_{i:03d}.svs" for i in test_numbers],
    'tumour_type': np.random.choice(tumour_classes, size=n_test),
    # Gamma draw (shape 1.5, scale 24) truncated to whole numbers
    'duration': np.random.gamma(1.5, 24, size=n_test).astype(int),
    # One independent random 0/1 flag column per gene, drawn in the listed order
    **{gene: np.random.choice([0, 1], size=n_test) for gene in mutation_genes},
    'created_date': pd.date_range('2014-01-01', periods=n_test, freq='D'),
    'quality_score': np.random.uniform(0.5, 1.0, size=n_test),
})

# Persist as a tab-separated table without the index column, then display the frame.
df_test.to_csv('data\\metadata_test.tbl', sep='\t', index=False)
df_test

## tile embeddings

In [10]:
# Pattern: intercept + 256 * tile_seq
def generate_coordinate_list(x_range, y_range, TILE_SIZE=256, x_intercept=3072, y_intercept=7808):
    """
    Enumerate the starting pixel coordinates of every tile in a rectangular tile grid.

    Along each axis a coordinate is computed as ``intercept + TILE_SIZE * tile_index``.

    Args:
        x_range: (start, stop) tile indices on the x-axis; start inclusive, stop exclusive
        y_range: (start, stop) tile indices on the y-axis; start inclusive, stop exclusive
        TILE_SIZE: pixel step between consecutive tile indices
        x_intercept: pixel x coordinate of tile index 0
        y_intercept: pixel y coordinate of tile index 0

    Returns:
        List of (x, y) tuples. The x index is the outer loop, so all y values
        for the first x come first, then all y values for the next x, and so on.
    """
    return [
        (x_intercept + TILE_SIZE * col, y_intercept + TILE_SIZE * row)
        for col in range(x_range[0], x_range[1])
        for row in range(y_range[0], y_range[1])
    ]

# Precompute the tile origins for a 1000 x 1000 tile grid (1,000,000 entries).
# Change the two tile ranges below to fit the dimensions of the slide at hand.
coordinate_list = generate_coordinate_list(
    (0, 1000),  # x-axis tile range: tune to the slide width
    (0, 1000),  # y-axis tile range: tune to the slide height
    x_intercept=3072,
    y_intercept=7808,
)

In [81]:
# Write one synthetic tile-embedding file per training slide (sample_0001 ... sample_0200).
# The path is assembled with os.path.join (on Windows this yields the same
# 'output\tiles\rand_embed\...' path as before) and the folder is created up front,
# because torch.save does not create missing directories.
embed_dir = os.path.join('output', 'tiles', 'rand_embed')
os.makedirs(embed_dir, exist_ok=True)

for i in range(1, 201):
    nrows = np.random.poisson(lam=275)  # number of tiles drawn for this slide
    ncols = 1536  # number of columns in each embedding matrix

    # Distinct indices into coordinate_list, restricted to its first 1000 entries.
    # NOTE(review): with the 1000 x 1000 grid built earlier in this notebook, those entries
    # all share the same x coordinate — confirm that this restriction is intended.
    row_seq = random.sample(range(1000), nrows)

    embeds = torch.randn(nrows, ncols)
    # `idx` (not `i`) so the comprehension does not shadow the slide counter
    coords = torch.tensor(np.array([coordinate_list[idx] for idx in row_seq]))
    sample_dict = {'tile_embeds': embeds, 'coords': coords}

    torch.save(sample_dict, os.path.join(embed_dir, f'sample_{i:04d}.pt'))

In [None]:
# Write one synthetic tile-embedding file per test slide (sample_1001 ... sample_1050).
# The path is assembled with os.path.join (on Windows this yields the same
# 'output\tiles\rand_embed\...' path as before) and the folder is created up front,
# because torch.save does not create missing directories.
embed_dir = os.path.join('output', 'tiles', 'rand_embed')
os.makedirs(embed_dir, exist_ok=True)

for i in range(1001, 1051):
    nrows = np.random.poisson(lam=275)  # number of tiles drawn for this slide
    ncols = 1536  # number of columns in each embedding matrix

    # Distinct indices into coordinate_list, restricted to its first 1000 entries.
    # NOTE(review): with the 1000 x 1000 grid built earlier in this notebook, those entries
    # all share the same x coordinate — confirm that this restriction is intended.
    row_seq = random.sample(range(1000), nrows)

    embeds = torch.randn(nrows, ncols)
    # `idx` (not `i`) so the comprehension does not shadow the slide counter
    coords = torch.tensor(np.array([coordinate_list[idx] for idx in row_seq]))
    sample_dict = {'tile_embeds': embeds, 'coords': coords}

    torch.save(sample_dict, os.path.join(embed_dir, f'sample_{i:04d}.pt'))

## load & save model

In [2]:
# Step into code\transfer before importing `gliopath`.
# NOTE(review): presumably the package lives in that folder and is only importable via the
# current working directory — confirm, and consider adding the folder to sys.path instead.
os.chdir('F:\\workspace\\pathology\\gigapath\\code\\transfer')
from gliopath.models.load import giga_slide_enc
# Return to the project root so the relative model/data paths below resolve there.
os.chdir('F:\\workspace\\pathology\\gigapath')
# NOTE(review): the recorded output of this cell reports that no pretrained weights were found
# at this path and that the model was randomly initialized — later cells then operate on
# random weights unless the checkpoint is put in place.
model = giga_slide_enc(path='model/pub/slide_encoder.pth', global_pool=True)
# Last expression of the cell, so its return value is what the notebook displays.
model.eval()



F:\workspace\pathology\gigapath
dilated_ratio:  [1, 2, 4, 8, 16]
segment_length:  [np.int64(1024), np.int64(5792), np.int64(32768), np.int64(185363), np.int64(1048576)]
Number of trainable LongNet parameters:  85148160
Global Pooling: True
[93m Pretrained weights not found at local-dir:model/pub/slide_encoder.pth. Randomly initialized the model! [00m
Slide encoder param # 86330880


## lean model

In [5]:
# ============================================
# STEP 1: Aggressive Pruning (Remove 70-90% of weights)
# ============================================
import torch.nn.utils.prune as prune
import torch.nn as nn

def prune_model(model, amount=0.8):  # Remove 80% of weights
    """
    Apply permanent L1 unstructured pruning to every Linear and Conv2d layer of ``model``.

    For each matching layer the ``weight`` tensor is pruned, and so is ``bias`` when the
    layer has one. The pruning re-parametrization is then removed so the zeros are baked
    into the regular parameters.

    Args:
        model: module whose submodules are scanned (modified in place)
        amount: pruning amount handed to ``prune.l1_unstructured`` for each parameter

    Returns:
        The same ``model`` object that was passed in.
    """
    prunable_types = (nn.Linear, nn.Conv2d)

    for _, layer in model.named_modules():
        if not isinstance(layer, prunable_types):
            continue

        # 'weight' is always pruned; 'bias' only when the layer actually carries one
        targets = ['weight']
        if getattr(layer, 'bias', None) is not None:
            targets.append('bias')

        for param_name in targets:
            prune.l1_unstructured(layer, name=param_name, amount=amount)

        # Make pruning permanent
        for param_name in targets:
            prune.remove(layer, param_name)

    return model

# Prune the loaded encoder with amount=0.8. prune_model modifies the module in place and
# returns that same object, so `model` keeps pointing at the (now pruned) network.
model = prune_model(model, amount=0.8)

In [6]:
# ============================================
# STEP 2: INT8 Quantization (up to ~4x smaller quantized weights vs. FP32)
# ============================================
# Dynamic quantization of the layer types listed below, with int8 as the target dtype.
# NOTE(review): nn.Conv2d, nn.LSTM and nn.GRU are listed alongside nn.Linear — confirm which
# of these the installed PyTorch's dynamic quantization actually converts.
dynamic_quant_targets = {nn.Linear, nn.Conv2d, nn.LSTM, nn.GRU}
quantized_model = torch.quantization.quantize_dynamic(
    model, qconfig_spec=dynamic_quant_targets, dtype=torch.qint8
)

In [7]:
# ============================================
# STEP 3: Half Precision for remaining float layers
# ============================================
# Cast the remaining floating-point state of the dynamically quantized model to half precision.
# NOTE(review): presumably the int8-quantized layers are unaffected by this cast — confirm,
# and confirm that half-precision inference is supported on the target device.
quantized_model = quantized_model.half()

In [9]:
import bitsandbytes as bnb

# Replace Linear layers with 4-bit versions
def replace_linear_with_4bit(model):
    """
    Recursively swap every ``nn.Linear`` submodule of ``model`` for a ``bnb.nn.Linear4bit``.

    The swap happens in place. Each replacement layer is built with the same
    in/out feature counts and bias setting as the layer it replaces and is then
    loaded with that layer's parameters, so the existing weights are carried
    over instead of being discarded for a fresh initialization. Layers that are
    already ``Linear4bit`` are left alone, which makes repeated calls (e.g.
    re-running the notebook cell) safe.

    NOTE(review): per the bitsandbytes documentation the actual 4-bit packing is
    performed when the module is moved to the accelerator device — confirm that
    the model is moved there before it is saved or used.

    Args:
        model: module whose children are walked recursively (modified in place)

    Returns:
        The same ``model`` object that was passed in.
    """
    for name, module in model.named_children():
        if isinstance(module, bnb.nn.Linear4bit):
            # Already converted. Linear4bit is itself an nn.Linear in bitsandbytes, so
            # without this guard it would be replaced (and re-initialized) again.
            continue

        if isinstance(module, nn.Linear):
            # Replace with 4-bit quantized version
            new_module = bnb.nn.Linear4bit(
                module.in_features,
                module.out_features,
                bias=module.bias is not None
            )
            # Carry the original weight (and bias, if any) over into the new layer
            new_module.load_state_dict(module.state_dict())
            setattr(model, name, new_module)
        else:
            replace_linear_with_4bit(module)
    return model

# Run the 4-bit replacement over the already dynamically-quantized, half-precision model.
# NOTE(review): replace_linear_with_4bit only matches nn.Linear instances; after
# quantize_dynamic the Linear layers have presumably been swapped for quantized module
# types, in which case this call changes nothing — confirm which model should be passed here.
quantized_model = replace_linear_with_4bit(quantized_model)

In [10]:
# ============================================
# STEP 4: Save optimized model
# ============================================
# Save only the state_dict of the optimized model (not the module object itself).
# NOTE(review): loading this file back presumably requires a model with the same
# quantized layer structure rather than the original float architecture — confirm.
torch.save(quantized_model.state_dict(), 'model\\slide_encoder_lite.pth')

In [11]:
# Rebuild the encoder through the same factory, this time pointing at the lite checkpoint.
# NOTE(review): the recorded run of this cell ended in a MemoryError (2.86 GiB float64 array
# of shape (1000000, 384)), so this load has not been observed to succeed.
model = giga_slide_enc(path='model\\slide_encoder_lite.pth', global_pool=True)

F:\workspace\pathology\gigapath
dilated_ratio:  [1, 2, 4, 8, 16]
segment_length:  [np.int64(1024), np.int64(5792), np.int64(32768), np.int64(185363), np.int64(1048576)]
Number of trainable LongNet parameters:  85148160
Global Pooling: True


MemoryError: Unable to allocate 2.86 GiB for an array with shape (1000000, 384) and data type float64

In [1]:
import timm
# NOTE(review): imported only for its side effect — presumably it registers the gigapath
# architectures with timm; the module name `longn` looks truncated, verify it.
import gliopath.models.longn
# Show the timm model names matching the '*gigapath*' pattern.
timm.list_models('*gigapath*')



In [2]:
# Instantiate the registered slide encoder by name, without pretrained weights.
# NOTE(review): the recorded run of this cell raised UnboundLocalError for `longnet_args`
# inside the model factory — that needs fixing in the model code before this cell can succeed.
timm.create_model('gigapath_slide_enc3l32d', pretrained=False, in_chans=1536, global_pool=True)

UnboundLocalError: cannot access local variable 'longnet_args' where it is not associated with a value