In [55]:
# Load packages and classes
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tiffslide
import seaborn as sns
import gget
import tifffile
import zarr

# MosaicDataset and BruceDataset classes allow loading and visualisation of the different data sources
from gbmhackathon import MosaicDataset, BruceDataset

from gbmhackathon.utils.visium_functions import (
    normalize_anndata_wrapper,
    convert_obsm_to_adata
)
from gbmhackathon.viz.visium_functions import (
    plot_spatial_expression,
    plot_obsm
)
from gbmhackathon.stats.visium_functions import (
    perform_multi_clustering,
    quantify_cell_population_activity
)

In [25]:
# Look at the available data sources: list the keys of the MosaicDataset.sources mapping.
MosaicDataset.sources.keys()

dict_keys(['clinical', 'bulk_rna', 'spatial', 'sc_rna', 'wes', 'he'])

In [2]:
# The following cell returns a dictionary with the data sources as keys and,
# for each source, the list of files (and paths) used to return the data.
source_dict_mosaic = MosaicDataset.load_tabular()

In [43]:
# Show which data sources are present in the dictionary returned by load_tabular().
source_dict_mosaic.keys()

dict_keys(['clinical', 'bulk_rna', 'wes', 'he'])

In [19]:
# Read the per-patient availability table (indexed by its first column) and
# preview the first two rows. The path is split over two literals only for
# readability; the resulting string is unchanged.
filename_sample_table = (
    "/home/ec2-user/SageMaker/data/mosaic_dataset/"
    "Data availibility per modality per patient.csv"
)
sample_table = pd.read_csv(filename_sample_table, index_col=0)
sample_table.head(2)

Unnamed: 0_level_0,Visium_usable,WES_usable,scRNAseq_usable,bulkRNAseq_usable,HE_usable,Clinical_usable
Hackathon_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HK_G_001a,Yes,Yes,Yes,Yes,Yes,Yes
HK_G_002a,Yes,Yes,Yes,Yes,Yes,Yes


In [17]:
# Boolean mask of the rows whose "HE_usable" flag is exactly "Yes";
# summing the mask counts how many rows match.
mask_yes = sample_table["HE_usable"].eq("Yes")
mask_yes.sum()

114

In [20]:
# (number of rows, number of columns) of the availability table.
sample_table.shape

(115, 6)

## Analysis of clinical data

In [24]:
# Pull the entry stored under the 'clinical' key of the loaded tabular sources.
clinical = source_dict_mosaic['clinical']

In [27]:
# List the entries available inside the clinical source.
clinical.keys()

dict_keys(['data dictionary', 'original clinical', 'processed gbm clinical', 'treatments', 'key events clinical'])

In [36]:
# Bind four of the clinical entries to their own names in one step.
# The names on the left line up, in order, with the keys on the right.
(
    data_clinical_dict,
    processed_clinical,
    treatments,
    key_events_clinical,
) = (
    clinical[entry_name]
    for entry_name in (
        "data dictionary",
        "processed gbm clinical",
        "treatments",
        "key events clinical",
    )
)

In [41]:
# Preview the first 10 rows of the key-events table.
key_events_clinical.head(10)

Unnamed: 0_level_0,event_id,event_uuid,progression_recurrence,time_interval_diagnosis_to_progression_recurrence_years
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HK_G_002,HK_G_002_event_1.0,882c0891-f733-5bd2-bc87-bfc8bf37cab0,Yes,1.702943
HK_G_002,HK_G_002_event_2.0,40329cc2-af36-51ce-883d-d4482582d576,Yes,2.08898
HK_G_003,HK_G_003_event_1.0,63344840-9cdf-528e-a00f-f2f0475ab8b1,Yes,0.936345
HK_G_003,HK_G_003_event_2.0,1b06f64f-0255-5e6e-a404-46ff524eb503,Yes,1.281314
HK_G_004,HK_G_004_event_1.0,0874f35a-a4e4-50f2-8139-b42996e55d07,Yes,0.260096
HK_G_004,HK_G_004_event_2.0,3482236b-37a6-5a2d-93a9-023100740e29,Yes,0.602327
HK_G_005,HK_G_005_event_1.0,edcfbf19-e5a1-5ada-81a2-6d41332ba3da,Yes,0.068446
HK_G_005,HK_G_005_event_2.0,de59043c-1bf8-5218-84a9-29bca310c6a9,Yes,0.424367
HK_G_005,HK_G_005_event_3.0,b6cd0c29-d1a4-5c2a-8a8c-79b79e2d158a,Yes,2.841889
HK_G_005,HK_G_005_event_4.0,fd359bd9-776e-52ea-8923-e347fdd16ba0,Yes,3.030801


In [35]:
# Preview the first rows of the treatments table.
treatments.head()

Unnamed: 0,patient_id,treatment_id,treatment_uuid,treatment_type,term_for_medical_coding,custom_treatment_label,surgery_specify,surgery_indication,surgery_on_primary_tumour,surgery_on_primary_tumour_type,...,describe_toxicity_ies,highest_toxicity_grade,reason_for_treatment_end,best_response_during_treatment,years_treatment_start_to_end,treatment_event_order_by_start_date,treatment_ongoing,treatment_type_specific,treatment_chronology,time_interval_between_diagnosis_treatment_start_years
0,HK_G_001,HK_G_001_treatment_1.0,34e25ce3-50b5-5359-88aa-a7723d3188df,Surgery,,Surgery:PrimaryTumour,Right temporal craniotomy for resection of a h...,Initial treatment,Yes,Resection,...,Left facio-brachio-crural hemisyndrome with pa...,,,,,1.0,,,,0.0
1,HK_G_001,HK_G_001_treatment_2.0,6f355b51-193a-52a9-94ef-be9b0685c563,Radiotherapy,,Post-operative:nan,,,,,...,,,Other,Stable disease,0.054757,2.0,No,Other,Post-operative,0.093087
2,HK_G_001,HK_G_001_treatment_2.0,6f355b51-193a-52a9-94ef-be9b0685c563,Chemotherapy,Temozolomide,Adjuvant:Temozolomide,,,,,...,,,Scheduled treatment end,Stable disease,0.054757,2.0,No,,Adjuvant,0.093087
3,HK_G_001,HK_G_001_treatment_4.0,85416b21-bfd3-5a2f-9318-5dbfe2aed605,Chemotherapy,Temozolomide,Maintenance:Temozolomide,,,,,...,,,Scheduled treatment end,Stable disease,0.982888,4.0,No,,Maintenance,0.301164
4,HK_G_002,HK_G_002_treatment_1.0,c329e7df-9dfd-524f-af99-fc4e297dd524,Surgery,,Surgery:PrimaryTumour,Awake craniotomy for resection of a left tempo...,Initial treatment,Yes,Resection,...,,,,,,1.0,,,,0.0


### Spatial data

In [44]:
# Load the Visium data for three selected samples, using the "hires" spatial image.
visium_dict = MosaicDataset.load_visium(
    sample_list=["HK_G_022a_vis", "HK_G_024a_vis", "HK_G_030a_vis"], # remove this argument to load all available samples
    resolution="hires"
)

Resolution of the spatial image to load:  hires
You can change the resolution by setting the resolution parameter using the resolution argument.
Loading Visium data, this can take few minutes...


In [56]:
# Normalize the loaded Visium samples with the project helper.
# NOTE(review): target_sum=1e6 presumably means counts-per-million scaling — confirm in normalize_anndata_wrapper.
visium_obj = normalize_anndata_wrapper(visium_dict, target_sum=1e6)

In [57]:
# Select a single sample from the normalized collection for closer inspection.
one_visium_file = visium_obj["HK_G_030a_vis"]

In [58]:
# Display the object's summary (dimensions and the names of its obs/var/uns/obsm/layers entries).
one_visium_file

AnnData object with n_obs × n_vars = 4522 × 16927
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial', 'log1p'
    obsm: 'spatial'
    layers: 'raw', 'CPM', 'log_CPM'

In [59]:
# Per-observation table (columns: in_tissue, array_row, array_col).
# NOTE(review): the index values look like spot barcodes rather than gene sequences — confirm.
one_visium_file.obs

Unnamed: 0,in_tissue,array_row,array_col
AACACGTGCATCGCAC-1,1,76,22
AACACTTGGCAAGGAA-1,1,47,71
AACAGGAAGAGCATAG-1,1,69,7
AACAGGATTCATAGTT-1,1,49,43
AACAGGCCAACGATTA-1,1,71,127
...,...,...,...
TGTTGGAACGAGGTCA-1,1,28,72
TGTTGGAAGCTCGGTA-1,1,1,95
TGTTGGATGGACTTCT-1,1,13,53
TGTTGGCCAGACCTAC-1,1,49,47


In [60]:
# Per-variable table, indexed by gene symbol (columns: gene_ids, feature_types, genome).
one_visium_file.var

Unnamed: 0,gene_ids,feature_types,genome
SAMD11,ENSG00000187634,Gene Expression,GRCh38
NOC2L,ENSG00000188976,Gene Expression,GRCh38
KLHL17,ENSG00000187961,Gene Expression,GRCh38
PLEKHN1,ENSG00000187583,Gene Expression,GRCh38
PERM1,ENSG00000187642,Gene Expression,GRCh38
...,...,...,...
MT-ND4L,ENSG00000212907,Gene Expression,GRCh38
MT-ND4,ENSG00000198886,Gene Expression,GRCh38
MT-ND5,ENSG00000198786,Gene Expression,GRCh38
MT-ND6,ENSG00000198695,Gene Expression,GRCh38
