In [24]:
import matplotlib.pyplot as plt
import skimage
import numpy as np
import pandas as pd
import glob

In this script we extract the maldi glycan signals from each tissue, and used for tissue level quantitative analysis.

Eg related to figure 6C.

In [29]:
evalu = pd.read_csv('../extracted/EvaluationV2.csv') # load glycan names etc..
#evalu = evalu.dropna()

# remove glycans that are not measured in all samples
notinlist = ['Gc_28+S', 'Gc_28+S+Na*2', 'Gc_31+Fuc+Na*2', 'Gc_37+Sx2+Na', 'Gc_48+Sx2+Na',
            'Gc_49+Fucx3', 'Gc_54+Fucx4+Na', '46', '38+Fucx2', 'Gc_8+Fuc']
evalu = evalu[~evalu['composition'].isin(notinlist)]
evalu.shape

(38, 9)

In [30]:
slides = ['dsst1c1-total_ion_count', 'dsst2c2-total_ion_count',
         'dsst3c3-total_ion_count', 'dsst4c4-total_ion_count']

#slides = ['dsst1c1-total_ion_count']

root_dir = '../extracted/pipeline_results/'

glycogens = evalu.composition.to_list()
glycogens_standardname = evalu.Standard_name.to_list()

## make standard name dict

ddg = {}
for i in range(len(glycogens)):
    ddg[glycogens[i]] = glycogens_standardname[i]

#### make dictionary for saving
dd = {'maldiID':[]}
for glyc in glycogens_standardname:
    dd[glyc] = []


####
idx = 0
for item in slides:
    folder_dir = root_dir + item + '/extracted/library_matched/float/'
    mask_dir = root_dir + item + '/Mask*.tif'
    masklist = glob.glob(mask_dir)
    idx = idx + 1 # slide number
    
    for glyc in glycogens:
        img_dir = folder_dir + glyc + '.tiff'
        img = skimage.io.imread(img_dir)
        
        for mask in masklist:
            maskid = 's'+str(idx)+'-'+mask.split('/')[-1][4:-4] # get maskID
            msk = skimage.io.imread(mask) # section mask
            msk[msk > 0] = 1
            img_tmp = img * msk
            
            tmp1 = img > skimage.filters.threshold_otsu(img_tmp) # otsu mask
            tmp2 = tmp1 * img_tmp # overlay
            
            size = np.sum(tmp1)
            signal = np.sum(tmp2)
            rel_sig = signal / size # normalize the per-tissue signal by tissue size
            
            # save the result:
            sname = ddg[glyc]
            dd[sname].append(rel_sig)
            
            # save maldiID:
            if glyc == '10': # only save once
                dd['maldiID'].append(maskid)
        
    

In [31]:
result = pd.DataFrame.from_dict(dd)
result.to_csv('../Extracted_sigNormsize_all.csv')

In [32]:
result

Unnamed: 0,maldiID,Hex3HexNAc2,Hex3dHex1HexNAc2,Hex4HexNAc2,Hex5HexNAc2,Hex3HexNAc3,Hex3dHex1HexNAc3,Hex4HexNAc3,Hex3dHex1HexNAc4,Hex5HexNAc3,...,Hex6dHex3HexNAc6,Hex5HexNAc4NeuAc1,Hex5dHex1HexNAc4NeuAc1,Hex5dHex1HexNAc6NeuAc1,Hex7HexNAc6,Hex5dHex2HexNAc4,Hex4dHex2HexNAc5,Hex6dHex2HexNAc5,Hex5HexNAc6NeuAc1,Hex5HexNAc5NeuAc1
0,s1-9,2.048792,2.412731,2.224348,15.264939,2.537573,4.311773,5.172860,5.599789,3.845676,...,1.579386,1.960646,4.157392,1.487257,1.487824,3.569991,3.345958,1.833043,1.447675,1.631977
1,s1-8,1.987102,2.101363,1.970345,11.084962,2.408372,3.800378,4.670835,5.053198,3.037160,...,1.659207,1.703378,2.987572,1.611934,1.614819,2.982436,3.445508,1.906187,1.604735,1.722724
2,s1-15,185.783632,6.554713,15.885266,92.107544,61.141369,3.521475,9.789409,4.348571,1.818061,...,0.481586,31.776128,9.363809,0.301745,0.989934,20.615109,0.456639,0.933749,0.352219,1.025954
3,s1-1,6.114084,8.527836,5.563692,12.813337,6.778779,13.935340,5.365334,15.087869,4.271006,...,4.877536,3.277753,3.428887,5.186115,6.011704,3.707920,8.249293,6.377245,6.651885,7.373897
4,s1-14,10.174304,4.517122,2.943171,15.072366,4.981597,5.360948,21.487141,22.688778,2.221267,...,1.097834,73.196867,87.711673,0.607378,2.840094,55.295204,1.041175,1.930235,0.619774,2.851618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,s4-12,2.546766,2.203038,2.653971,12.104411,3.531513,4.851373,4.434466,8.770939,3.404922,...,3.139097,2.302099,2.021243,4.821088,2.880612,2.447947,3.329654,4.121710,3.314911,3.287683
63,s4-10,3.001802,2.596170,3.354872,15.342431,3.999906,5.556088,4.997346,10.076631,3.961871,...,2.911016,2.658063,2.345941,5.281806,3.465015,2.644214,3.292691,4.891464,3.684450,3.596194
64,s4-5,3.825571,3.037624,4.480463,13.736424,4.275897,5.731534,5.457959,8.786048,4.574927,...,4.070428,3.149398,2.821768,5.626699,5.761918,2.909060,3.263876,5.502137,5.145301,5.589420
65,s4-4,3.788381,2.810922,4.153805,14.063687,4.507343,5.917111,5.294141,10.843714,4.349698,...,3.595717,3.006430,2.608277,5.573833,5.423795,2.725018,3.671851,5.259037,5.215113,5.068764


## make a related meta data file that also contains MALDI related information

In [7]:
meta = pd.read_csv('../../../meta_link/meta_mibi_dsp.csv')

Unnamed: 0.1,Unnamed: 0,mibi_run,fov_in_run,location,DSP_Ecad_name,DSP_CD45_name,DSP_Bac_name,tissueID,MALDI-ID
0,1,2023-03-04T16-22-15-DSSC1-Run1,fov-1-scan-1,Small,,,,C1D1,S1-6
1,2,2023-03-04T16-22-15-DSSC1-Run1,fov-2-scan-1,Small,DSP-1001660012833-A-D04,DSP-1001660012833-A-D03,DSP-1001660012833-A-D02,C1D1,S1-6
2,3,2023-03-04T16-22-15-DSSC1-Run1,fov-3-scan-1,Small,DSP-1001660012833-A-D10,DSP-1001660012833-A-D09,DSP-1001660012833-A-D08,C1D2,S1-5
3,4,2023-03-04T16-22-15-DSSC1-Run1,fov-4-scan-1,Small,DSP-1001660012833-A-D07,DSP-1001660012833-A-D06,DSP-1001660012833-A-D05,C1D2,S1-5
4,5,2023-03-04T16-22-15-DSSC1-Run1,fov-5-scan-1,Small,,,,C1D2,S1-5
...,...,...,...,...,...,...,...,...,...
197,198,2023-03-28T11-45-15-DSSC1-Run4,fov-2-scan-1,Large,,,,C1D6,S1-1
198,199,2023-03-28T11-45-15-DSSC1-Run4,fov-3-scan-1,Large,,,,C1D6,S1-1
199,200,2023-03-28T11-45-15-DSSC1-Run4,fov-4-scan-1,Large,,,,C1D6,S1-1
200,201,2023-03-28T11-45-15-DSSC1-Run4,fov-5-scan-1,Large,,,,C1D6,S1-1


In [16]:
dd = {'treat' : meta.mibi_run.str[-7:-6].to_list(),
      'MaldiID' : meta['MALDI-ID'].to_list(), 'location' : meta['location'].to_list()}

df = pd.DataFrame.from_dict(dd)
df['MaldiID'] = df['MaldiID'].str.replace('S','s')
df.to_csv('../maldi_meta.csv', index = False)
df

Unnamed: 0,treat,MaldiID,location
0,C,s1-6,Small
1,C,s1-6,Small
2,C,s1-5,Small
3,C,s1-5,Small
4,C,s1-5,Small
...,...,...,...
197,C,s1-1,Large
198,C,s1-1,Large
199,C,s1-1,Large
200,C,s1-1,Large


Then manually added the missing control TMA 1-4 and some missing cores. Saved as V2.csv