Python notebook that merges two AnnData objects, one holding image segmentation data and one holding transcription data, into a single AnnData object

# Import

In [1]:
import anndata as ad
import pandas as pd

# Input

Transcription AnnData object and image segmentation AnnData object

In [2]:
# Load the two inputs: adataI is read from the segmentation objects folder,
# adataT from the transcription objects folder.
segmentation_file = "/disk2/user/cormey/outputs/Segmentation_objects/034_RJ.C1.h5ad"
transcription_file = "/disk2/user/cormey/outputs/Transcription_objects/034_RJ.C1"
adataI = ad.read_h5ad(segmentation_file)
adataT = ad.read_h5ad(transcription_file)

# Output

Anndata object with both image and transcription data

# Add a barcode column to obs

In [3]:
# adataT: the barcode is the part of each obs index label that precedes the
# first "#".
liste_BC = [obs_name.split("#")[0] for obs_name in adataT.obs.index]
adataT.obs['barcode'] = liste_BC

In [4]:
# Show the first rows of adataT.obs, including the 'barcode' column.
adataT.obs.head()

Unnamed: 0,batch,annotation,barcode
AAACACCAATAACTGC-1#V10F03-034_C,V10F03-034_C,Tumor,AAACACCAATAACTGC-1
AAACAGCTTTCAGAAG-1#V10F03-034_C,V10F03-034_C,Tumor,AAACAGCTTTCAGAAG-1
AAACAGGGTCTATATT-1#V10F03-034_C,V10F03-034_C,Mixed,AAACAGGGTCTATATT-1
AAACAGTGTTCCTGGG-1#V10F03-034_C,V10F03-034_C,Tumor,AAACAGTGTTCCTGGG-1
AAACATGGTGAGAGGA-1#V10F03-034_C,V10F03-034_C,Tumor,AAACATGGTGAGAGGA-1


In [5]:
# Replace the obs index of adataT with a default 0..n-1 index. The previous
# index labels are dropped (drop=True), not kept as a column.
df_obsT = adataT.obs.reset_index(drop=True)
adataT.obs = df_obsT

In [6]:
# Print the first 10 rows x first 10 columns of the transcription matrix.
print(adataT.X[:10,:10])

  (0, 6)	0.9533190626749124
  (4, 3)	0.4095996751707791
  (6, 3)	1.3419501350667695
  (7, 9)	0.5834546703358795
  (8, 3)	1.1453497665197996


In [7]:
# adataI: the barcode is the fifth "_"-separated field (index 4) of each
# entry in the 'path' column.
liste_BC = [image_path.split("_")[4] for image_path in adataI.obs["path"]]
adataI.obs['barcode'] = liste_BC

In [8]:
# Show the first rows of adataI.obs, including the 'barcode' column.
adataI.obs.head()

Unnamed: 0,path,annotation,barcode
0,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,CTGGTCATTCCAATCC-1
1,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,AATTCATAAGGGATCT-1
2,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,GGAACGGCCTGCAGCC-1
3,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,GTAGAAACGGGTGGAG-1
4,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,TAATGAAAGACCCTTG-1


In [9]:
# Show the first row of the segmentation matrix, columns 0 to 26.
adataI.X[:1,:27]

array([[ 8.00000000e+00,  1.51412500e+03,  1.23305834e+03,
         1.97425000e+03,  1.61632057e+03,  1.54850000e+03,
         1.24649258e+03,  6.61739614e-01,  1.28501719e-01,
         4.01106723e+01,  1.78599494e+01,  1.00000000e+00,
         0.00000000e+00,  7.58142058e-01,  5.66055963e-02,
         4.90330428e+01,  2.10596941e+01,  4.74397455e+01,
         2.07645506e+01,  3.47138376e+01,  1.52996667e+01,
        -4.98534895e-01,  4.14310232e-01,  1.35720193e+02,
         5.91845924e+01,  9.64560118e-01,  2.88448173e-02]])

# Sort both AnnData objects by barcode

In [10]:
# Order both objects by barcode so that row i of adataI and row i of adataT
# refer to the same barcode. The X matrices are later joined purely by row
# position, so this ordering is what pairs the two data sets.
adataI = adataI[adataI.obs.sort_values('barcode').index]
adataT = adataT[adataT.obs.sort_values('barcode').index]

# Guard: positional pairing is only valid when both objects hold exactly the
# same barcodes in the same order. Fail loudly rather than silently combining
# segmentation and transcription values that belong to different barcodes.
if list(adataI.obs['barcode']) != list(adataT.obs['barcode']):
    raise ValueError(
        "adataI and adataT do not contain the same barcodes; "
        "their rows cannot be paired by position."
    )

In [11]:
# Show the first rows of adataT.obs after sorting by barcode.
adataT.obs.head()

Unnamed: 0,batch,annotation,barcode
0,V10F03-034_C,Tumor,AAACACCAATAACTGC-1
1,V10F03-034_C,Tumor,AAACAGCTTTCAGAAG-1
2,V10F03-034_C,Mixed,AAACAGGGTCTATATT-1
3,V10F03-034_C,Tumor,AAACAGTGTTCCTGGG-1
4,V10F03-034_C,Tumor,AAACATGGTGAGAGGA-1


In [12]:
# Show the first rows of adataI.obs after sorting by barcode.
adataI.obs.head()

Unnamed: 0,path,annotation,barcode
1250,/disk2/user/cormey/outputs/Tilted_images/034_R...,Tumor,AAACACCAATAACTGC-1
442,/disk2/user/cormey/outputs/Tilted_images/034_R...,Tumor,AAACAGCTTTCAGAAG-1
638,/disk2/user/cormey/outputs/Tilted_images/034_R...,Mixed,AAACAGGGTCTATATT-1
1891,/disk2/user/cormey/outputs/Tilted_images/034_R...,Tumor,AAACAGTGTTCCTGGG-1
1430,/disk2/user/cormey/outputs/Tilted_images/034_R...,Tumor,AAACATGGTGAGAGGA-1


In [13]:
# Print the first row of the transcription matrix from column 20 onward.
print(adataT.X[:1,20:])

  (0, 24)	1.432369437926531
  (0, 32)	0.9533190626749124
  (0, 148)	1.432369437926531
  (0, 183)	0.9533190626749124
  (0, 193)	1.432369437926531
  (0, 212)	0.9533190626749124
  (0, 221)	1.9983974431020337
  (0, 224)	1.754908425430092
  (0, 228)	0.9533190626749124
  (0, 242)	0.9533190626749124
  (0, 250)	0.9533190626749124
  (0, 289)	0.9533190626749124
  (0, 312)	0.9533190626749124


# Combine the X matrices of both AnnData objects

In [15]:
# Turn the X matrix of each AnnData object into a DataFrame and give both a
# default 0..n-1 index so they can be combined row by row.
matrix_X = adataT.X
df_transcription = pd.DataFrame(
    matrix_X.toarray(),  # toarray converts the sparse matrix into a dense one
    index=adataT.obs_names,
    columns=adataT.var_names,
).reset_index(drop=True)
df_segmentation = pd.DataFrame(
    adataI.X,
    index=adataI.obs.index,
).reset_index(drop=True)

In [16]:
# (rows, columns) of the segmentation DataFrame.
df_segmentation.shape

(2061, 27)

In [17]:
# (rows, columns) of the transcription DataFrame.
df_transcription.shape

(2061, 350)

In [18]:
# Place the segmentation columns and the transcription columns side by side;
# rows are matched on the index of the two frames.
df_combi = pd.concat(
    (df_segmentation, df_transcription),
    axis="columns",
)

In [19]:
# Show the first rows of the combined DataFrame.
df_combi.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,ENSG00000087842,ENSG00000102104,ENSG00000086758,ENSG00000196338,ENSG00000157502,ENSG00000080561,ENSG00000213468,ENSG00000102181,ENSG00000274791,ENSG00000273748
0,38.0,899.921053,580.909966,1206.973684,785.921433,928.184211,593.15383,0.726824,0.148092,32.446737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,39.0,883.205128,403.093116,1244.230769,572.650091,917.871795,406.481789,0.72813,0.110887,32.624605,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,27.0,972.962963,429.086064,1337.37037,591.938507,1006.333333,434.472179,0.728673,0.159345,34.331603,...,0.669984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,55.0,897.963636,414.469561,1191.272727,568.69482,927.290909,422.341597,0.696598,0.125523,32.869511,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,38.0,867.684211,399.065626,1179.131579,533.022621,897.210526,407.240273,0.711607,0.129742,32.456104,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# (rows, columns) of the combined DataFrame.
df_combi.shape

(2061, 377)

In [21]:
# Build a new AnnData object: X is the combined DataFrame, obs is taken from
# adataT (the obs of adataI is not carried over).
adataf=ad.AnnData(X=df_combi, obs=adataT.obs) # store the result in a new AnnData object



In [22]:
# Print the first row of the merged matrix, columns 0 to 376.
print(adataf.X[:1, :377])

[[ 3.80000000e+01  8.99921053e+02  5.80909966e+02  1.20697368e+03
   7.85921433e+02  9.28184211e+02  5.93153830e+02  7.26823544e-01
   1.48091820e-01  3.24467375e+01  9.64491055e+00  1.00000000e+00
   0.00000000e+00  7.48205358e-01  5.31760756e-02  4.16569086e+01
   1.13028579e+01  4.09879079e+01  1.14585693e+01  2.64445794e+01
   9.34251867e+00 -8.84549185e-02  8.52306097e-01  1.11239153e+02
   3.23691456e+01  9.66917374e-01  1.15026686e-02  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  9.53319063e-01  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  1.43236944e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
   0.00000

In [23]:
# Show the first rows of the obs table of the merged object.
adataf.obs.head()

Unnamed: 0,batch,annotation,barcode
0,V10F03-034_C,Tumor,AAACACCAATAACTGC-1
1,V10F03-034_C,Tumor,AAACAGCTTTCAGAAG-1
2,V10F03-034_C,Mixed,AAACAGGGTCTATATT-1
3,V10F03-034_C,Tumor,AAACAGTGTTCCTGGG-1
4,V10F03-034_C,Tumor,AAACATGGTGAGAGGA-1


In [62]:
# Write the merged AnnData object to disk.
merged_output_file = "/disk2/user/cormey/outputs/S_and_T_objects/34C"
adataf.write_h5ad(merged_output_file)