# Testing cell_colocalisation_frame building
for `_colocalisation._cell_coloc`

In [194]:
import numpy as np
import pandas as pd

## Creating spots to test coloc

In [195]:
# --- Test configuration --------------------------------------------------
SEED = 1
im_shape = (3, 50, 50)      # upper (exclusive) bound of the coordinates along each axis
voxel_size = (3, 1, 1)      # not used by the cells visible in this notebook
coloc_distance = 3          # not used by the cells visible in this notebook
spot_number = 1000          # number of spots drawn per spot set
dim = len(im_shape)

random_gen = np.random.default_rng(seed=SEED)

# --- Two independent sets of random integer coordinates ------------------
# Each set has shape (spot_number, dim); column k is drawn in [0, im_shape[k]).
# The generator expression is consumed in order, so spots1 is drawn before spots2.
lower_bounds = [0] * dim
sample_shape = (spot_number, dim)
spots1, spots2 = (
    random_gen.integers(low=lower_bounds, high=im_shape, size=sample_shape)
    for _ in range(2)
)

## Creating fake cell ids and cluster ids

In [196]:
CELL_NUMBER = 10
# One random cell id in [0, CELL_NUMBER) per spot, drawn separately for each spot set.
cell_ids1 = random_gen.integers(0, CELL_NUMBER, spot_number)
cell_ids2 = random_gen.integers(0, CELL_NUMBER, spot_number)

CLUSTER_PROPORTION = 0.10
# Number of leading spots labelled as clustered in each spot set.
cutoff = int(spot_number * CLUSTER_PROPORTION)


def _fake_cluster_ids(total, clustered):
    """Build deterministic fake cluster labels.

    Parameters
    ----------
    total : int
        Length of the returned array.
    clustered : int
        Number of leading entries labelled as clustered.

    Returns
    -------
    numpy.ndarray
        Float array of length `total`: 1 for the first `clustered` entries
        (a single shared cluster id; the test should not need distinct ids),
        -1 for the remaining entries (free spots).
    """
    # The labels were previously seeded with a uniform random draw that was then
    # overwritten in full; np.full yields the same float array without consuming
    # random numbers from the generator.
    labels = np.full(total, -1.0)
    labels[:clustered] = 1
    return labels


cluster_id1 = _fake_cluster_ids(spot_number, cutoff)
cluster_id2 = _fake_cluster_ids(spot_number, cutoff)


In [197]:
# Sanity check: each spot set must contain at least one spot carrying cluster id 1.
assert (cluster_id1 == 1).any()
assert (cluster_id2 == 1).any()

## Creating df

In [198]:
# Placeholder acquisition ids and names, one pair per spot set.
acquisition_id1, acquisition_id2 = 1, 2
acquisition_name1, acquisition_name2 = "Cy3", "Cy5"

### Spots DF

In [199]:
# Stack both spot sets: the first spot_number rows come from set 1, the rest from set 2.
coordinates = np.concatenate([spots1, spots2], axis=0)

# One row per spot. 'coordinates' stores each spot as a (z, y, x) tuple of Python ints.
spots_df = pd.DataFrame({
    'spots_id': np.arange(len(coordinates)),
    'name': [acquisition_name1] * spot_number + [acquisition_name2] * spot_number,
    'acquisition_id': [acquisition_id1] * spot_number + [acquisition_id2] * spot_number,
    'cell_id': np.concatenate([cell_ids1, cell_ids2]),
    'cluster_id': np.concatenate([cluster_id1, cluster_id2]),
    'coordinates': [tuple(point) for point in coordinates.tolist()],
})
# A cluster id of -1 marks a free spot; anything else counts as clustered.
spots_df['is_clustered'] = spots_df['cluster_id'].ne(-1)
spots_df

Unnamed: 0,spots_id,name,acquisition_id,cell_id,cluster_id,coordinates,is_clustered
0,0,Cy3,1,6,1.0,"(1, 25, 37)",True
1,1,Cy3,1,1,1.0,"(2, 1, 7)",True
2,2,Cy3,1,7,1.0,"(2, 47, 12)",True
3,3,Cy3,1,8,1.0,"(0, 43, 21)",True
4,4,Cy3,1,3,1.0,"(0, 41, 12)",True
...,...,...,...,...,...,...,...
1995,1995,Cy5,2,4,-1.0,"(2, 8, 41)",False
1996,1996,Cy5,2,3,-1.0,"(2, 1, 25)",False
1997,1997,Cy5,2,4,-1.0,"(0, 45, 48)",False
1998,1998,Cy5,2,9,-1.0,"(0, 27, 19)",False


### Cell Df

In [200]:
INDEX_KEYS = ['name', 'acquisition_id', 'cell_id']


def _spots_per_cell(spots, spots_column, count_column):
    """Collect spot coordinates per (name, acquisition_id, cell_id) group.

    Parameters
    ----------
    spots : pandas.DataFrame
        Frame holding the INDEX_KEYS columns and a 'coordinates' column.
    spots_column : str
        Name given to the column holding each group's list of coordinates.
    count_column : str
        Name given to the column holding the length of that list.

    Returns
    -------
    pandas.DataFrame
        Indexed by INDEX_KEYS, with columns [spots_column, count_column].
    """
    per_cell = spots.groupby(INDEX_KEYS).agg({'coordinates': list})
    per_cell[count_column] = per_cell['coordinates'].apply(len)
    return per_cell.rename(columns={'coordinates': spots_column})


# Same aggregation applied to all spots, clustered spots only, and free spots only.
clustered_mask = spots_df['is_clustered']
Cell_df_all = _spots_per_cell(spots_df, 'all_spots', 'total_rna_number')
Cell_df_clustered = _spots_per_cell(spots_df[clustered_mask], 'clustered_spots', 'clustered_spot_number')
Cell_df_free = _spots_per_cell(spots_df[~clustered_mask], 'free_spots', 'free_spot_number')

# DataFrame.join defaults to a left join on the shared index, so a group that has
# no clustered (or no free) spots ends up with NaN in the corresponding columns.
Cell_df = Cell_df_all.join([Cell_df_clustered, Cell_df_free]).reset_index(drop=False)
Cell_df

Unnamed: 0,name,acquisition_id,cell_id,all_spots,total_rna_number,clustered_spots,clustered_spot_number,free_spots,free_spot_number
0,Cy3,1,0,"[(0, 1, 43), (2, 48, 4), (1, 1, 35), (0, 29, 4...",101,"[(0, 1, 43), (2, 48, 4), (1, 1, 35), (0, 29, 4...",8,"[(2, 9, 46), (1, 15, 7), (0, 30, 11), (0, 14, ...",93
1,Cy3,1,1,"[(2, 1, 7), (2, 14, 27), (0, 16, 48), (0, 41, ...",110,"[(2, 1, 7), (2, 14, 27), (0, 16, 48), (0, 41, ...",11,"[(2, 10, 1), (1, 32, 19), (1, 1, 0), (0, 24, 2...",99
2,Cy3,1,2,"[(1, 32, 27), (1, 9, 23), (2, 15, 23), (2, 48,...",93,"[(1, 32, 27), (1, 9, 23), (2, 15, 23), (2, 48,...",9,"[(0, 48, 36), (1, 48, 48), (2, 33, 37), (1, 19...",84
3,Cy3,1,3,"[(0, 41, 12), (0, 19, 20), (1, 38, 18), (1, 5,...",106,"[(0, 41, 12), (0, 19, 20), (1, 38, 18), (1, 5,...",7,"[(1, 42, 40), (0, 39, 15), (0, 37, 9), (0, 47,...",99
4,Cy3,1,4,"[(1, 49, 37), (2, 39, 43), (0, 10, 42), (2, 43...",98,"[(1, 49, 37), (2, 39, 43), (0, 10, 42), (2, 43...",11,"[(1, 45, 19), (1, 1, 46), (0, 40, 37), (0, 10,...",87
5,Cy3,1,5,"[(0, 21, 23), (0, 15, 35), (2, 17, 43), (0, 11...",87,"[(0, 21, 23), (0, 15, 35)]",2,"[(2, 17, 43), (0, 11, 42), (1, 18, 47), (0, 7,...",85
6,Cy3,1,6,"[(1, 25, 37), (2, 16, 22), (0, 22, 48), (2, 10...",106,"[(1, 25, 37), (2, 16, 22), (0, 22, 48), (2, 10...",10,"[(1, 34, 47), (0, 22, 23), (2, 39, 12), (1, 42...",96
7,Cy3,1,7,"[(2, 47, 12), (2, 41, 26), (1, 3, 22), (0, 7, ...",91,"[(2, 47, 12), (2, 41, 26), (1, 3, 22), (0, 7, ...",12,"[(1, 36, 14), (1, 47, 11), (0, 31, 40), (1, 28...",79
8,Cy3,1,8,"[(0, 43, 21), (1, 5, 49), (2, 13, 36), (1, 38,...",108,"[(0, 43, 21), (1, 5, 49), (2, 13, 36), (1, 38,...",13,"[(0, 42, 29), (2, 30, 26), (1, 49, 19), (2, 19...",95
9,Cy3,1,9,"[(2, 6, 15), (0, 0, 37), (0, 14, 24), (0, 21, ...",100,"[(2, 6, 15), (0, 0, 37), (0, 14, 24), (0, 21, ...",17,"[(0, 48, 24), (1, 37, 44), (1, 35, 10), (1, 49...",83


### colocalisation_df

In [201]:
# Reshape to one row per cell_id; the resulting columns form a three-level
# MultiIndex (value column, name, acquisition_id).
pivot_values_columns = ['all_spots', 'clustered_spots', 'free_spots', 'total_rna_number']
colocalisation_df = Cell_df.pivot(
    index='cell_id',
    columns=['name', 'acquisition_id'],
    values=pivot_values_columns,
)
colocalisation_df

Unnamed: 0_level_0,all_spots,all_spots,clustered_spots,clustered_spots,free_spots,free_spots,total_rna_number,total_rna_number
name,Cy3,Cy5,Cy3,Cy5,Cy3,Cy5,Cy3,Cy5
acquisition_id,1,2,1,2,1,2,1,2
cell_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
0,"[(0, 1, 43), (2, 48, 4), (1, 1, 35), (0, 29, 4...","[(2, 39, 9), (0, 40, 29), (1, 12, 33), (1, 31,...","[(0, 1, 43), (2, 48, 4), (1, 1, 35), (0, 29, 4...","[(2, 39, 9), (0, 40, 29), (1, 12, 33), (1, 31,...","[(2, 9, 46), (1, 15, 7), (0, 30, 11), (0, 14, ...","[(0, 38, 40), (0, 1, 31), (1, 44, 0), (1, 27, ...",101,99
1,"[(2, 1, 7), (2, 14, 27), (0, 16, 48), (0, 41, ...","[(1, 28, 15), (2, 14, 39), (1, 15, 45), (1, 44...","[(2, 1, 7), (2, 14, 27), (0, 16, 48), (0, 41, ...","[(1, 28, 15), (2, 14, 39), (1, 15, 45), (1, 44...","[(2, 10, 1), (1, 32, 19), (1, 1, 0), (0, 24, 2...","[(0, 20, 39), (0, 43, 45), (2, 17, 32), (2, 35...",110,89
2,"[(1, 32, 27), (1, 9, 23), (2, 15, 23), (2, 48,...","[(2, 25, 41), (0, 47, 29), (1, 15, 11), (0, 19...","[(1, 32, 27), (1, 9, 23), (2, 15, 23), (2, 48,...","[(2, 25, 41), (0, 47, 29), (1, 15, 11), (0, 19...","[(0, 48, 36), (1, 48, 48), (2, 33, 37), (1, 19...","[(2, 10, 7), (0, 27, 49), (0, 13, 11), (2, 1, ...",93,111
3,"[(0, 41, 12), (0, 19, 20), (1, 38, 18), (1, 5,...","[(2, 12, 49), (2, 11, 3), (1, 49, 34), (2, 18,...","[(0, 41, 12), (0, 19, 20), (1, 38, 18), (1, 5,...","[(2, 12, 49), (2, 11, 3), (1, 49, 34), (2, 18,...","[(1, 42, 40), (0, 39, 15), (0, 37, 9), (0, 47,...","[(2, 3, 20), (0, 0, 18), (0, 22, 16), (0, 23, ...",106,92
4,"[(1, 49, 37), (2, 39, 43), (0, 10, 42), (2, 43...","[(1, 31, 16), (0, 2, 46), (0, 10, 33), (2, 36,...","[(1, 49, 37), (2, 39, 43), (0, 10, 42), (2, 43...","[(1, 31, 16), (0, 2, 46), (0, 10, 33), (2, 36,...","[(1, 45, 19), (1, 1, 46), (0, 40, 37), (0, 10,...","[(1, 45, 8), (0, 16, 43), (0, 14, 17), (1, 39,...",98,97
5,"[(0, 21, 23), (0, 15, 35), (2, 17, 43), (0, 11...","[(2, 30, 35), (0, 24, 18), (1, 14, 15), (2, 40...","[(0, 21, 23), (0, 15, 35)]","[(2, 30, 35), (0, 24, 18), (1, 14, 15), (2, 40...","[(2, 17, 43), (0, 11, 42), (1, 18, 47), (0, 7,...","[(1, 40, 7), (0, 19, 47), (0, 21, 21), (1, 21,...",87,95
6,"[(1, 25, 37), (2, 16, 22), (0, 22, 48), (2, 10...","[(1, 30, 27), (0, 23, 5), (0, 14, 29), (2, 30,...","[(1, 25, 37), (2, 16, 22), (0, 22, 48), (2, 10...","[(1, 30, 27), (0, 23, 5), (0, 14, 29), (2, 30,...","[(1, 34, 47), (0, 22, 23), (2, 39, 12), (1, 42...","[(1, 14, 10), (0, 48, 23), (1, 20, 10), (2, 16...",106,115
7,"[(2, 47, 12), (2, 41, 26), (1, 3, 22), (0, 7, ...","[(2, 13, 25), (1, 29, 49), (2, 22, 44), (1, 39...","[(2, 47, 12), (2, 41, 26), (1, 3, 22), (0, 7, ...","[(2, 13, 25), (1, 29, 49), (2, 22, 44), (1, 39...","[(1, 36, 14), (1, 47, 11), (0, 31, 40), (1, 28...","[(2, 15, 14), (1, 34, 47), (2, 17, 9), (1, 3, ...",91,102
8,"[(0, 43, 21), (1, 5, 49), (2, 13, 36), (1, 38,...","[(2, 34, 13), (1, 44, 24), (0, 44, 49), (1, 19...","[(0, 43, 21), (1, 5, 49), (2, 13, 36), (1, 38,...","[(2, 34, 13), (1, 44, 24), (0, 44, 49)]","[(0, 42, 29), (2, 30, 26), (1, 49, 19), (2, 19...","[(1, 19, 32), (0, 46, 40), (1, 7, 17), (2, 11,...",108,91
9,"[(2, 6, 15), (0, 0, 37), (0, 14, 24), (0, 21, ...","[(2, 21, 4), (1, 2, 18), (1, 10, 0), (0, 34, 0...","[(2, 6, 15), (0, 0, 37), (0, 14, 24), (0, 21, ...","[(2, 21, 4), (1, 2, 18), (1, 10, 0), (0, 34, 0...","[(0, 48, 24), (1, 37, 44), (1, 35, 10), (1, 49...","[(1, 28, 35), (2, 27, 28), (1, 16, 32), (2, 22...",100,109


This DataFrame shape makes it possible to reuse the current implementation:



In [None]:
# Illustrative excerpt only: the `if False` guard means this cell never executes.
# NOTE(review): it refers to names that no visible cell defines (coloc_name,
# spots_colocalisation, acquisition_name_id1/2, colocalisation_distance) and to an
# 'rna_coords' column, whereas the pivot built earlier in this notebook produces
# 'all_spots', 'clustered_spots', 'free_spots' and 'total_rna_number'.
# These would have to be adapted before the guard is removed.
if False : 
    colocalisation_df[("spots_to_spots_count",coloc_name,"forward")] = colocalisation_df['rna_coords'].apply(
            lambda x: spots_colocalisation(
                spot_list1= x[(acquisition_name_id1,acquisition_id1)],
                spot_list2= x[(acquisition_name_id2,acquisition_id2)],
                distance=colocalisation_distance,
                voxel_size=voxel_size
                ),axis=1
            )
    # Fraction = forward count divided by the total RNA number of the first acquisition.
    colocalisation_df[("spots_to_spots_fraction",coloc_name,"forward")] = colocalisation_df[("spots_to_spots_count",coloc_name,"forward")].astype(float) / colocalisation_df[('total_rna_number',acquisition_name_id1,acquisition_id1)].astype(float)

# Concatenate DataFrames with different columns on axis 0

In [202]:
import pandas as pd
import numpy as np

In [219]:
# Top-level column groups shared by both toy frames.
COLUMNS_MAIN = ['summer', 'winter']

# Second-level columns: the two frames differ only in their third column.
COLUMNS_1, COLUMNS_2 = (
    ['group_id', 'id', item_column, 'colors']
    for item_column in ('fruits', 'vegetables')
)

# Row data (five rows).
group_id = [1, 2, 1, 2, 1]
ids = np.arange(len(group_id))
fruits = ['apple', 'apple', 'lemon', 'kaki', 'pear']
vegetables = ['pepper', 'tomato', 'peper', 'pepper', 'courgette']
COLORS = ['green', 'red', 'yellow', 'orange', 'green']

In [225]:
# Every top-level group in COLUMNS_MAIN gets the same four data columns, so each row
# is the per-group block of values repeated twice.
fruit_block = (group_id, ids, fruits, COLORS)
vegetable_block = (group_id, ids, vegetables, COLORS)

DF1 = pd.DataFrame(
    data=list(zip(*fruit_block, *fruit_block)),
    columns=pd.MultiIndex.from_product([COLUMNS_MAIN, COLUMNS_1]),
)
DF2 = pd.DataFrame(
    data=list(zip(*vegetable_block, *vegetable_block)),
    columns=pd.MultiIndex.from_product([COLUMNS_MAIN, COLUMNS_2]),
)

In [226]:
# Display the fruit frame.
# NOTE(review): the saved output lists 10 rows (ids 0-9) although the lists defined in
# the visible cells hold 5 entries each; the output looks stale. Re-run from a fresh
# kernel to refresh it.
DF1

Unnamed: 0_level_0,summer,summer,summer,summer,winter,winter,winter,winter
Unnamed: 0_level_1,group_id,id,fruits,colors,group_id,id,fruits,colors
0,1,0,apple,green,1,0,apple,green
1,2,1,apple,red,2,1,apple,red
2,1,2,lemon,yellow,1,2,lemon,yellow
3,2,3,kaki,orange,2,3,kaki,orange
4,1,4,pear,green,1,4,pear,green
5,1,5,apple,green,1,5,apple,green
6,2,6,apple,red,2,6,apple,red
7,1,7,lemon,yellow,1,7,lemon,yellow
8,2,8,kaki,orange,2,8,kaki,orange
9,1,9,pear,green,1,9,pear,green


In [227]:
# Display the vegetable frame.
# NOTE(review): the saved output lists 10 rows (ids 0-9) although the lists defined in
# the visible cells hold 5 entries each; the output looks stale. Re-run from a fresh
# kernel to refresh it.
DF2

Unnamed: 0_level_0,summer,summer,summer,summer,winter,winter,winter,winter
Unnamed: 0_level_1,group_id,id,vegetables,colors,group_id,id,vegetables,colors
0,1,0,pepper,green,1,0,pepper,green
1,2,1,tomato,red,2,1,tomato,red
2,1,2,peper,yellow,1,2,peper,yellow
3,2,3,pepper,orange,2,3,pepper,orange
4,1,4,courgette,green,1,4,courgette,green
5,1,5,pepper,green,1,5,pepper,green
6,2,6,tomato,red,2,6,tomato,red
7,1,7,peper,yellow,1,7,peper,yellow
8,2,8,pepper,orange,2,8,pepper,orange
9,1,9,courgette,green,1,9,courgette,green


In [228]:
# Row-wise concatenation with a fresh integer index. Columns present in only one
# frame are kept and filled with NaN for the rows of the other frame.
pd.concat([DF1, DF2], ignore_index=True, axis='index')

Unnamed: 0_level_0,summer,summer,summer,summer,winter,winter,winter,winter,summer,winter
Unnamed: 0_level_1,group_id,id,fruits,colors,group_id,id,fruits,colors,vegetables,vegetables
0,1,0,apple,green,1,0,apple,green,,
1,2,1,apple,red,2,1,apple,red,,
2,1,2,lemon,yellow,1,2,lemon,yellow,,
3,2,3,kaki,orange,2,3,kaki,orange,,
4,1,4,pear,green,1,4,pear,green,,
5,1,5,apple,green,1,5,apple,green,,
6,2,6,apple,red,2,6,apple,red,,
7,1,7,lemon,yellow,1,7,lemon,yellow,,
8,2,8,kaki,orange,2,8,kaki,orange,,
9,1,9,pear,green,1,9,pear,green,,


### Setting tuple value in multi-index columns df

In [237]:
# Store the same (1, 1) tuple in every row of a new 'TVA' column.
DF1['TVA'] = [(1, 1) for _ in range(len(DF1))]
DF1

Unnamed: 0_level_0,summer,summer,summer,summer,winter,winter,winter,winter,TVA
Unnamed: 0_level_1,group_id,id,fruits,colors,group_id,id,fruits,colors,Unnamed: 9_level_1
0,1,0,apple,green,1,0,apple,green,"(1, 1)"
1,2,1,apple,red,2,1,apple,red,"(1, 1)"
2,1,2,lemon,yellow,1,2,lemon,yellow,"(1, 1)"
3,2,3,kaki,orange,2,3,kaki,orange,"(1, 1)"
4,1,4,pear,green,1,4,pear,green,"(1, 1)"
5,1,5,apple,green,1,5,apple,green,"(1, 1)"
6,2,6,apple,red,2,6,apple,red,"(1, 1)"
7,1,7,lemon,yellow,1,7,lemon,yellow,"(1, 1)"
8,2,8,kaki,orange,2,8,kaki,orange,"(1, 1)"
9,1,9,pear,green,1,9,pear,green,"(1, 1)"


In [253]:
tva_column = DF1['TVA']
print(tva_column.to_numpy())

# Membership test against the tuple stored in the first row.
0 in tva_column.iat[0]

[(1, 1) (1, 1) (1, 1) (1, 1) (1, 1) (1, 1) (1, 1) (1, 1) (1, 1) (1, 1)]


False