# Processing Visiopharm output

# 1. Import packages and data

pip install opencv-python

In [1]:
import cv2
from matplotlib import image
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import json

# for clustering
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# 2. Function definition

In [1]:
def merged_modularity(path_for_visium, path_tissue_positions, path_for_morphological_features, name):
    """Merge Visium spot counts with per-spot mean morphological features and save them.

    Each row of the morphological table (one row per nucleus/object, located by
    its ``new_x``/``new_y`` columns) is assigned to every spot for which
    ``isInside(spot_x, spot_y, 65, new_x, new_y)`` is true. The morphological
    features are then averaged per spot and joined to the count matrix.

    Relies on two names that must already exist at module level: ``sc``
    (used for ``sc.read_visium``; presumably ``import scanpy as sc`` -- confirm)
    and ``isInside`` (presumably a point-in-circle test -- confirm).

    Args:
        path_for_visium: Path passed to ``sc.read_visium``.
        path_tissue_positions: Header-less CSV of spot positions. Column 0 is
            matched against the spot names, column 4 is used as the spot y
            coordinate and column 5 as the spot x coordinate (presumably the
            Space Ranger ``tissue_positions_list.csv`` layout -- confirm).
        path_for_morphological_features: CSV of morphological features. Must
            contain ``new_x``, ``new_y``, the feature columns listed in
            ``morpho_features`` below, and the columns ``Unnamed: 0``, ``Name``
            and ``Total Nuclei (#)`` (which are dropped).
        name: Prefix used for the two output file names.

    Returns:
        None. Writes ``dataframe/{name}_profils_merged_morpho_raw.csv`` (counts
        plus spot coordinates plus mean features) and
        ``dataframe/{name}_counts_with_probes.csv`` (same without the features),
        then prints the number of missing values per column.
    """
    import os  # local import: only needed to make sure the output folder exists

    # Feature columns that are averaged per spot and later removed again to
    # build the counts-only table.
    morpho_features = [
        'Nuclei Area per object', 'Texture inertia', 'Max Intensity',
        'Mean Intensity', 'Min Intensity', 'Modus', 'Texture Anisotropy',
        'Texture Entropy', 'Texture Major Axis', 'Texture Major Direction',
        'Texture Minor Axis',
    ]
    # Radius handed to isInside, in the same units as the spot coordinates.
    rad = 65

    # Load the Visium data once. The spot names are turned into a "probes"
    # column on a *copy* of .obs, so the AnnData object keeps its original
    # obs_names and does not have to be read from disk a second time.
    ST_visium = sc.read_visium(path_for_visium)
    spot_obs = ST_visium.obs.reset_index().rename(columns={"index": "probes"})

    # Load the tissue positions and attach them to the spots by name.
    tissue_positions = pd.read_csv(path_tissue_positions, header=None)
    tissue_positions = tissue_positions.rename(columns={0: "probes"})
    coordinates_position_probes_visium = pd.merge(spot_obs, tissue_positions, how='inner', on=['probes'])
    y_coordinates_position_probes_visium = list(coordinates_position_probes_visium[4])
    x_coordinates_position_probes_visium = list(coordinates_position_probes_visium[5])

    # Count matrix as a dense spots x genes dataframe.
    counts = pd.DataFrame(ST_visium.X.todense(), columns=ST_visium.var_names, index=ST_visium.obs_names)

    # Morphological features, one row per object.
    morpho = pd.read_csv(path_for_morphological_features)
    morpho = morpho.drop(columns=["Unnamed: 0", "Name", "Total Nuclei (#)"])

    # For every object, look for the spot(s) containing it. The row *position*
    # is recorded directly instead of re-searching the table by coordinates,
    # which keeps the rows and the spot coordinates aligned by construction.
    matched_rows = []
    x_ST_list = []
    y_ST_list = []
    for row_pos, (x, y) in enumerate(zip(morpho["new_x"].tolist(), morpho["new_y"].tolist())):
        for circle_x, circle_y in zip(x_coordinates_position_probes_visium, y_coordinates_position_probes_visium):
            if isInside(circle_x, circle_y, rad, x, y):
                matched_rows.append(row_pos)
                x_ST_list.append(circle_x)
                y_ST_list.append(circle_y)

    # Objects lying inside a spot, tagged with that spot's coordinates.
    # NOTE(review): an object inside two spots contributes to both of them.
    morpho_in_spots = morpho.iloc[matched_rows].assign(x_coord=x_ST_list, y_coord=y_ST_list)

    # Mean of each morphological feature per spot. The columns are selected
    # with a list: tuple selection on a groupby raises in pandas >= 2.0.
    df_mean = morpho_in_spots.groupby(['x_coord', 'y_coord'])[morpho_features].mean()
    df_mean = df_mean.reset_index()  # turn the group keys back into columns

    # Spot coordinates as stored in the AnnData object, with the mean features attached.
    coord = pd.DataFrame(ST_visium.obsm['spatial'], columns=['x_coord', 'y_coord'], index=ST_visium.obs_names)
    merged_coord = pd.merge(coord, df_mean, on=["x_coord", "y_coord"], how="left")

    # Bring the spot names back in through the tissue-position coordinates.
    coordinates_position_probes_visium = coordinates_position_probes_visium.rename(columns={4: "y_coord", 5: "x_coord"})
    merged_probes = pd.merge(coordinates_position_probes_visium, merged_coord, on=["x_coord", "y_coord"], how="left")
    merged_probes_clean = merged_probes.drop(['in_tissue', 'array_row', 'array_col', 1, 2, 3], axis=1)
    merged_probes_clean = merged_probes_clean.set_index('probes')

    # Counts + coordinates + mean features, indexed by spot name.
    final_df = counts.join(merged_probes_clean)

    # Save results (create the output folder if it is missing).
    os.makedirs('dataframe', exist_ok=True)
    final_df.to_csv(f'dataframe/{name}_profils_merged_morpho_raw.csv')
    counts_with_probes = final_df.drop(morpho_features, axis=1)
    counts_with_probes.to_csv(f'dataframe/{name}_counts_with_probes.csv')
    print("Final df and count df correctly save !")
    print("Number of NA:")
    print(final_df.isna().sum())

# 3. DPLFC