In [1]:
# this is a demo code for computing the average intensity of proteins of all images (save as .csv)
import numpy as np
import pandas as pd
import imageio
from IPython.display import display, Image
import time
from tqdm import tqdm
import os

# ---------- specify these args ----------
img_dir = 'images' # dir that saves your images
n_train = 225 # number of images
n_protein = 52 # number of proteins 
# ----------------------------------------

avg_list = [] 
for i in tqdm(np.arange(1, n_train+1), total=n_train, desc="Processing"):
    img_file_name = f'{i}.tiff'
    path = os.path.join(img_dir, img_file_name)
    img = imageio.v2.imread(path)
    avg = np.mean(img, axis=(1,2))
    avg_list.append([i] + avg.tolist())
avg_df = pd.DataFrame(avg_list, columns = ['id'] + ['protein' + str(i) for i in range(1, 52+1)])    
avg_df.to_csv('avg_intensity.csv', index=False)

avg_df.head()

Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████| 225/225 [00:05<00:00, 40.58it/s]


Unnamed: 0,id,protein1,protein2,protein3,protein4,protein5,protein6,protein7,protein8,protein9,...,protein43,protein44,protein45,protein46,protein47,protein48,protein49,protein50,protein51,protein52
0,1,690.930664,1.101514,0.338033,2.095259,1.986703,2.655478,5.19267,3.728465,7.974388,...,0.118369,0.927603,0.070413,11.80284,0.799855,2.396329,4.577846,6.061446,1.406956,0.073323
1,2,674.043884,1.716993,0.512083,3.254199,3.060911,4.100561,8.003437,5.790457,2.941196,...,0.120988,0.773724,0.348591,17.131119,0.715897,2.53706,4.851519,5.077485,1.175855,0.078326
2,3,661.055176,1.76608,0.524979,3.265607,3.136247,4.214772,8.161823,5.971869,3.592763,...,0.239581,0.786141,0.232712,14.377203,0.634852,4.011008,7.628147,5.026156,1.138687,0.061795
3,4,708.462463,1.057975,0.3135,1.972012,1.876603,2.525851,4.90344,3.542351,2.584691,...,0.062979,0.270438,0.06621,0.798588,0.574971,2.613918,4.953641,5.981791,1.374423,0.078093
4,5,515.079346,1.493825,0.427023,2.671709,2.539767,3.423347,6.651449,4.852226,4.896438,...,0.121323,0.750853,0.066865,10.534543,0.72183,1.484233,2.815199,3.897813,0.872405,0.068889


In [5]:
# calculate co-locolization of p53 with the followings: Histone H3, Ki-67, EGFR, CD44, Vimentin, EpCAM
'''
index	Metal Tag	Target
protein10	In113	Histone H3
protein11	La139	Histone H3
protein20	Sm149	Vimentin
protein24	Eu153	Histone H3
protein30	Tb159	p53
protein31	Gd160	CD44
protein32	Dy161	EpCAM
protein39	Er168	Ki-67
protein40	Tm169	EGFR

'''

import numpy as np
import pandas as pd
import imageio
from tqdm import tqdm
import os

# ---------- specify these args ----------
img_dir = 'images' # directory containing all images
n_images = 225 # number of images
proteins_of_interest = [9, 10, 19, 23, 29, 30, 31, 38, 39]  # Indices of proteins for which co-localization will be calculated
# ----------------------------------------

co_localization_data = []

# Iterate through each image
for i in tqdm(range(1, n_images + 1), desc="Processing"):
    # Load the image
    img_file_name = f'{i}.tiff'
    img_path = os.path.join(img_dir, img_file_name)
    img = imageio.v2.imread(img_path)

    # Initialize an array to store co-localization coefficients for each protein pair
    co_localization_coefficients = []

    # Iterate through pairs of proteins
    for protein_index_1 in proteins_of_interest:
        protein_channel_1 = img[..., protein_index_1]

        for protein_index_2 in proteins_of_interest:
            if protein_index_1 == protein_index_2:
                continue  # Skip the same protein

            protein_channel_2 = img[..., protein_index_2]

            # Calculate co-localization coefficient (e.g., Pearson correlation)
            correlation = np.corrcoef(protein_channel_1.flatten(), protein_channel_2.flatten())[0, 1]

            co_localization_coefficients.append(correlation)

    # Append co-localization data for the current image
    co_localization_data.append([i] + co_localization_coefficients)

# Convert co-localization data to DataFrame
column_names = ['id'] + [f'co_localization_{i+1}_{j+1}' for i in proteins_of_interest for j in proteins_of_interest if i != j]
co_localization_df = pd.DataFrame(co_localization_data, columns=column_names)

# Save co-localization data as CSV
co_localization_df.to_csv('co_localization_results.csv', index=False)

# Display the first few rows of the DataFrame
co_localization_df.head()


Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████| 225/225 [00:10<00:00, 21.52it/s]


Unnamed: 0,id,co_localization_10_11,co_localization_10_20,co_localization_10_24,co_localization_10_30,co_localization_10_31,co_localization_10_32,co_localization_10_39,co_localization_10_40,co_localization_11_10,...,co_localization_39_32,co_localization_39_40,co_localization_40_10,co_localization_40_11,co_localization_40_20,co_localization_40_24,co_localization_40_30,co_localization_40_31,co_localization_40_32,co_localization_40_39
0,1,0.994129,0.993302,0.993909,0.994704,0.992783,0.994806,0.992848,0.994251,0.994129,...,0.993937,0.994844,0.994251,0.993808,0.994025,0.994429,0.994537,0.993583,0.994813,0.994844
1,2,0.989568,0.991125,0.989136,0.991143,0.989121,0.990702,0.990386,0.991315,0.989568,...,0.992693,0.994526,0.991315,0.988178,0.992918,0.992142,0.992979,0.993135,0.993897,0.994526
2,3,0.994546,0.993543,0.99209,0.99209,0.992287,0.992959,0.990164,0.989783,0.994546,...,0.991581,0.99473,0.989783,0.989665,0.9913,0.990732,0.991188,0.991319,0.990842,0.99473
3,4,0.995324,0.996663,0.996378,0.99653,0.995251,0.994936,0.995172,0.996388,0.995324,...,0.994008,0.995467,0.996388,0.994936,0.996634,0.99593,0.99621,0.994501,0.99462,0.995467
4,5,0.990384,0.984511,0.984182,0.984303,0.98351,0.98412,0.983095,0.983563,0.990384,...,0.994104,0.994805,0.983563,0.987919,0.990088,0.994558,0.994325,0.993625,0.994005,0.994805
