In [None]:
import sys
import os

# Add the folder paths to sys.path
sys.path.append(os.path.abspath('/scratch/user/s4647285/DeepHis2Exp/Models/BLEEP'))
sys.path.append(os.path.abspath('/scratch/user/s4647285/DeepHis2Exp/Wilson/BLEEP_Experiment'))

import bleep as bl
import BLEEP_pfizer_dataloader as bpd

In [None]:
# Lightning checkpoint that `bl.BLEEP.load_from_checkpoint` restores the model from.
CKPT_PATH = (
    '/scratch/user/s4647285/DeepHis2Exp/Wilson/BLEEP_Experiment/BLEEP_Model_1.ckpt/lightning_logs/version_9995117/checkpoints/epoch=149-step=10500.ckpt'
)

In [None]:
import pandas as pd
# Sample identifiers passed to the Pfizer dataloader as `sample_list`.
vis_sample_list = [
    "VLP78_A", "VLP78_D",
    "VLP79_A", "VLP79_D",
    "VLP80_A", "VLP80_D",
    "VLP81_A",
    "VLP82_A", "VLP82_D",
    "VLP83_A", "VLP83_D",
]

# The gene names live in a header-less CSV; every cell of that table is
# collected into one flat array.
file_path = "/QRISdata/Q1851/Wilson/Pfizer/PROCESSED_DATA/samples_markers_intersection.csv"
gene_list_df = pd.read_csv(file_path, header=None)
gene_list = gene_list_df.to_numpy().flatten()

In [None]:
# Build the dataloaders for the samples above, passing gene_list as the
# desired genes. The result is indexed positionally (0, 1, 2) further down.
dataloaders = bpd.BLEEP_Pfizer_data_loader(
    sample_list=vis_sample_list,
    desired_genes=gene_list,
    batch_size=1,
    shuffle=True,
    subset_size=10,
)

In [None]:
# Positional layout as used in this notebook:
# index 0 -> train, index 1 -> test, index 2 -> validation.
# NOTE(review): test-before-validation ordering is taken as-is — confirm against the loader.
train_dataloader, test_dataloader, val_dataloader = (
    dataloaders[0],
    dataloaders[1],
    dataloaders[2],
)

In [None]:
import pytorch_lightning as pl
# Restore the BLEEP model from the checkpoint file at CKPT_PATH.
model = bl.BLEEP.load_from_checkpoint(CKPT_PATH)

In [None]:
# Switch the restored model to evaluation mode before it is used for inference.
model.eval()

In [None]:
# Lightning trainer created with inference_mode=True; it is handed to
# bl.bleep_inference together with the model.
trainer = pl.Trainer(inference_mode=True)

In [None]:
# Run BLEEP inference with the train and test loaders. The result is indexed
# positionally (0-3) by the cells that follow.
prediction = bl.bleep_inference(
    model=model,
    trainer=trainer,
    tr_loader=train_dataloader,
    te_loader=test_dataloader,
)

In [None]:
# Slot 0 -> te_exp, slot 1 -> matched_spot_expression_pred.
# NOTE(review): presumably measured test expression and the matched predicted
# expression respectively — confirm against bl.bleep_inference.
te_exp, matched_spot_expression_pred = prediction[0], prediction[1]

In [None]:
# Lengths of the four inference outputs, shown side by side as a sanity check.
tuple(len(prediction[slot]) for slot in range(4))

In [None]:
# One sample name per spot: take the first element of each entry in
# prediction[3] and convert it to str.
sample_names = [entry[0].astype(str) for entry in prediction[3]]

In [None]:
# Split each spot's coordinate pair from prediction[2] into separate x / y lists.
x = [point[0] for point in prediction[2]]
y = [point[1] for point in prediction[2]]

In [None]:
import pandas as pd
# Ground-truth table: one row per spot (indexed by its sample name), one
# column per gene, with the spot coordinates appended as 'x' and 'y'.
df_ground_truth = pd.DataFrame(
    te_exp, columns=gene_list, index=sample_names
).assign(x=x, y=y)
# Number of spots per sample, for a manual sanity check.
print(df_ground_truth.index.value_counts())
print("Check the length with the number of spot to be sure we have the right sample")

In [None]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# the notebook render it as a table instead of a plain-text dump.
df_ground_truth.head()

In [None]:
import pandas as pd
# Prediction table: one row per spot (indexed by its sample name), one column
# per gene, with the spot coordinates appended as 'x' and 'y'.
df_pred = pd.DataFrame(matched_spot_expression_pred, columns=gene_list, index=sample_names)
df_pred['x'] = x
df_pred['y'] = y
# Spots per sample, counted on the prediction table itself (this cell used to
# print the ground-truth table's counts, a copy-paste slip).
print(df_pred.index.value_counts())
print(df_pred.head())

In [None]:
import numpy as np
# Split the prediction table into one DataFrame per test sample.
test_sample_names = np.unique(sample_names)
test_sample_dict = {
    # A list selector always yields a DataFrame; a bare label would collapse a
    # sample with a single spot into a Series and break column-based plotting.
    name: df_pred.loc[[name]]
    for name in test_sample_names
}

In [None]:
# Split the ground-truth table into one DataFrame per test sample.
test_sample_names = np.unique(sample_names)
ground_truth_test_sample_dict = {
    # A list selector always yields a DataFrame; a bare label would collapse a
    # sample with a single spot into a Series and break column-based plotting.
    name: df_ground_truth.loc[[name]]
    for name in test_sample_names
}

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Per-gene Pearson correlation between ground truth and prediction.
# The coordinate columns are dropped first: 'x' and 'y' hold the same values
# in both tables, so they would otherwise be reported as two extra "genes"
# and distort the chart and any ranking built on this Series.
correlation_matrix = df_ground_truth.drop(columns=['x', 'y']).corrwith(
    df_pred.drop(columns=['x', 'y']), method='pearson'
)

# One bar per gene.
plt.figure(figsize=(10, 6))
sns.barplot(x=correlation_matrix.index, y=correlation_matrix.values, palette='viridis')
plt.title('Pearson Correlation between Ground Truth and Prediction')
plt.xlabel('Genes')
plt.ylabel('Correlation Coefficient')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Show the correlations ranked from highest to lowest (at most 52 entries).
# NOTE(review): 52 is hard-coded — presumably the number of entries in
# correlation_matrix; confirm, or derive it from the Series length.
correlation_matrix.nlargest(52)

In [None]:
import PIL.Image as Image

def plot_exp_with_image(df = test_sample_dict,  sample_name = 'VLP79_D', gene_name = 'ETS1', prediction = True):
    """Overlay one gene's per-spot expression on the sample's tissue image.

    Args:
        df: Mapping from sample name to a DataFrame that contains at least the
            `gene_name`, 'x' and 'y' columns. The default is the object that
            `test_sample_dict` referred to when this function was defined.
        sample_name: Key looked up in `df`; also used to build the image path.
        gene_name: Column whose values colour the spots.
        prediction: Selects the title wording only — True for "predicted",
            False for "ground truth".

    Returns:
        None. The figure is shown with `plt.show()`.
    """
    # Keep only the three columns the plot needs.
    df_plot = df[sample_name].loc[:, [gene_name, 'x', 'y']]
    image_path = f"/QRISdata/Q1851/Wilson/Pfizer/PROCESSED_DATA/{sample_name}/{sample_name}.tif"

    # Load the image; the context manager releases the file handle once the
    # RGB copy has been made.
    with Image.open(image_path) as raw_image:
        im = raw_image.convert('RGB')

    plt.figure(figsize=(8, 6))  # Adjust the figure size as needed
    plt.imshow(im)
    plt.axis('off')  # Turn off axis labels

    # Spots are drawn on top of the image; the colour scale is fixed to [0, 1].
    plt.scatter(df_plot['x'], df_plot['y'], c=df_plot[gene_name], cmap='viridis', vmin=0, vmax=1, s=2, alpha=0.7)
    # Label the colorbar with the actual gene instead of the literal text 'gene_name'.
    plt.colorbar(label=gene_name)
    plt.xlabel('X Coordinate')
    plt.ylabel('Y Coordinate')
    if prediction:
        plt.title(f"{sample_name} with predicted {gene_name} expression ")
    else:
        plt.title(f"{sample_name} with ground truth {gene_name} expression ")
    plt.show()


In [None]:
# VLP79_D / ETS1: the first call relies entirely on the function's defaults,
# the second draws the ground truth for the same sample and gene.
plot_exp_with_image()
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP79_D',
    gene_name='ETS1',
    prediction=False,
)

In [None]:
# VLP80_D / ETS1: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_D',
    gene_name='ETS1',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_D',
    gene_name='ETS1',
    prediction=False,
)

In [None]:
# VLP80_A / ETS1: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_A',
    gene_name='ETS1',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_A',
    gene_name='ETS1',
    prediction=False,
)

In [None]:
# VLP80_A / C1S: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_A',
    gene_name='C1S',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_A',
    gene_name='C1S',
    prediction=False,
)

In [None]:
# VLP80_D / C1S: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_D',
    gene_name='C1S',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_D',
    gene_name='C1S',
    prediction=False,
)

In [None]:
# VLP79_D / C1S: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP79_D',
    gene_name='C1S',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP79_D',
    gene_name='C1S',
    prediction=False,
)

In [None]:
# VLP79_D / IKZF1: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP79_D',
    gene_name='IKZF1',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP79_D',
    gene_name='IKZF1',
    prediction=False,
)

In [None]:
# VLP80_D / IKZF1: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_D',
    gene_name='IKZF1',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_D',
    gene_name='IKZF1',
    prediction=False,
)

In [None]:
# VLP80_A / IKZF1: prediction first, then ground truth.
plot_exp_with_image(
    df=test_sample_dict,
    sample_name='VLP80_A',
    gene_name='IKZF1',
    prediction=True,
)
plot_exp_with_image(
    df=ground_truth_test_sample_dict,
    sample_name='VLP80_A',
    gene_name='IKZF1',
    prediction=False,
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Five genes with the highest correlation. The coordinate entries are removed
# if present: 'x' and 'y' are identical in both tables, so they would
# otherwise be ranked alongside the genes and crowd real ones out of the top five.
top5_genes = correlation_matrix.drop(labels=['x', 'y'], errors='ignore').nlargest(5).index

# The x-axis is the same for every gene, so it is computed once up front.
num_spots = len(df_ground_truth)
spot_positions = np.arange(num_spots)
# Aim for roughly ten ticks; the stride is clamped to 1 because fewer than ten
# spots would give a zero step, which np.arange cannot handle.
tick_step = max(1, num_spots // 10)
tick_positions = np.arange(0, num_spots, tick_step)

# One dot plot per gene: ground truth in blue, prediction in red.
for gene in top5_genes:
    plt.figure(figsize=(10, 6))
    plt.title(f'Dot Plot for Gene: {gene}')
    plt.xlabel('Spots')
    plt.ylabel('Expression Values')

    # Tick labels are 1-based spot numbers for the 0-based tick positions.
    plt.xticks(ticks=tick_positions, labels=tick_positions + 1)

    plt.scatter(spot_positions, df_ground_truth[gene],
                c='blue', alpha=0.6, edgecolors='w', linewidth=0.5,
                label='Ground Truth')

    plt.scatter(spot_positions, df_pred[gene],
                c='red', alpha=0.6, edgecolors='w', linewidth=0.5,
                label='Prediction')

    plt.legend()
    plt.tight_layout()
    plt.show()