In [None]:
#general packages
import pandas as pd
import numpy as np
#plotting packages
import seaborn as sns
import matplotlib.pyplot as plt
#pearsonr package
from scipy.stats import pearsonr
#import custom package
from helpers.stain_intron_quant_main import gen_figure

## Make figures

In [None]:
#load the two result tables; in both, the first CSV column is used as the row index
info, introns = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("./results/Hes1_vs_EU.csv", "./results/intron_spot_info.csv")
)

In [None]:
#list the distinct time labels present in the stain table (order of first appearance)
pd.unique(info["Time"])

In [None]:
#get mean stain intensity per time
#strip stray whitespace from the time labels BEFORE grouping, so that variants such as
#"1h" and "1h " are pooled into one group; stripping the index after grouping would leave
#duplicate labels, and reindex refuses to work on an axis with duplicates
#numeric_only=True keeps any non-numeric column from breaking the mean on pandas >= 2.0
mean_values = (
    info.assign(Time=info["Time"].str.strip())
    .groupby("Time")
    .mean(numeric_only=True)
)
#chronological order; time points absent from the data become all-NaN rows
mean_values_sorted = mean_values.reindex(["0h", "1h", "1.5h", "2h", "2.5h", "3h","4h","5h","6h"])

In [None]:
#count intron spots per cell, one table per time point (time points in order of first appearance)
intron_counts_per_cell = [
    introns[introns["Time"] == time_label]
    .groupby("cell id")
    .size()              #number of spot rows for each cell id
    .to_frame("Counts")
    .assign(Time=time_label)
    for time_label in introns["Time"].unique()
]
#stack the per-time tables; the index is the cell id
final_introns = pd.concat(intron_counts_per_cell)

In [None]:
#get mean intron counts per cell
#strip whitespace from the time labels BEFORE grouping so that whitespace variants of the same
#label are pooled; stripping the index afterwards would create duplicate labels and make
#reindex raise
final_intron_mean = (
    final_introns.assign(Time=final_introns["Time"].str.strip())
    .groupby("Time")
    .mean(numeric_only=True)
)
#chronological order; time points absent from the data become NaN rows
final_intron_mean_sorted = final_intron_mean.reindex(["0h", "1h", "1.5h", "2h", "2.5h", "3h","4h","5h","6h"])

In [None]:
#number of rows (one per cell with at least one intron spot) at each time point
cells_per_time = final_introns.groupby("Time").size()
cells_per_time

In [None]:
#chronological order of the time points, shared by both strip plots
time_order = ["0h", "1h", "1.5h", "2h", "2.5h", "3h","4h","5h","6h"]
#the mean lines are indexed by whitespace-stripped time labels, so strip the per-cell labels too;
#seaborn only draws categories listed in `order`, so an unstripped label such as "1h " would
#silently disappear from the strip plots
cell_times = info["Time"].str.strip()

#make figure nice
plt.figure(dpi=150)
#mean line plots per stain
plt.plot(mean_values_sorted.index, mean_values_sorted["Hes1"], color = "gray", linewidth = 1, label = "Hes1")
plt.plot(mean_values_sorted.index, mean_values_sorted["EU"], color = "red", linewidth = 1, label = "EU")
#plot legend outside image
plt.legend(bbox_to_anchor=(1.0, 1.05))

#make strip plot of cells (one point per cell, same colours as the mean lines)
sns.stripplot(x = cell_times, y = info["Hes1"], alpha = 0.2,
              color = "gray",  order = time_order, label = "Hes1")
sns.stripplot(x = cell_times, y = info["EU"], alpha = 0.2,
              color = "red",  order = time_order, label = "EU")


plt.ylabel("Mean Intensity (a.u.)", fontsize=12)
plt.xlabel("Time", fontsize=12)
sns.despine()
#fixed intensity range so the panels are comparable
plt.ylim(0,20000)
plt.show()

#second figure: mean intron counts per cell over time
plt.figure(dpi=150)
plt.plot(final_intron_mean_sorted.index, final_intron_mean_sorted["Counts"], color = "blue", linewidth = 1, label = "Introns")
sns.despine()
plt.xlabel("Time")
plt.ylabel("Mean Intron Counts/Cell")
plt.show()

## Get R values

In [None]:
def calc_pearsonr(df):
    """
    Calculate the per-time-point Pearson correlation between two stains.

    The two stain columns are selected by position: the second and third
    columns of ``df`` (``df.columns[1]`` and ``df.columns[2]``).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "Time" column and at least three columns overall.

    Returns
    -------
    pandas.DataFrame
        One row per unique "Time" value (in order of first appearance) with
        the columns "Time" and "Pearson's R". R is NaN for a time point with
        fewer than two rows, where the coefficient is undefined. An empty
        input gives an empty frame with the same two columns.
    """
    #use the column labels directly instead of round-tripping them through str()
    stain1_col, stain2_col = df.columns[1], df.columns[2]
    time_r = []
    for time in df["Time"].unique():
        df_time = df[df["Time"]==time]
        stain1 = df_time[stain1_col].values
        stain2 = df_time[stain2_col].values
        if len(stain1) < 2:
            #pearsonr raises for fewer than two observations; report "undefined" instead
            r = np.nan
        else:
            r = pearsonr(stain1,stain2)[0]
        time_r.append([time,r])

    #naming the columns in the constructor also works when time_r is empty
    return pd.DataFrame(time_r, columns=["Time", "Pearson's R"])

In [None]:
#per-time Pearson's R between the two stain columns of the intensity table (Hes1 vs EU)
hes_v_eu = calc_pearsonr(df=info)

In [None]:
#one Hes1-vs-EU figure per time point, annotated with that time point's Pearson's R
for time in hes_v_eu["Time"].unique():
    info_slice = info[info["Time"]==time]
    #look the value up by column label; the former .iloc[0][1] relied on integer keys being
    #treated as positions on a label-indexed Series, which pandas has deprecated
    r_info = hes_v_eu.loc[hes_v_eu["Time"] == time, "Pearson's R"].iloc[0]
    #keep the handle returned by the project helper under its own name, so the notebook-wide
    #matplotlib alias `plt` is never rebound (assumes the handle is pyplot-like: it must offer
    #yticks/annotate/show - confirm against helpers.stain_intron_quant_main)
    fig_plt = gen_figure(info_slice, time_course=False, no_intercept=True)
    #anchor the annotation at the second-to-last y tick
    y_ticks = fig_plt.yticks()[0]
    max_y = y_ticks[len(y_ticks)-2]
    fig_plt.annotate(f"Pearson's R = {round(r_info,2)}", (0,max_y))
    fig_plt.show()

In [None]:
#reorganize intron counts df: move the cell id out of the index into a regular column
#rebuild from the per-time tables instead of resetting final_introns in place, so this cell
#gives the same result every time it is run (a second in-place reset_index would add an extra
#"index" column and the 3-name column assignment below would then fail)
final_introns = pd.concat(intron_counts_per_cell).reset_index()
final_introns.columns = ["Cell id", "Counts", "Time"]

In [None]:
#combine stain intensities with intron counts; only (cell, time) pairs found in both tables are kept
df_merged = info.merge(final_introns, how="inner", on=["Cell id", "Time"])

In [None]:
#pearson r for Hes1 vs intron counts
#calc_pearsonr correlates the 2nd and 3rd columns, so the column order here matters
hes1_intron_cols = ["Cell id","Hes1", "Counts", "Time"]
df_merged_iso = df_merged[hes1_intron_cols]
hes_v_introns = calc_pearsonr(df=df_merged_iso)

In [None]:
#one Hes1-vs-intron-count figure per time point, annotated with that time point's Pearson's R
for time in hes_v_introns["Time"].unique():
    info_slice = df_merged_iso[df_merged_iso["Time"]==time]
    #look the value up by column label; the former .iloc[0][1] relied on integer keys being
    #treated as positions on a label-indexed Series, which pandas has deprecated
    r_info = hes_v_introns.loc[hes_v_introns["Time"] == time, "Pearson's R"].iloc[0]
    #keep the handle returned by the project helper under its own name, so the notebook-wide
    #matplotlib alias `plt` is never rebound (assumes the handle is pyplot-like: it must offer
    #yticks/annotate/show - confirm against helpers.stain_intron_quant_main)
    fig_plt = gen_figure(info_slice, time_course=False, no_intercept=True)
    #anchor the annotation at the second-to-last y tick
    y_ticks = fig_plt.yticks()[0]
    max_y = y_ticks[len(y_ticks)-2]
    fig_plt.annotate(f"Pearson's R = {round(r_info,2)}", (0,max_y))
    fig_plt.show()

In [None]:
#pearson r for Eu vs intron counts
#calc_pearsonr correlates the 2nd and 3rd columns, so the column order here matters
eu_intron_cols = ["Cell id","EU", "Counts", "Time"]
df_merged_iso = df_merged[eu_intron_cols]
eu_v_introns = calc_pearsonr(df=df_merged_iso)

In [None]:
#one EU-vs-intron-count figure per time point, annotated with that time point's Pearson's R
#iterate over the time points of eu_v_introns itself (not hes_v_eu): the inner merge can drop
#time points, and looking up a time that is missing from eu_v_introns would fail on .iloc[0]
for time in eu_v_introns["Time"].unique():
    info_slice = df_merged_iso[df_merged_iso["Time"]==time]
    #look the value up by column label; the former .iloc[0][1] relied on integer keys being
    #treated as positions on a label-indexed Series, which pandas has deprecated
    r_info = eu_v_introns.loc[eu_v_introns["Time"] == time, "Pearson's R"].iloc[0]
    #keep the handle returned by the project helper under its own name, so the notebook-wide
    #matplotlib alias `plt` is never rebound (assumes the handle is pyplot-like: it must offer
    #yticks/annotate/show - confirm against helpers.stain_intron_quant_main)
    fig_plt = gen_figure(info_slice, time_course=False, no_intercept=True)
    #anchor the annotation at the second-to-last y tick
    y_ticks = fig_plt.yticks()[0]
    max_y = y_ticks[len(y_ticks)-2]
    fig_plt.annotate(f"Pearson's R = {round(r_info,2)}", (0,max_y))
    fig_plt.show()