In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
from scipy.stats import spearmanr
from false_positive_analysis import percent_false_positive
%config InlineBackend.figure_format='retina'

In [None]:
# Load the final gene-by-cell matrix; the first CSV column becomes the row index.
mtx = pd.read_csv(
    "/groups/CaiLab/personal/Lex/raw/112221_20kdash_3t3/notebook_pyfiles/genebycell/final/genebycell.csv",
    index_col=0,
)

In [None]:
# Load the bulk RNA-seq table and give its two columns fixed names.
# NOTE(review): the value column is called "FPKM" here but is relabelled "TPM"
# in a later cell -- confirm which unit the file actually holds.
rnaseq = pd.read_csv(filepath_or_buffer="kallisto_NIH3T3.csv")
rnaseq.columns = ["Genes", "FPKM"]

In [None]:
# Load the barcode codebook and name its three columns (gene + two barcode fields).
codebook = pd.read_csv(
    filepath_or_buffer="/groups/CaiLab/personal/Lex/raw/112221_20kdash_3t3/barcode_key/corrected_codebook.csv"
)
codebook.columns = ["Gene", "Brcd1", "Brcd2"]

In [None]:
# Split the codebook into entries whose gene name starts with "fake" (fakebook)
# and everything else (codebook).
# na=False makes a missing gene name count as "not fake"; without it the mask
# would contain NaN and the boolean row selection would raise.
# NOTE: this cell overwrites `codebook`, so re-running it without first
# re-running the codebook load cell recomputes `fakebook` from the already
# filtered table and leaves it empty.
fakebook = codebook[codebook["Gene"].str.startswith("fake", na=False)]
codebook = codebook.drop(fakebook.index)

In [None]:
# Run the project helper on the count matrix and the real/fake codebooks.
# It is unpacked into three results; the first is indexed below by the
# "FP raw" and "total_counts" keys.
fp, fake, norm_fpr = percent_false_positive(mtx, codebook, fakebook)

# Summary statistics over the helper's first result.
sum_counts = fp["total_counts"].sum()
mean_counts = fp["total_counts"].mean()
percent_fp = fp["FP raw"].mean()

# Order matters: the next cell labels these positions as
# percent fp / false positive rate / mean counts / total sum.
fp_list = [percent_fp, norm_fpr, mean_counts, sum_counts]

In [None]:
# Turn the list of statistics into a table with one labelled column per statistic:
# label the entries first, then transpose so the labels become column headers.
df_stats = pd.DataFrame(
    fp_list,
    index=["percent fp", "false positive rate", "mean counts", "total sum"],
).T

In [None]:
# Display the summary table built from the false-positive statistics.
df_stats

In [None]:
# Collapse the matrix to a pseudobulk profile: average each row across all
# columns, then expose the row labels as a "Genes" column next to the "Counts" mean.
bulk = (
    mtx.mean(axis=1)
    .rename_axis("Genes")
    .reset_index(name="Counts")
)

In [None]:
# Relabel the RNA-seq value column as "TPM"; later cells reference it by this name.
# NOTE(review): the load cell labelled the same column "FPKM" -- confirm the actual unit.
rnaseq.columns = ["Genes", "TPM"]

In [None]:
# Lower-case the RNA-seq gene names before they are used as the join key.
rnaseq = rnaseq.assign(Genes=rnaseq["Genes"].str.lower())

In [None]:
# Inner-join bulk RNA-seq and pseudobulk tables on their shared "Genes" column,
# keeping only genes present in both.
# NOTE(review): the RNA-seq names were lower-cased but the pseudobulk names are
# used as they appear in the matrix index; the match is case-sensitive -- confirm
# the matrix index is already lower-case.
comb_1 = rnaseq.merge(bulk, on="Genes", how="inner")

In [None]:
# Order the merged table by ascending bulk RNA-seq abundance.
sort_comb_1 = comb_1.sort_values(by="TPM")

In [None]:
# Pearson correlation between bulk abundance (TPM) and pseudobulk counts, read
# from the r-value of a least-squares fit on the untransformed values.
linreg = linregress(sort_comb_1["TPM"], sort_comb_1["Counts"])
# Rounded to two decimals; this value is reused in the plot annotation.
pearsonr = round(linreg.rvalue, 2)
pearsonr

In [None]:
# Log10-log10 scatter of pseudobulk counts (x) against bulk RNA-seq abundance (y).
# NOTE(review): the y data comes from the "TPM" column while the axis label says
# FPKM -- confirm which is correct.
# NOTE(review): the annotated r was computed on the untransformed values, not on
# the log10 values shown here.
plt.scatter(
    x=np.log10(sort_comb_1["Counts"]),
    y=np.log10(sort_comb_1["TPM"]),
    s=5,
    alpha=0.5,
)

# Axis labels and tick styling.
plt.xlabel("Pseudobulk Log10(Counts)", fontsize=12)
plt.ylabel("Bulk RNAseq Log10(FPKM)", fontsize=12)
plt.xticks(fontsize=12, rotation=0)
plt.yticks(fontsize=12, rotation=0)

# Place the correlation text at a fixed point on the log10 axes.
plt.annotate(f"Pearson's r= {pearsonr}", xy=(-1.5, 1.5), fontsize=12)
sns.despine()