In [1]:
import numpy as np
import pandas as pd

# Estimation of half-life

In [4]:
# Read the per-cell summary table and index it by the "Cell" column
# (the column itself is kept, since later code still reads d["Cell"]).
dat = pd.read_csv("reports/NASCseq_Summary.tsv", sep="\t").set_index("Cell", drop=False)

# Split the cells into the two datasets processed below:
#   "NASCseq"   - every run whose name contains the substring "2022"
#   "GSE128273" - the single run named "GSE128273_NASCseq_K562_SE"
is_2022_run = ["2022" in run for run in dat["Run"]]
is_gse128273 = dat["Run"] == "GSE128273_NASCseq_K562_SE"
data = {
    "NASCseq": dat[is_2022_run],
    "GSE128273": dat[is_gse128273],
}

In [5]:
def get_halflife(total, nascent, labeling_time=3):
    """Estimate a half-life from total and nascent counts.

    With ``NTR = nascent / total`` (the new-to-total ratio), the half-life is
    obtained by solving ``1 - NTR = 2 ** (-labeling_time / T)`` for ``T``,
    i.e. ``T = -labeling_time / log2(1 - NTR)``.

    Parameters
    ----------
    total : number
        Total count.
    nascent : number
        Nascent count; expected to satisfy ``0 <= nascent <= total``.
    labeling_time : number, default 3
        Labeling duration. The result is expressed in the same unit. The
        default reproduces the value that used to be hard-coded here.

    Returns
    -------
    float
        * ``nan`` when ``total == 0`` (nothing observed) or when the counts
          are inconsistent (negative, or ``nascent > total``),
        * ``inf`` when ``nascent == 0`` (no turnover observed),
        * ``0.0`` when ``nascent == total`` (everything is new),
        * ``-labeling_time / log2(1 - nascent / total)`` otherwise.
    """
    # Undefined ratio or inconsistent counts: answer NaN explicitly instead of
    # letting np.log2 produce it from a negative argument with a RuntimeWarning.
    if total == 0 or total < 0 or nascent < 0 or nascent > total:
        return np.nan
    if nascent == 0:
        return np.inf
    if total == nascent:
        # log2(0) is -inf; short-circuit to avoid the divide-by-zero warning.
        return 0.0
    ntr = nascent / total
    return -labeling_time / np.log2(1 - ntr)

# For every dataset: select the cells of interest, pool their per-transcript
# tables into one, and write pooled FPKM / NTR / half-life values to a report.
for name, d in data.items():
    # Keep only cells with s4U == 50 and Time == 3
    # (presumably 50 uM / 3 h, going by the output file name).
    is_condition = (d["s4U"] == 50) & (d["Time"] == 3)
    d = d[is_condition]
    print("Name: %s, Cells: %d" % (name, len(d)))

    # Quality filter on the "Stranded.Reads" and "TC.Ratio" summary columns.
    passes_qc = (d["Stranded.Reads"] >= 500000) & (d["TC.Ratio"] > 0.008)
    d = d[passes_qc]
    print("Filtered cells: %d" % len(d))

    # The first cell's table seeds the accumulator; for each further cell the
    # columns listed in `summed` are added on top (aligned on the table index).
    # All other columns keep the values read from the first table.
    summed = ["Count", "Count.Exists", "Count.Nascent", "LibSize"]
    ref = None
    for run, cell in d[["Run", "Cell"]].values:
        path = "results/expression/fpkm/%s/%s.tsv" % (run, cell)
        m = pd.read_csv(path, sep="\t", index_col=0)
        if ref is None:
            ref = m.copy()
            continue
        for c in summed:
            ref[c] = ref[c].add(m[c])
    m = ref

    # (Re)compute the FPKM columns from the pooled counts and library size.
    fpkm_columns = {
        "FPKM": "Count",
        "FPKM.Exists": "Count.Exists",
        "FPKM.Nascent": "Count.Nascent",
    }
    for fpkm_col, count_col in fpkm_columns.items():
        m[fpkm_col] = m[count_col] * 1e9 / m["Length"] / m["LibSize"]

    # New-to-total ratio and the half-life derived from the same pooled counts.
    m["NTR"] = m["Count.Nascent"].div(m["Count"])
    m["T"] = [
        get_halflife(total, nascent)
        for total, nascent in m[["Count", "Count.Nascent"]].values
    ]
    m.to_csv("reports/halflife_%s_K562_50uM_3h.tsv" % name, sep="\t")

Name: NASCseq, Cells: 38
Filtered cells: 29
Name: GSE128273, Cells: 80
Filtered cells: 61


In [6]:
# Spot-check: print pooled total / nascent counts and their ratio for three
# example transcripts, once per dataset report written by the previous cell.
tids = ["ENST00000652288.1", "ENST00000317968.9", "ENST00000229239.10"]
gnames = ["MYC", "PDLIM5", "GAPDH"]

for name in data:
    print("Name:", name)
    path = "reports/halflife_%s_K562_50uM_3h.tsv" % name
    d = pd.read_csv(path, sep="\t")
    print("Gene\tTotal\tNascent\tNTR")
    print("-" * 80)
    for tid, gname in zip(tids, gnames):
        # Take the first row whose TranscriptID matches
        # (raises IndexError if the transcript is absent).
        d1 = d[d["TranscriptID"] == tid]
        v1 = d1["Count"].iloc[0]
        v2 = d1["Count.Nascent"].iloc[0]
        print("\t".join(str(field) for field in (gname, v1, v2, v2 / v1)))
    print("-" * 80)

Name: NASCseq
Gene	Total	Nascent	NTR
--------------------------------------------------------------------------------
MYC	1366	1178	0.862371888726208
PDLIM5	2528	642	0.25395569620253167
GAPDH	120361	11573	0.09615240817208232
--------------------------------------------------------------------------------
Name: GSE128273
Gene	Total	Nascent	NTR
--------------------------------------------------------------------------------
MYC	3926	2765	0.7042791645440653
PDLIM5	3600	815	0.2263888888888889
GAPDH	67616	4036	0.059690014197823
--------------------------------------------------------------------------------
