In [11]:
import numpy as np
import pandas as pd
from scipy.stats import linregress

# Sampling times (minutes) shared by every replicate time course.
time_points = np.array([0, 5, 10, 15, 20, 30, 40, 50, 60])


def load_timecourse(path):
    """Read a tab-separated decay time course into a DataFrame.

    The first column is kept as-is (transcript identifier); every other
    column is coerced to numeric, with unparseable entries becoming NaN.

    Args:
        path: File path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with numeric value columns.
    """
    table = pd.read_csv(path, sep="\t")
    value_columns = list(table.columns[1:])
    if value_columns:
        # Column-wise assignment replaces the columns (and their dtype);
        # an ``iloc`` slice assignment writes into the existing columns and
        # can leave text columns as ``object`` dtype.
        table[value_columns] = table[value_columns].apply(
            pd.to_numeric, errors="coerce"
        )
    return table


def replicate_half_life(times, expression, min_points=3):
    """Estimate the half-life of a single replicate time course.

    Fits ``log(expression)`` against ``times`` by ordinary least squares and
    converts the decay rate (negated slope) to a half-life, ``ln(2) / rate``.

    Args:
        times: Sampling times, one per expression value.
        expression: Expression measurements aligned with ``times``.
        min_points: Minimum number of usable (finite, positive) measurements
            required to attempt a fit.

    Returns:
        The half-life in the units of ``times``, or ``None`` when there are
        too few usable points or the fitted slope is not negative (no decay).
    """
    times = np.asarray(times, dtype=float)
    expression = np.asarray(expression, dtype=float)

    # log() needs strictly positive, finite input; NaN and +/-inf are dropped.
    usable = np.isfinite(expression) & (expression > 0)
    if usable.sum() < min_points:
        return None

    slope = linregress(times[usable], np.log(expression[usable])).slope
    # ``not slope < 0`` also rejects a NaN slope.
    if not slope < 0:
        return None
    return np.log(2) / -slope


def compute_half_lives(df, times=time_points, min_points=3):
    """Compute the mean half-life of every transcript in ``df``.

    The first column of ``df`` holds the transcript identifier. The remaining
    columns hold consecutive replicate time courses, each ``len(times)``
    columns wide; the number of replicates is derived from the column count.

    Args:
        df: DataFrame laid out as described above.
        times: Sampling times common to every replicate.
        min_points: Minimum usable measurements per replicate fit.

    Returns:
        Dict mapping transcript identifier to the mean half-life across the
        replicates that produced a valid (decaying) fit. Transcripts with no
        valid replicate are omitted.

    Raises:
        ValueError: If the number of value columns is not a multiple of
            ``len(times)``.
    """
    times = np.asarray(times, dtype=float)
    # Coerce here as well so frames that did not come from load_timecourse
    # (e.g. with stray text cells) are still accepted.
    values = (
        df.iloc[:, 1:]
        .apply(pd.to_numeric, errors="coerce")
        .to_numpy(dtype=float)
    )

    n_value_columns = values.shape[1]
    if times.size == 0 or n_value_columns % times.size != 0:
        raise ValueError(
            f"{n_value_columns} value columns cannot be split into "
            f"replicates of {times.size} time points"
        )

    half_lives = {}
    for gene, row in zip(df.iloc[:, 0], values):
        estimates = []
        # One row of the reshaped matrix per replicate.
        for replicate in row.reshape(-1, times.size):
            estimate = replicate_half_life(times, replicate, min_points)
            if estimate is not None:
                estimates.append(estimate)
        if estimates:
            half_lives[gene] = np.mean(estimates)
    return half_lives


# The guard is true when this runs as a notebook cell or a script, so the
# analysis still executes there; it only prevents the file I/O below from
# firing when the helpers above are imported from elsewhere.
if __name__ == "__main__":
    # Load the data file
    file_path = r"C:\Users\kdivy\OneDrive\Desktop\COMP ANA HT DATA\ASSIGN-2\DecayTimecourse (1).txt"
    df = load_timecourse(file_path)

    # Calculate half-life for each transcript
    half_lives = compute_half_lives(df, time_points)

    # Save half-life results. The frame is used directly afterwards instead
    # of being re-read from the CSV it was just written to; the index is
    # reset so it matches what a fresh read would have produced.
    hl_df = (
        pd.DataFrame(list(half_lives.items()), columns=["Gene", "Half-life"])
        .dropna()
        .reset_index(drop=True)
    )
    hl_df.to_csv("calculated_half_lives.csv", index=False)

    # Determine top/bottom 10% by quantile
    top_threshold = hl_df["Half-life"].quantile(0.9)
    bottom_threshold = hl_df["Half-life"].quantile(0.1)

    top_genes = hl_df[hl_df["Half-life"] >= top_threshold]
    bottom_genes = hl_df[hl_df["Half-life"] <= bottom_threshold]

    top_genes.to_csv("top_10_percent_half_lives.csv", index=False)
    bottom_genes.to_csv("bottom_10_percent_half_lives.csv", index=False)

    print("Top 10% genes:", top_genes["Gene"].tolist())
    print("Bottom 10% genes:", bottom_genes["Gene"].tolist())


Top 10% genes: ['YMR261C', 'YLR102C', 'YDR118W', 'YDL008W', 'YOR303W', 'YPR055W', 'YJL031C', 'YDR019C', 'YDR407C', 'YGR238C', 'YPR047W', 'YAL038W', 'YDR052C', 'YGR183C', 'Q0105', 'Q0275', 'Q0130', 'Q0080', 'YOR334W', 'YGL093W', 'Q0140', 'YPR186C', 'YKL058W', 'YLL050C', 'YMR092C', 'YBL078C', 'YHR156C', 'YDR424C', 'YPL174C', 'YMR294W', 'YBL037W', 'YHL019C', 'YOR075W', 'YOR036W', 'YJL041W', 'YER094C', 'YIL075C', 'YPL167C', 'YIL139C', 'YMR061W', 'YBR167C', 'YDR364C', 'YMR268C', 'YBR152W', 'YKL074C', 'YCR018C', 'YDL132W', 'YKL142W', 'YPR166C', 'YFR049W', 'YIL143C', 'YJR052W', 'YNL250W', 'YOL090W', 'YIR017C', 'YPL248C', 'YML039W', 'YMR050C', 'YBL005W-A', 'YKL152C', 'YLR134W', 'YLL024C', 'YMR186W', 'YLR109W', 'YLL039C', 'YPL240C', 'YBR011C', 'YDR077W', 'YPR149W', 'YGR087C', 'YLR259C', 'YMR251W', 'YLR304C', 'YLR087C', 'YHR049W', 'YDR366C', 'YLR216C', 'YBR054W', 'YMR031W-A', 'YKL086W', 'YER057C', 'YMR316C-A', 'YAL066W', 'YDR032C', 'YHR095W', 'YKL035W', 'YFL013W-A', 'YHR008C', 'YBL026W', 'YDR063