In [1]:
# Colab setup: attach Google Drive, then switch into the project folder so the
# relative data paths used further down in the notebook resolve.
import os

from google.colab import drive

drive.mount("/content/gdrive")
os.chdir('/content/gdrive/MyDrive/ViralMut/')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import pandas as pd
from scipy.stats import mannwhitneyu

# Read the stability table, force `half_life` to numeric (unparseable entries
# become NaN) and discard rows that end up without a usable half-life.
df = (
    pd.read_csv(
        './upload/species_stability_with_threshold.csv',
        low_memory=False,
        dtype={'study_id': str, 'replicate_id': str, 'time_points': str},
    )
    .assign(half_life=lambda frame: pd.to_numeric(frame['half_life'], errors='coerce'))
    .dropna(subset=['half_life'])
)

# Independent copy restricted to the rows labelled 'Human'.
df_human = df.loc[df['species_name'] == 'Human'].copy()


# Gene symbols of the first comparison group (shown in the "CLM" columns of the
# summary table printed below).
calm_list = """
    ABCD1    AFG3L2    AR       ARX      ATP1A1
    CACNA1A  CACNA1C   DHX37    DNM1     DNMT1
    EYA1     EZH2      FOXG1    FOXP3    GARS1
    GATA1    GATA2     GJA3     GNB1     HGD
    KCNH1    MEF2C     PCYT1A   PDE4D    PPP2R1A
    SCN3A    SNRNP200  SOX9     TGFBR1
""".split()

# Gene symbols of the second comparison group (shown in the "PLM" columns of the
# summary table printed below).
esm_list = """
    ACAD9    CACNA1D   CAMTA1   CC2D2A   CD40LG
    CFH      CPLANE1   CYBB     DEAF1    DICER1
    DOK7     DYNC1H1   DYNC2H1  EXT1     EYS
    GNPTAB   GRIN2A    GRIN2B   HBA1     HEXB
    HLCS     IL12RB1   IL2RG    INF2     ITGB4
    KCNJ11   KCNT1     KDM3B    KDM6A    MAN2B1
    MECP2    MORC2     MSH6     MYO15A   MYOC
    MYRF     NBEAL2    NEU1     NR0B1    NSD2
    NTRK1    OCA2      OFD1     OTOF     PC
    PIK3R2   PKD1      POGZ     PURA     RAG1
    RB1      SERPING1  SLC16A2  SLC4A11  SMC1A
    SUMF1    TAF1      TECTA    TGM1     TNFRSF1A
    TP53     TYMP      USH2A    WAS      WDR62    WFS1
""".split()

# Per cell type: compare the per-gene median half-life of the genes in
# `calm_list` (columns labelled CLM) against those in `esm_list` (columns
# labelled PLM) and print one summary row per cell type that has enough genes.
unique_cells = df_human['cell_type'].unique()

print(f"{'Cell Type':<15} | {'CLM (N)':<10} | {'PLM (N)':<10} | {'P-value':<10} | {'Median CLM':<10} | {'Median PLM':<10}")
print("-" * 80)

for cell in unique_cells:

    # Boolean-mask selection already yields a new frame; nothing below mutates
    # it, so no defensive copy is needed.
    df_temp = df_human[df_human['cell_type'] == cell]

    # Collapse all rows of a gene to a single median half-life so that every
    # gene contributes exactly one observation to the test.
    df_agg = df_temp.groupby('gene_name_y')['half_life'].median().reset_index()

    s_clm = df_agg[df_agg['gene_name_y'].isin(calm_list)]['half_life']
    s_esm = df_agg[df_agg['gene_name_y'].isin(esm_list)]['half_life']

    # Only test cell types in which both groups contain more than 5 genes.
    if len(s_clm) > 5 and len(s_esm) > 5:

        # One-sided test: the alternative hypothesis is that the first sample
        # (CLM) is stochastically smaller than the second (PLM).
        stat, p_val = mannwhitneyu(s_clm, s_esm, alternative='less')

        med_clm = s_clm.median()
        med_plm = s_esm.median()

        # NOTE(review): p-values are not adjusted for the number of cell types
        # tested; the marker flags nominal p < 0.05 only.
        sig_mark = " ***" if p_val < 0.05 else ""
        p_text = f"{p_val:.4f}{sig_mark}"

        # Every cell is padded to the same width as its header column. The old
        # row appended a fixed six spaces after the CLM median, so the column
        # separator shifted with the number of digits in the value.
        print(f"{cell:<15} | {len(s_clm):<10} | {len(s_esm):<10} | {p_text:<10} | {med_clm:<10.2f} | {med_plm:.2f}")

Cell Type       | CLM (N)    | PLM (N)    | P-value    | Median CLM | Median PLM
--------------------------------------------------------------------------------
Hela            | 24         | 53         | 0.1833     | 68.05       | 104.10
HEK293          | 29         | 61         | 0.2012     | 1.44       | 1.46
MCF-7           | 12         | 31         | 0.6119     | 10.08       | 10.32
K562            | 21         | 48         | 0.6104     | 2.24       | 2.23
H1ESC           | 10         | 17         | 0.5500     | 0.67       | 0.51
HepG2           | 29         | 65         | 0.3417     | 13.23       | 23.89
RPE             | 8          | 18         | 0.9849     | 0.14       | 0.03
HEK293T         | 29         | 62         | 0.7462     | 27.66       | 23.49
A673            | 18         | 28         | 0.3636     | 4.40       | 4.97
iPSN            | 29         | 63         | 0.3084     | 24.40       | 42.66
HCT116          | 27         | 63         | 0.1968     | 92.79       | 149.11