In [None]:
import os
# Build the `scancel <job id>` shell command for the SLURM job this kernel is
# running in (the id is read from the SLURM_JOBID environment variable), then
# hand it to the shell.
job_cancel_str = f"scancel {os.environ['SLURM_JOBID']}"
os.system(job_cancel_str)

# Reading 1 Channel

In [1]:
import sys
import os
import numpy as np
import pandas as pd
from glob import glob
from tqdm.notebook import tqdm

In [2]:
# Extend the module search path with the project code directory
# (presumably where the learning_tabular and hit_finding packages imported
# below live; confirm if the directory layout changes).
sys.path.append('/storage/users/g-and-n/naorko_code/code/')

In [3]:
from learning_tabular.preprocessing import load_plate_csv, list_columns
from hit_finding.metrics import *

In [17]:
# Collect every sub-directory that sits directly inside a directory matching
# `400*` under the results root (the trailing slash restricts matches to
# directories).
flds = glob('/storage/users/g-and-n/tabular_models_results/400*/*/')
# Bare expression so the notebook displays the discovered folders.
flds

['/storage/users/g-and-n/tabular_models_results/40033/channel ER/',
 '/storage/users/g-and-n/tabular_models_results/40012/channel DNA/',
 '/storage/users/g-and-n/tabular_models_results/40034/channel Mito/',
 '/storage/users/g-and-n/tabular_models_results/40015/channel RNA/',
 '/storage/users/g-and-n/tabular_models_results/40014/channel Mito/',
 '/storage/users/g-and-n/tabular_models_results/40035/channel RNA/',
 '/storage/users/g-and-n/tabular_models_results/40032/channel DNA/',
 '/storage/users/g-and-n/tabular_models_results/40022/channel DNA/',
 '/storage/users/g-and-n/tabular_models_results/40024/channel Mito/',
 '/storage/users/g-and-n/tabular_models_results/40031/channel AGP/',
 '/storage/users/g-and-n/tabular_models_results/40023/channel ER/',
 '/storage/users/g-and-n/tabular_models_results/40025/channel RNA/',
 '/storage/users/g-and-n/tabular_models_results/40021/channel AGP/',
 '/storage/users/g-and-n/tabular_models_results/40011/channel AGP/',
 '/storage/users/g-and-n/tabular_

In [4]:
# Hand-picked folders to process: one each for the DNA, ER and Mito channels.
# When the notebook is run top to bottom this assignment replaces whatever an
# earlier cell stored in `flds`.
flds = [
    '/storage/users/g-and-n/tabular_models_results/40012/channel DNA/',
    '/storage/users/g-and-n/tabular_models_results/40033/channel ER/',
    '/storage/users/g-and-n/tabular_models_results/40034/channel Mito/',
]

## Calculate Pure Z-Scores

In [None]:
# For every folder in `flds`, turn each plate CSV under `<folder>/results/`
# into a z-score CSV under `<folder>/zscores/`. Z-scores are computed with a
# StandardScaler fitted on the plate's 'mock' rows only and then applied to
# every row of the plate.

# Settings shared by all plates; they never change, so they are defined once
# instead of being re-initialised inside the loops.
index_fields = None
by_well = True
# Keys used to average the rows of a plate down to one row per well.
well_index = ['Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well']

for cur_fld in flds:
    # Use explicit paths instead of os.chdir(cur_fld): changing the working
    # directory is a process-wide side effect that leaks into every later
    # cell, and it breaks any relative entry in `flds` after the first one.
    err_fld = os.path.join(cur_fld, 'results')
    pure_fld = os.path.join(cur_fld, 'zscores')

    os.makedirs(pure_fld, exist_ok=True)

    for plate_csv in tqdm(glob(os.path.join(err_fld, '*'))):
        dest = os.path.join(pure_fld, os.path.basename(plate_csv))

        # Already produced by a previous run: skip so the cell can be resumed.
        if os.path.exists(dest):
            continue

        df = load_plate_csv(plate_csv, index_fields=index_fields)

        if by_well:
            # Collapse the plate to one row per well (mean of its rows).
            df = df.groupby(by=well_index).apply(lambda g: g.mean())

        # Rows whose index level 1 equals 'mock' are the reference population.
        # NOTE(review): after the per-well grouping level 1 is LABEL_FIELD;
        # with by_well=False this depends on load_plate_csv's index - confirm.
        df_mock = df[df.index.isin(['mock'], level=1)]
        if not df_mock.shape[0]:
            print(f'no mock wells in {os.path.basename(plate_csv)}')
            continue

        # Fit mean/std on the mock rows only.
        scaler = StandardScaler()
        scaler.fit(df_mock)
        del df_mock

        df_zscores = pd.DataFrame(scaler.transform(df), index=df.index, columns=df.columns)
        del df

        df_zscores.to_csv(dest)

        # Drop the reference before loading the next plate to keep memory low.
        del df_zscores

  0%|          | 0/100 [00:00<?, ?it/s]

## Calculate SS

In [None]:
def generate_scores(cpdf, abs_zscore=True, th_range=range(2, 21)):
    """Summarise a block of replicate rows into correlation and threshold scores.

    Parameters
    ----------
    cpdf : pandas.DataFrame
        One row per replicate, one column per feature. Values must be
        castable to float64.
    abs_zscore : bool, default True
        When True the scaled values are compared to the thresholds by
        absolute value; when False only values that are themselves >= the
        threshold are counted.
    th_range : iterable, default range(2, 21)
        Thresholds ``t`` for which ``SS_t`` and ``MAS_t`` are produced.

    Returns
    -------
    pandas.Series
        ``Med_Corr`` followed by an ``SS_t`` / ``MAS_t`` pair per threshold:

        * ``Med_Corr`` - median of the pairwise Pearson correlations between
          rows (fixed to 1 when there is a single row).
        * ``SS_t`` - number of entries of ``cpdf * sqrt(n_rows)`` that reach
          ``t``, divided by the number of rows.
        * ``MAS_t`` - ``sqrt(max(Med_Corr, 0) * SS_t / n_columns)``.
    """
    rep_cnt, fet_cnt = cpdf.shape

    # Correlate rows with each other: transpose so each row becomes a column.
    pairwise = cpdf.astype('float64').T.corr(method='pearson').values
    n_rows = len(pairwise)
    if n_rows == 1:
        # A single row has no pair to correlate with.
        med_corr = 1
    else:
        # Strict upper triangle = each distinct pair exactly once.
        med_corr = np.median(pairwise[np.triu_indices(n_rows, k=1)])

    scores = {'Med_Corr': med_corr}

    scaled = cpdf * np.sqrt(rep_cnt)
    if abs_zscore:
        # The transpose does not affect the totals taken below.
        scaled = scaled.T.abs()

    # Negative median correlations are clamped to zero for the MAS formula.
    clamped_corr = max(med_corr, 0)

    for t in th_range:
        hits = (scaled >= t).sum().sum()
        ss_norm = hits / rep_cnt
        scores[f'SS_{t}'] = ss_norm
        scores[f'MAS_{t}'] = np.sqrt((clamped_corr * ss_norm) / fet_cnt)

    return pd.Series(scores)

In [None]:
# For every folder in `flds`: pool all of its z-score CSVs, keep only the
# rows whose Metadata_ASSAY_WELL_ROLE is "treated", score each
# Metadata_broad_sample group with generate_scores and write the result to
# `<folder>/scores.csv`.
for cur_fld in tqdm(flds):
    # os.path.join keeps the paths valid whether or not cur_fld ends in '/'.
    zscore_files = glob(os.path.join(cur_fld, 'zscores', '*'))

    # pd.concat raises ValueError on an empty list. A folder can legitimately
    # have no z-score files (e.g. none of its plates had mock wells), so skip
    # it instead of aborting the remaining folders.
    if not zscore_files:
        print(f'no z-score files in {cur_fld}')
        continue

    # The first four CSV columns are the index written by the z-score step.
    cur_df = pd.concat([pd.read_csv(pth, index_col=[0, 1, 2, 3]) for pth in zscore_files])
    # After filtering, index level 1 is dropped from the index.
    cur_df = cur_df.query('Metadata_ASSAY_WELL_ROLE == "treated"').droplevel(1)
    cur_res = cur_df.groupby('Metadata_broad_sample').apply(generate_scores, abs_zscore=False, th_range=[2, 6, 10, 14])
    del cur_df
    cur_res.to_csv(os.path.join(cur_fld, 'scores.csv'))
    del cur_res

In [16]:
import shutil

# Query the disk-usage statistics (in bytes) for the Mito results folder.
total, used, free = shutil.disk_usage(
    "/storage/users/g-and-n/tabular_models_results/40034/channel Mito/"
)

# 1 GiB is 2**30 bytes; shifting right by 30 bits is the same floor division.
print("Total: %d GiB" % (total >> 30))
print("Used: %d GiB" % (used >> 30))
print("Free: %d GiB" % (free >> 30))

Total: 25600 GiB
Used: 21309 GiB
Free: 4290 GiB


In [None]:
import shutil

# Query the disk-usage statistics (in bytes) for the Mito results folder.
total, used, free = shutil.disk_usage(
    "/storage/users/g-and-n/tabular_models_results/40034/channel Mito/"
)

# Report each figure in whole GiB (2**30 bytes), rounded down.
print("Total: %d GiB" % (total >> 30))
print("Used: %d GiB" % (used >> 30))
print("Free: %d GiB" % (free >> 30))