In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

# Column-name prefix that apply_threshold uses to pick out the columns it counts over.
VW_PREFIX = "VW_"

In [2]:
# Remove pandas' display truncation: a value of None means "no limit"
# for the number of rows, the number of columns, and the cell text width.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [None]:
# Turn the notebook-relative location of the input TSV into an absolute path.
z_path = Path("../../../PV1 Data/SHERC_combined_wSB_4-24-24_Z-HDI95_avg.tsv").resolve()

# read_table splits on tabs by default, which matches the .tsv input.
z_df = pd.read_table(z_path)

# Preview the first five rows only.
display(z_df.head(n=5))

In [None]:
def apply_threshold(df, zthresh1=10.0, cthresh1=100, zthresh2=5.0, cthresh2=60, prefix=None):
    """Label every row as a definite, possible, or non-epitope.

    Only columns whose name starts with ``prefix`` are counted. Their values
    are converted with ``pd.to_numeric(errors="coerce")``, so anything
    non-numeric becomes NaN and never satisfies a ``>=`` comparison.

    Classification per row (the three labels are mutually exclusive):

    * "Def epitope":   at least ``cthresh1`` counted values are ``>= zthresh1``.
    * "Maybe epitope": not definite, and at least ``cthresh2`` counted values
      are ``>= zthresh2``.
    * "Not epitope":   neither of the above.

    Parameters
    ----------
    df : pandas.DataFrame
        Full input table; it is not modified.
    zthresh1, zthresh2 : float
        Value cut-offs for the definite and the possible tier.
    cthresh1, cthresh2 : int
        Minimum number of columns that must reach the matching cut-off.
    prefix : str, optional
        Column-name prefix selecting the columns to count over. When omitted,
        the notebook-level ``VW_PREFIX`` constant is used.

    Returns
    -------
    pandas.DataFrame
        A new frame with all original columns plus the int8 indicator columns
        "Def epitope", "Maybe epitope" and "Not epitope".
    """
    if prefix is None:
        # Fall back to the notebook-wide constant so existing calls behave as before.
        prefix = VW_PREFIX

    # Non-string labels cannot carry the prefix; skip them instead of raising.
    subject_cols = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith(prefix)
    ]

    subjects = df.loc[:, subject_cols].apply(pd.to_numeric, errors="coerce")
    arr = subjects.to_numpy()

    # Per-row number of selected columns reaching each cut-off (NaN compares False).
    def_counts = (arr >= zthresh1).sum(axis=1)
    maybe_counts = (arr >= zthresh2).sum(axis=1)

    def_mask = def_counts >= cthresh1
    # A row that is already definite is never also reported as "maybe".
    maybe_mask = (~def_mask) & (maybe_counts >= cthresh2)
    not_mask = ~(def_mask | maybe_mask)

    return df.assign(**{"Def epitope": def_mask.astype("int8"),
                        "Maybe epitope": maybe_mask.astype("int8"),
                        "Not epitope": not_mask.astype("int8")})

In [None]:
# No defensive copy needed: apply_threshold only reads its input and returns
# a new frame built with DataFrame.assign, so z_df is left untouched.
z_classified = apply_threshold(z_df)

In [None]:
# Sanity check: the three indicator columns must add up to exactly 1 on
# every row, i.e. each row carries one and only one label.
label_sum = (
    z_classified["Def epitope"]
    .add(z_classified["Maybe epitope"])
    .add(z_classified["Not epitope"])
)
assert label_sum.eq(1).all()

# Report how many rows fell into each class.
print(
    "Counts:",
    "Def", z_classified["Def epitope"].sum(),
    "Maybe", z_classified["Maybe epitope"].sum(),
    "Not", z_classified["Not epitope"].sum(),
)

In [None]:
# Indicator columns written by apply_threshold.
cols = ["Def epitope", "Maybe epitope", "Not epitope"]

# Rows where at least one indicator equals 1.
# NOTE(review): the earlier assertion requires exactly one indicator per row,
# so this mask looks like it is True everywhere — confirm that is intended.
label_flags = z_classified[cols] == 1
any_hits_mask = label_flags.any(axis=1)

# Keep only the indicator columns for the selected rows and preview ten of them.
any_hits = z_classified.loc[any_hits_mask, cols]
any_hits.head(n=10)

In [None]:
# Row mask for definite epitopes. The comparison on a single column is already
# one boolean per row; a Series has no axis 1, so reducing it with
# .any(axis=1) would raise ValueError instead of producing a mask.
def_hits_mask = z_classified["Def epitope"].eq(1)

# Keep just the indicator column for those rows and preview the first ten.
def_hits = z_classified.loc[def_hits_mask, "Def epitope"]
def_hits.head(10)