In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.stats.multitest as smm
from matplotlib.backends.backend_pdf import PdfPages


In [38]:
def run_bh(pValDf):
    pValDf = np.abs(pValDf)
    adjustedPVals = pd.DataFrame(columns=pValDf.columns, index=pValDf.index)
    actRej = pd.DataFrame(columns=pValDf.columns, index=pValDf.index)

    for (tf, pVals) in pValDf.items():
        pVals = pVals.dropna()
        pV_df = pd.DataFrame(index=pVals.index, columns=['pVal'], data=pVals.values)
        pV_df.sort_values(by='pVal', inplace=True)

        reject, adj_p_val = smm.fdrcorrection(pV_df['pVal'].values, method='i', is_sorted=True, alpha=0.1)
        pV_df[tf] = adj_p_val

        adjustedPVals.loc[pV_df.index.values, tf] = pV_df[tf].values
        actRej.loc[pV_df.index.values, tf] = reject

    return adjustedPVals, actRej

In [39]:
p_val_file = 'data/p-values_1000000_v2_shuffle.tsv'

pval_df = pd.read_csv(p_val_file, sep='\t', index_col=0)
adjP, acptRej = run_bh(pval_df)

acptRej.replace(True, 1, inplace=True)
acptRej.replace(False, 0, inplace=True)

In [84]:
p_value_file = 'data/p-values_10000.tsv'
umap_file = 'data/_umap.tsv'
dest_file = 'data/plots10k.pdf'

p_value_df = pd.read_csv(p_value_file, sep='\t', index_col=0)

adjP, acptRej = run_bh(p_value_df)
umap_df = pd.read_csv(umap_file, sep='\t', index_col=0)

figures = []

# Sort columns of acptRej by number of True values
acptRej = acptRej[acptRej.sum().sort_values(ascending=False).index]

acptRej.replace(True, -1, inplace=True)
acptRej.replace(False, 1, inplace=True)
acptRej.replace(np.nan, 0, inplace=True)

count = 0

# Run loop through columns and index of acptRej
for (tft, cells) in acptRej.items():
    plt.figure(figsize=(10, 10))
    # plt.style.use('seaborn-whitegrid')

    # Count number of 1s and -1s in cells
    total_cell = cells.count() - cells[cells == 0].count()

    plt.title(
        tft + ', Total cells: ' + str(total_cell) + ', Activated cells: ' + str(cells[cells == 1].count()))
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')

    cell_coord = umap_df.merge(cells, left_index=True, right_index=True)
    cell_coord['p-value'] = p_value_df[tft]
    cell_coord['logp-value'] = cell_coord['p-value'].apply(lambda x: np.sign(x) * -np.log(np.abs(x)))
    # cell_coord['logp-value'].replace(np.nan, 0, inplace=True)

    plt.scatter(x=cell_coord['UMAP1'], y=cell_coord['UMAP2'], c=cell_coord['logp-value'], cmap='coolwarm', s=1)
    plt.colorbar()

    figures.append(plt.gcf())

    count += 1
    if count == 50:
        break

with PdfPages(dest_file) as pdf:
    for fig in figures:
        pdf.savefig(fig)

plt.close('all')

  plt.figure(figsize=(10, 10))


In [78]:
a2m = p_value_df['A2M']
a2m

midpoint_AAACCTGCACCCAGTG-1       NaN
midpoint_AAACCTGCAGCTTAAC-1    0.6616
midpoint_AAACCTGGTGTGCGTC-1    0.8900
midpoint_AAACCTGGTTGAACTC-1    0.9664
midpoint_AAACGGGAGGATGGTC-1    0.4132
                                ...  
tumor_TTTCCTCGTATGCTTG-1       0.0316
tumor_TTTGCGCCACGGTTTA-1       0.0288
tumor_TTTGGTTCAGCCTGTG-1       0.0540
tumor_TTTGGTTCAGTGAGTG-1       0.0288
tumor_TTTGGTTGTGCAACTT-1       0.0344
Name: A2M, Length: 4797, dtype: float64

In [79]:
a2m = a2m.dropna()
a2m

midpoint_AAACCTGCAGCTTAAC-1    0.6616
midpoint_AAACCTGGTGTGCGTC-1    0.8900
midpoint_AAACCTGGTTGAACTC-1    0.9664
midpoint_AAACGGGAGGATGGTC-1    0.4132
midpoint_AAACGGGAGGGCTCTC-1    0.4737
                                ...  
tumor_TTTCCTCGTATGCTTG-1       0.0316
tumor_TTTGCGCCACGGTTTA-1       0.0288
tumor_TTTGGTTCAGCCTGTG-1       0.0540
tumor_TTTGGTTCAGTGAGTG-1       0.0288
tumor_TTTGGTTGTGCAACTT-1       0.0344
Name: A2M, Length: 3073, dtype: float64

In [16]:
n = 1_000
ranks = [(i + 0.5) / n for i in range(0, n)]
ranks


[0.0005,
 0.0015,
 0.0025,
 0.0035,
 0.0045,
 0.0055,
 0.0065,
 0.0075,
 0.0085,
 0.0095,
 0.0105,
 0.0115,
 0.0125,
 0.0135,
 0.0145,
 0.0155,
 0.0165,
 0.0175,
 0.0185,
 0.0195,
 0.0205,
 0.0215,
 0.0225,
 0.0235,
 0.0245,
 0.0255,
 0.0265,
 0.0275,
 0.0285,
 0.0295,
 0.0305,
 0.0315,
 0.0325,
 0.0335,
 0.0345,
 0.0355,
 0.0365,
 0.0375,
 0.0385,
 0.0395,
 0.0405,
 0.0415,
 0.0425,
 0.0435,
 0.0445,
 0.0455,
 0.0465,
 0.0475,
 0.0485,
 0.0495,
 0.0505,
 0.0515,
 0.0525,
 0.0535,
 0.0545,
 0.0555,
 0.0565,
 0.0575,
 0.0585,
 0.0595,
 0.0605,
 0.0615,
 0.0625,
 0.0635,
 0.0645,
 0.0655,
 0.0665,
 0.0675,
 0.0685,
 0.0695,
 0.0705,
 0.0715,
 0.0725,
 0.0735,
 0.0745,
 0.0755,
 0.0765,
 0.0775,
 0.0785,
 0.0795,
 0.0805,
 0.0815,
 0.0825,
 0.0835,
 0.0845,
 0.0855,
 0.0865,
 0.0875,
 0.0885,
 0.0895,
 0.0905,
 0.0915,
 0.0925,
 0.0935,
 0.0945,
 0.0955,
 0.0965,
 0.0975,
 0.0985,
 0.0995,
 0.1005,
 0.1015,
 0.1025,
 0.1035,
 0.1045,
 0.1055,
 0.1065,
 0.1075,
 0.1085,
 0.1095,
 0.1105,
 

In [32]:
ls = [0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 8, 9]
print(np.searchsorted(ls, 5.5))
print(np.searchsorted(ls, 5))

10
5
