In [None]:
import sys
import numpy as np
import pandas as pd
import os
import scipy
from tqdm import tqdm
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
from plotnine import *
%matplotlib inline

## Paths

In [None]:
# CSV that the load cell reads into `eqtl_df`; a relative path, so it is
# resolved against the kernel's current working directory.
eqtl_file = "susie_backmerged_dedup.csv"

## Load data

In [None]:
# Read the whole table with pandas' default CSV settings (comma-separated,
# first row as header).
eqtl_df = pd.read_csv(filepath_or_buffer=eqtl_file)

## Subset to high-confidence sc-eQTLs

In [None]:
# Thresholds applied (with a strict ">") by the filtering cell below:
#   pip_cut    -> lower bound on the `pip` column
#                 (presumably the fine-mapping posterior inclusion
#                 probability -- TODO confirm against the data source)
#   effect_cut -> lower bound on the `abs_matched_score` column
pip_cut, effect_cut = 0.9, 0.01

In [None]:
# Keep only rows whose `pip` is strictly above the threshold. This rebinds
# `eqtl_df`, so the unfiltered table is no longer available after this cell.
passes_pip = eqtl_df['pip'] > pip_cut
eqtl_df = eqtl_df.loc[passes_pip].copy()

# Of those rows, additionally keep the ones whose `abs_matched_score`
# is strictly above `effect_cut`.
passes_effect = eqtl_df['abs_matched_score'] > effect_cut
eqtl_df_pred = eqtl_df.loc[passes_effect].copy()

# Row counts of the two subsets, shown as the cell output.
eqtl_df.shape[0], eqtl_df_pred.shape[0]

# Predicting the beta for positive variants

In [None]:
# Printed label -> score column to correlate against the `beta` column.
score_columns = {
    "Decima": 'matched_score',
    "Borzoi whole blood": 'borzoi_wholeblood_score',
    "Borzoi matched": 'borzoi_matched_score',
}

# For each score column, print the Pearson result on the pip-filtered table
# first and on the additionally effect-filtered table second.
for model_label, score_col in score_columns.items():
    print(model_label)
    for variants in (eqtl_df, eqtl_df_pred):
        print(scipy.stats.pearsonr(variants['beta'], variants[score_col]))

In [None]:
# Sign agreement: fraction of rows where (beta > 0) equals (matched_score > 0),
# printed for the pip-filtered table and then for the effect-filtered subset.
for frame in (eqtl_df, eqtl_df_pred):
    beta_is_positive = frame['beta'] > 0
    score_is_positive = frame['matched_score'] > 0
    print(accuracy_score(beta_is_positive, score_is_positive))

## Visualize

In [None]:
# Density-coloured scatter of `beta` (x) against `matched_score` (y) for the
# pip-filtered table, with dashed grey reference lines through x = 0 and y = 0.
zero_line_style = dict(color='grey', linetype='dashed')

(
    ggplot(eqtl_df, aes(x='beta', y='matched_score'))
    + geom_pointdensity(size=.3)
    + theme_classic()
    + theme(figure_size=(2.9, 2.5))
    + xlab("Beta")
    + ylab("   Predicted logFC\n(cell-type matched)")
    + geom_vline(xintercept=0, **zero_line_style)
    + geom_hline(yintercept=0, **zero_line_style)
)