In [None]:
import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
########## Tev's Code ############

# Requires the SHARPR package; download and extract it from
# https://ernstlab.biolchem.ucla.edu/SHARPR/SHARPR.zip
# Consult the SHARPR manual if any step fails to run.

def _run_sharpr(sharpr_jar, mode, *args):
    """Run one SHARPR sub-command and raise if it exits with a non-zero status."""
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids shell interpretation of the inputs.
    cmd = ['java', '-jar', str(sharpr_jar), mode, *map(str, args)]
    # Echo the command before running it so a failing step is visible in the output.
    print(' '.join(cmd))
    subprocess.run(cmd, check=True)


def compute_sharpr_from_tiles(tile_fn, varprior_1=1, varprior_2=None,
                              sharpr_jar='/u/home/m/mudiyang/Scripts/SHARPR/SHARPR.jar',
                              tilelength=145, stepsize=5, numtilepos=31):
    """ Computes SHARPR scores from tile matrix.

        Runs the SHARPR pipeline (Infer -> Combine -c -> Interpolate) inside a
        temporary directory and loads the Interpolate output into a DataFrame.

        Parameters
        ----------
        tile_fn : str or path-like
            Tile matrix in the SHARPR tile matrix format: tab-separated,
            rows are regions and columns are tiles.
        varprior_1 : number
            Variance prior passed to the first `Infer` run.
        varprior_2 : number, optional
            Variance prior passed to the second `Infer` run; defaults to
            `varprior_1`.
        sharpr_jar : str or path-like
            Location of SHARPR.jar.
        tilelength, stepsize, numtilepos : int
            Tile geometry arguments passed through to `Infer`
            (`stepsize` is also passed to `Interpolate`).

        Returns
        -------
        pandas.DataFrame
            The Interpolate output, indexed by `region_id`, with integer
            column labels (presumably one column per interpolated position --
            confirm against the SHARPR manual).

        Raises
        ------
        subprocess.CalledProcessError
            If any SHARPR step exits with a non-zero status.
        FileNotFoundError
            If the `java` executable cannot be found.
    """
    if varprior_2 is None:
        varprior_2 = varprior_1

    with tempfile.TemporaryDirectory() as tmpdir:
        inference_out_1 = os.path.join(tmpdir, f'inference_out.varprior_{varprior_1}.tsv')
        inference_out_2 = os.path.join(tmpdir, f'inference_out.varprior_{varprior_2}.tsv')

        _run_sharpr(sharpr_jar, 'Infer', tile_fn, inference_out_1,
                    varprior_1, tilelength, stepsize, numtilepos)
        # When both priors format to the same file name, the second run would
        # repeat the identical command and overwrite the same file; skip it.
        if inference_out_2 != inference_out_1:
            _run_sharpr(sharpr_jar, 'Infer', tile_fn, inference_out_2,
                        varprior_2, tilelength, stepsize, numtilepos)

        combine_out = os.path.join(tmpdir, 'combineoutput.tsv')
        # Argument order (second inference file first) is kept as in the original call.
        _run_sharpr(sharpr_jar, 'Combine', '-c', inference_out_2, inference_out_1, combine_out)

        interpolate_out = os.path.join(tmpdir, 'interpolate_out.tsv')
        _run_sharpr(sharpr_jar, 'Interpolate', combine_out, interpolate_out, stepsize)

        # Read the result before the temporary directory is removed.
        df = pd.read_csv(interpolate_out, sep='\t', header=None, index_col=0)
        df.columns = df.columns.map(int)
        df.index = df.index.rename('region_id')

        return df


In [None]:
# Sweep the SHARPR variance prior over every prediction set and pickle each result.
PREDICTIONS_DIR = '/u/home/m/mudiyang/scratch/Tile_Prediction_Regressors/Combined/data/predictions'
SCORES_DIR = '/u/home/m/mudiyang/scratch/Tile_Prediction_Regressors/Combined/data/SHARPR_Scores'
# Region printed after each run as a quick sanity check.
CHECK_REGION_ID = 'H1hesc_10_0_chr9_108475935'

# Loop-invariant, so defined once rather than on every outer iteration.
varprior_vals = [0.1, 1, 10, 50]

# to_pickle does not create missing directories.
os.makedirs(SCORES_DIR, exist_ok=True)

for i in ['linear', 'linear_inference', 'RF', 'RF_inference']:
    path = f'{PREDICTIONS_DIR}/{i}.tsv'

    for val in varprior_vals:
        SHARPR_Scores = compute_sharpr_from_tiles(path, val)
        # A missing sanity-check region should not abort the sweep before saving.
        if CHECK_REGION_ID in SHARPR_Scores.index:
            print(SHARPR_Scores.loc[CHECK_REGION_ID])
        else:
            print(f'{CHECK_REGION_ID} not found in {path} (varprior={val})')
        save_path = f'{SCORES_DIR}/{i}_{val}.pkl'
        SHARPR_Scores.to_pickle(save_path)