This code duplicates every row of a GRanges object according to its score.  
This is necessary because the groHMM 'detectTranscripts' function does not take the score as a parameter; instead, it requires each line to be duplicated in proportion to its read count.

In [2]:
import pandas as pd

In [3]:
def duplicate_df(df):
    '''
    Expand a score-weighted interval table into one row per unit of score.

    Input:
    df: a dataframe corresponding to a GRange, typically
    df[chromosome,start,end,name,score,strand].
    Only the integer `score` column is required; every other column is
    carried over unchanged, whatever its name or position.
    Output:
    A new dataframe with the same columns in the same order, where every
    row is repeated a number of times equal to its score and `score` is
    reset to 1. Rows whose score is zero or negative are dropped.
    The input dataframe is not modified and the result is re-indexed
    from 0.
    '''
    if df.empty:
        # Nothing to repeat; returning early also sidesteps repeat-count
        # casting problems on empty, all-object frames.
        return df.reset_index(drop=True)

    # Work on a positional 0..n-1 index so label-based selection below is
    # unambiguous even if the caller's index contains duplicates.
    rows = df.reset_index(drop=True)

    # Negative scores contribute no rows (a negative repeat count would
    # otherwise raise).
    counts = rows["score"].clip(lower=0).to_numpy()

    expanded = rows.loc[rows.index.repeat(counts)]
    return expanded.assign(score=1).reset_index(drop=True)

def duplicate_bed(fileName):
    '''
    Outputs a BED file where every row is duplicated
    according to its GRange score
    Input:
    fileName: The name of the GRange BED file without the .bed suffix.
    The file is whitespace-delimited and its first line is parsed as the
    header row (pandas default), so it must name the columns that
    `duplicate_df` reads (including `score`).
    Output:
    Writes the duplicated table to disk as "<fileName>_duplicated.bed",
    tab-separated, without a header row or index column.
    '''
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(f"{fileName}.bed", sep=r"\s+")
    duplicated_df = duplicate_df(df)
    duplicated_df.to_csv(
        f"{fileName}_duplicated.bed", header=False, index=False, sep="\t"
    )

In [38]:
# Reads MCF-7.bed and writes MCF-7_duplicated.bed.
duplicate_bed("MCF-7")

In [5]:
# Reads drosophila.bed and writes drosophila_duplicated.bed.
duplicate_bed("drosophila")

In [7]:
# Reads arabidopsis.bed and writes arabidopsis_duplicated.bed.
duplicate_bed("arabidopsis")