In [1]:
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
import numpy as np

def pearson_spearman(x, y):
    pearson_corr, pearson_p_value = scipy.stats.pearsonr(x, y)
    print(f"Pearson correlation coefficient: {pearson_corr:.4f}, p-value: {pearson_p_value:.4g}")

    spearman_corr, spearman_p_value = scipy.stats.spearmanr(x, y)
    print(f"Spearman correlation coefficient: {spearman_corr:.4f}, p-value: {spearman_p_value:.4g}")

In [31]:
# EXAMPLE MAKE ONE_TO_ONE_PEAKS THEN SAVE IT
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
import numpy as np

species = 'macaque'

both_df = pd.read_csv(f"/home/azstephe/liverRegression/regression_liver/data/splits/{species}Mouse/{species}ToMouse_liver_{species}Enhancer_mouseEnhancer_test_wawb.narrowPeak", sep="\t", header=None)
mac_qn = pd.read_csv(f"/home/azstephe/liverRegression/regression_liver/data/log/{species}_liver_pos_ALL.bed", sep="\t", header=None)
mouse_qn = pd.read_csv("/home/azstephe/liverRegression/regression_liver/data/log/mouse_liver_pos_ALL.bed", sep="\t", header=None)

unique1_2 = both_df[~both_df.duplicated(subset=[1, 2], keep=False)] # entries with unique mac mapped to mouse start and end
all_unique = unique1_2[~unique1_2.duplicated(subset=[11,12], keep=False)]

#### FUNCTIONS

def pearson_spearman(x, y):
    pearson_corr, pearson_p_value = scipy.stats.pearsonr(x, y)
    print(f"Pearson correlation coefficient: {pearson_corr:.4f}, p-value: {pearson_p_value:.4g}")

    spearman_corr, spearman_p_value = scipy.stats.spearmanr(x, y)
    print(f"Spearman correlation coefficient: {spearman_corr:.4f}, p-value: {spearman_p_value:.4g}")
    
def collapse_if_identical(lst):
    return lst[0] if all(x == lst[0] for x in lst) else lst
    

# takes in list and returns peak with largest signal
def keep_largest_signal(lst, qn_df):
    if isinstance(lst, list):
        maxPeakName = ''
        maxPeakStrength = 0
        for peak in lst:
            matching_rows = qn_df[qn_df[3] == peak]
            if not matching_rows.empty:
                signal = matching_rows[4].iloc[0]
                if signal > maxPeakStrength:
                    maxPeakStrength = signal
                    maxPeakName = peak
        return maxPeakName
    else: 
        return lst

# def overlap(start1, end1, start2, end2):
#     return max(0, min(end1, end2)-max(start1, start2))

def get_biggest_overlap(lst, col, df):
    maxOverlap = 0
    maxPeakOverlap = ""
    for peak in lst:
        row = df[df[col] == peak].iloc[0] #iloc bc possible to return multiple rows (never will based on th
        overlap = max(0, min(row[2], row[12])-max(row[1], row[11]))
        if overlap > maxOverlap:
            maxOverlap = overlap
            maxPeakOverlap = peak
    return peak

#### COL1,2 DUPLICATES

duplicates3 = both_df[both_df.duplicated(subset=[1, 2], keep=False)] # entries with duplicated mac mapped to mouse start and end 
grouped_dups3 = duplicates3.groupby([1, 2])[3].apply(list).reset_index(name='col3')

# grouped dups: start | end | [peaks with these endpoints]
grouped_dups3['col3'] = grouped_dups3['col3'].apply(collapse_if_identical) 

grouped_dups3['col3'] = grouped_dups3['col3'].apply(lambda x: keep_largest_signal(x, mac_qn)) # redundant endpoints now map to 1 peak in col3
grouped_dups3 = grouped_dups3[grouped_dups3['col3'] != ''] # remove macaque peaks that aren't large enough

# df of duplicated col1,2 with strongest peak
keep_strongestcol3 = duplicates3.merge(grouped_dups3[['col3']], left_on=3, right_on='col3', how='inner').drop('col3', axis=1) # keeps the strongest signal in col3 for redundant endpoints

#### COL11,12 DUPLICATES

grouped_dups13 = keep_strongestcol3.groupby([11, 12])[13].apply(list).reset_index(name='col13')
grouped_dups13['col13'] = grouped_dups13['col13'].apply(collapse_if_identical)

grouped_dups13['col13'] = grouped_dups13['col13'].apply(lambda x: keep_largest_signal(x, mouse_qn))
grouped_dups13 = grouped_dups13[grouped_dups13['col13'] != '']

keep_strongestcol13 = keep_strongestcol3.merge(grouped_dups13[['col13']], left_on=13, right_on='col13', how='inner').drop('col13', axis=1)

unique_endpoints = keep_strongestcol13 #rows with unique endpoints from the duplicated endpoints set

####

# col3 peakname duplicates with different endpoints
still_dups_col3 = unique_endpoints[unique_endpoints.duplicated(subset=[3], keep=False)]

# col3peaks | [col13 peaks intersecting col3 peak]
grouped_dcol3 = still_dups_col3.groupby(3)[13].apply(list).reset_index(name='col13')

# get the col13 peak with most overlap of col3
grouped_dcol3['col13'] = grouped_dcol3['col13'].apply(lambda x: get_biggest_overlap(x, 13, still_dups_col3))

merged3 = still_dups_col3.merge(grouped_dcol3[[3, 'col13']], left_on=[3, 13], right_on=[3, 'col13'], how='left', indicator=True)

remove3 = merged3[merged3['_merge'] == 'left_only'].drop(columns=['_merge']) # col13 is what we want to remove

unique_endpoints_subset = unique_endpoints.iloc[:,:20]
remove3_subset = remove3.iloc[:,:20]

# all col3 entries unique
unique3 = unique_endpoints[~unique_endpoints_subset.apply(tuple, axis=1).isin(remove3_subset.apply(tuple, axis=1))] 

####

# col13 peakname duplicates with different endpoints
still_dups_col13 = unique3[unique3.duplicated(subset=[13], keep=False)]

# col13peaks | [col3 peaks intersecting col13 peak]
grouped_dcol13 = still_dups_col13.groupby(13)[3].apply(list).reset_index(name='col3')

# get the col3 peak with most overlap of col13
grouped_dcol13['col3'] = grouped_dcol13['col3'].apply(lambda x: get_biggest_overlap(x, 3, still_dups_col13))

merged13 = still_dups_col13.merge(grouped_dcol13[[13, 'col3']], left_on=[3, 13], right_on=['col3', 13], how='left', indicator=True)

remove13 = merged13[merged13['_merge'] == 'left_only'].drop(columns=['_merge'])

unique3_subset = unique3.iloc[:, :20]
remove13_subset = remove13.iloc[:, :20]

# Identify rows in u3 that are NOT in remove3
filtered_peaks_unique = unique3[~unique3_subset.apply(tuple, axis=1).isin(remove13_subset.apply(tuple, axis=1))]

full_unique = pd.concat([all_unique, filtered_peaks_unique])

# Sort by column '1'
one_to_one_peaks = full_unique.sort_values(by=1).reset_index(drop=True)
one_to_one_peaks.to_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/oneToOnePeaks/{species}_mouse.bed', header=None, sep='\t')
one_to_one_peaks
##############################



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,chr2,3228507,3228816,peak55529,-1,.,-1,-1,-1,119,chr2,3228532,3229401,peak11427,1000,.,16.72388,152.95140,149.78862,727
1,chr2,3799568,3800261,peak62146,-1,.,-1,-1,-1,632,chr2,3799767,3800338,peak11438,817,.,4.91323,20.94289,18.55824,208
2,chr1,4571620,4572258,peak33144,-1,.,-1,-1,-1,154,chr1,4571316,4572253,peak23,1000,.,11.87366,175.62753,172.38829,420
3,chr2,5249965,5250352,peak29753,-1,.,-1,-1,-1,173,chr2,5250005,5250415,peak11452,746,.,6.23602,32.05004,29.53300,120
4,chr2,5749258,5749747,peak63648,-1,.,-1,-1,-1,364,chr2,5749265,5749600,peak11459,1000,.,7.74779,46.30035,43.65820,169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,chr1,194995146,194995539,peak36835,-1,.,-1,-1,-1,203,chr1,194995180,194995620,peak1965,1000,.,8.09555,68.96048,66.16955,196
395,chr1,195016706,195017386,peak33495,-1,.,-1,-1,-1,203,chr1,195017180,195017466,peak1968,1000,.,5.09901,40.06577,37.47466,169
396,chr1,195018117,195018471,peak12739,-1,.,-1,-1,-1,185,chr1,195017987,195018471,peak1969,1000,.,11.09704,148.40295,145.25659,336
397,chr1,195034034,195034269,peak77700,-1,.,-1,-1,-1,151,chr1,195033987,195034343,peak1970,1000,.,12.14839,108.73130,105.74149,201


In [4]:
species = 'macaque'
model = 'bdbi7l3n'

mac_qn = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/log/{species}_liver_pos_ALL.bed', sep="\t", header=None)
mouse_qn = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/log/mouse_liver_pos_ALL.bed', sep="\t", header=None)

one_to_one_peaks = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/oneToOnePeaks/{species}_mouse.bed', header=None, sep='\t')
pred_df = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/model_outputs/{model}/activations_{species}_TEST.csv', header=None)
val1_df = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/amy_test1/{species}_liver_TEST_500bp.bed', header=None, delim_whitespace=True).iloc[:,4]
# val2_df = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/splits/val2/{species}_liver_VAL.narrowPeak', header=None, delim_whitespace=True).iloc[:,4]
val3_df = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/log_test3/{species}_liver_TEST_500bp.bed', header=None, delim_whitespace=True).iloc[:,4]

val2_df = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/log_test2/{species}_liver_TEST_500bp.bed', header=None, sep="\t").rename(columns={0: 'mac_chr'})

val1_len = 2*len(val1_df)
val2_len = 2*len(val2_df)

####################### make the ____ doubled predicted and true df
peaks = one_to_one_peaks[[4, 14]]

merged_df = peaks.merge(mac_qn, how='left', left_on=4, right_on=3, suffixes=('', '_mac'))

# Merge the result with df3 on col13 and df3['col3']
merged_df = merged_df.merge(mouse_qn, how='left', left_on=14, right_on=3, suffixes=('_MAC', '_mouse'))

# Subtract col4 values from df2 and df3
# merged_df['result'] = merged_df['4_mac'] - merged_df['4_mouse']
# res = merged_df[['4', 14, '4_mac', '4_mouse', 'result']]
# nan = res[res.isna().any(axis=1)]

# nan

####################### 

doubled_val2_df = pd.concat([val2_df, val2_df]).sort_index(kind='mergesort').reset_index(drop=True)

pred_val2_df = pred_df.iloc[val1_len:val1_len + val2_len].reset_index(drop=True)

doubled_combined = pd.concat([doubled_val2_df, pred_val2_df], axis=1)

mac_val2_real_pred = doubled_combined.rename(columns={4: 'mac_true', 0:'mac_pred'})

merged_mac_real_pred = peaks.merge(mac_val2_real_pred, how='left', left_on=4, right_on=3).drop(14, axis=1)
# m_mac_pred = merged_mac_real_pred[~merged_mac_real_pred[3].isin(nan['3_mac'])].reset_index(drop=True)
m_mac_pred = merged_mac_real_pred.rename(columns={3:'mac_peak'}).drop(4, axis=1)

####################### make the mouse doubled predicted and true df

mouse_pred = pd.read_csv(f'/home/azstephe/liverRegression/regression_liver/data/model_outputs/{model}/activations_mouse_TEST.csv', sep="\t", header=None)
mouse_real_neg=pd.read_csv('/home/azstephe/liverRegression/regression_liver/data/test_splits/neg/mouse_liver_TEST_500bp.bed', sep="\t", header=None)
mouse_real_pos=pd.read_csv('/home/azstephe/liverRegression/regression_liver/data/test_splits/log_pos/mouse_liver_TEST_500bp.bed', sep="\t", header=None)

real_neg_len = 2*len(mouse_real_neg)
real_pos_len = 2*len(mouse_real_pos)

mouse_pred_pos = mouse_pred.head(real_pos_len).reset_index(drop=True)

doubled_mouse_real_pos = pd.concat([mouse_real_pos, mouse_real_pos]).sort_index(kind='mergesort').reset_index(drop=True)


doubled_mouse_real_pos = doubled_mouse_real_pos.rename(columns={0:'mouse_chr'})
mouse_pos_real_pred = pd.concat([doubled_mouse_real_pos.drop(columns=5), mouse_pred_pos], axis=1)
mouse_pos_real_pred = mouse_pos_real_pred.rename(columns={4:'mouse_true', 0:'mouse_pred'}).reset_index(drop=True)


merged_mouse_pred = peaks.merge(mouse_pos_real_pred, how='left', left_on=14, right_on=3)

m_mouse_pred = merged_mouse_pred.rename(columns={3:'mouse_peak'}).drop([14], axis=1)

####################### make bigboss mouse + query predicted and real info

bigboss = pd.concat([m_mouse_pred, m_mac_pred], axis=1)

bigboss['true'] = bigboss['mouse_true']-bigboss['mac_true']
bigboss['pred'] = bigboss['mouse_pred']-bigboss['mac_pred']

mouse_av = (bigboss.loc[::2, 'mouse_pred'].values + bigboss.loc[1::2, 'mouse_pred'].values) / 2
mac_av = (bigboss.loc[::2, 'mac_pred'].values + bigboss.loc[1::2, 'mac_pred'].values) / 2

# Add the averages back to the DataFrame as a new column
bigboss.loc[::2, 'mouse_pred_avg'] = mouse_av  # Assign averages to even indices only
bigboss.loc[::2, 'mac_pred_avg'] = mac_av 
bigboss['avg_pred'] = bigboss['mouse_pred_avg']-bigboss['mac_pred_avg']


same_sign = np.sign(bigboss['true']) == np.sign(bigboss['pred'])
print(f'# of same sign for true difference and predicted difference: {same_sign.sum()} / {len(bigboss)} = {same_sign.sum() / len(bigboss)} %')

same_sign = np.sign(bigboss.loc[::2]['true']) == np.sign(bigboss.loc[::2]['avg_pred'])
print(f'# of same sign for true difference and averaged predicted difference: {same_sign.sum()} / {len(bigboss) / 2} = {same_sign.sum() / (len(bigboss) / 2)}%')

x = bigboss['true']
y = bigboss['pred']

print(pearson_spearman(x, y))

# of same sign for true difference and predicted difference: 154 / 798 = 0.19298245614035087 %
# of same sign for true difference and averaged predicted difference: 59 / 399.0 = 0.14786967418546365%


ValueError: array must not contain infs or NaNs

In [28]:
both_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,chr1,172410858,172411100,peak35917,-1,.,-1,-1,-1,147,chr1,172410904,172411124,peak1638,1000,.,7.46433,43.51744,40.89712,106
1,chr1,170516072,170516711,peak49234,-1,.,-1,-1,-1,411,chr1,170516091,170516710,peak1628,1000,.,8.50367,53.94794,51.25016,170
2,chr1,170516072,170516711,peak48014,-1,.,-1,-1,-1,195,chr1,170516091,170516710,peak1628,1000,.,8.50367,53.94794,51.25016,170
3,chr1,170496291,170496665,peak57069,-1,.,-1,-1,-1,238,chr1,170495849,170496626,peak1624,1000,.,4.21304,18.17925,15.83690,124
4,chr1,170496291,170496665,peak57069,-1,.,-1,-1,-1,238,chr1,170495849,170496626,peak1625,1000,.,13.59614,130.75430,127.67420,530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
716,chr2,20990121,20990727,peak52293,-1,.,-1,-1,-1,138,chr2,20990090,20990342,peak11650,968,.,5.38566,24.74441,22.30925,119
717,chr2,20990121,20990727,peak79505,-1,.,-1,-1,-1,464,chr2,20990090,20990342,peak11650,968,.,5.38566,24.74441,22.30925,119
718,chr2,21280878,21281191,peak21070,-1,.,-1,-1,-1,131,chr2,21280669,21281287,peak11662,1000,.,3.64482,13.91373,11.65236,97
719,chr2,21280878,21281191,peak21070,-1,.,-1,-1,-1,131,chr2,21280669,21281287,peak11661,1000,.,17.12707,187.82730,184.54929,325


In [20]:
mac_val2_real_pred

Unnamed: 0,mac_chr,1,2,3,mac_true,mac_pred
0,chr1,134736163,134736663,peak35917,2.214026,1.248863
1,chr1,134736163,134736663,peak35917,2.214026,1.212983
2,chr1,136933860,136934360,peak49234,2.122200,1.016201
3,chr1,136933860,136934360,peak49234,2.122200,0.903532
4,chr1,136934092,136934592,peak48014,2.131472,1.155565
...,...,...,...,...,...,...
949,chr9,86944159,86944659,peak14033,2.823560,1.572033
950,chr9,98539330,98539830,peak98237,1.102986,0.810652
951,chr9,98539330,98539830,peak98237,1.102986,0.823465
952,chr9,98539699,98540199,peak19638,2.405192,1.385175


In [45]:
# EXAMPLE MAKE ONE_TO_ONE_PEAKS THEN SAVE IT
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
import numpy as np

species = 'macaque'
#both_df = pd.read_csv(f"/home/azstephe/liverRegression/regression_liver/data/test_splits/mouse_mapped/macaqueToMouse_liver_mouseEnhancer_macaqueEnhancer_wawb.narrowPeak", sep="\t", header=None)

both_df = pd.read_csv(f"/home/azstephe/liverRegression/regression_liver/data/test_splits/mouse_mapped/mouseToMacaque_liver_macaqueEnhancer_mouseEnhancer_wawb.narrowPeak", sep="\t", header=None)
mac_qn = pd.read_csv(f"/home/azstephe/liverRegression/regression_liver/data/log/{species}_liver_pos_ALL.bed", sep="\t", header=None)
mouse_qn = pd.read_csv("/home/azstephe/liverRegression/regression_liver/data/log/mouse_liver_pos_ALL.bed", sep="\t", header=None)

unique1_2 = both_df[~both_df.duplicated(subset=[1, 2], keep=False)] # entries with unique mac mapped to mouse start and end
all_unique = unique1_2[~unique1_2.duplicated(subset=[11,12], keep=False)]

#### COL1,2 DUPLICATES

duplicates3 = both_df[both_df.duplicated(subset=[1, 2], keep=False)] # entries with duplicated mac mapped to mouse start and end 
grouped_dups3 = duplicates3.groupby([1, 2])[3].apply(list).reset_index(name='col3')

# grouped dups: start | end | [peaks with these endpoints]
grouped_dups3['col3'] = grouped_dups3['col3'].apply(collapse_if_identical) 

grouped_dups3['col3'] = grouped_dups3['col3'].apply(lambda x: keep_largest_signal(x, mac_qn)) # redundant endpoints now map to 1 peak in col3
grouped_dups3 = grouped_dups3[grouped_dups3['col3'] != ''] # remove macaque peaks that aren't large enough

# df of duplicated col1,2 with strongest peak
keep_strongestcol3 = duplicates3.merge(grouped_dups3[['col3']], left_on=3, right_on='col3', how='inner').drop('col3', axis=1) # keeps the strongest signal in col3 for redundant endpoints

#### COL11,12 DUPLICATES

grouped_dups13 = keep_strongestcol3.groupby([11, 12])[13].apply(list).reset_index(name='col13')
grouped_dups13['col13'] = grouped_dups13['col13'].apply(collapse_if_identical)

grouped_dups13['col13'] = grouped_dups13['col13'].apply(lambda x: keep_largest_signal(x, mouse_qn))
grouped_dups13 = grouped_dups13[grouped_dups13['col13'] != '']

keep_strongestcol13 = keep_strongestcol3.merge(grouped_dups13[['col13']], left_on=13, right_on='col13', how='inner').drop('col13', axis=1)

unique_endpoints = keep_strongestcol13 #rows with unique endpoints from the duplicated endpoints set

####

# col3 peakname duplicates with different endpoints
still_dups_col3 = unique_endpoints[unique_endpoints.duplicated(subset=[3], keep=False)]

# col3peaks | [col13 peaks intersecting col3 peak]
grouped_dcol3 = still_dups_col3.groupby(3)[13].apply(list).reset_index(name='col13')

# get the col13 peak with most overlap of col3
grouped_dcol3['col13'] = grouped_dcol3['col13'].apply(lambda x: get_biggest_overlap(x, 13, still_dups_col3))

merged3 = still_dups_col3.merge(grouped_dcol3[[3, 'col13']], left_on=[3, 13], right_on=[3, 'col13'], how='left', indicator=True)

remove3 = merged3[merged3['_merge'] == 'left_only'].drop(columns=['_merge']) # col13 is what we want to remove

unique_endpoints_subset = unique_endpoints.iloc[:,:20]
remove3_subset = remove3.iloc[:,:20]

# all col3 entries unique
unique3 = unique_endpoints[~unique_endpoints_subset.apply(tuple, axis=1).isin(remove3_subset.apply(tuple, axis=1))] 

####

# # col13 peakname duplicates with different endpoints
# still_dups_col13 = unique3[unique3.duplicated(subset=[13], keep=False)]

# # col13peaks | [col3 peaks intersecting col13 peak]
# grouped_dcol13 = still_dups_col13.groupby(13)[3].apply(list).reset_index(name='col3')

# # get the col3 peak with most overlap of col13
# grouped_dcol13['col3'] = grouped_dcol13['col3'].apply(lambda x: get_biggest_overlap(x, 3, still_dups_col13))

# merged13 = still_dups_col13.merge(grouped_dcol13[[13, 'col3']], left_on=[3, 13], right_on=['col3', 13], how='left', indicator=True)

# remove13 = merged13[merged13['_merge'] == 'left_only'].drop(columns=['_merge'])

# unique3_subset = unique3.iloc[:, :20]
# remove13_subset = remove13.iloc[:, :20]

# # Identify rows in u3 that are NOT in remove3
# filtered_peaks_unique = unique3[~unique3_subset.apply(tuple, axis=1).isin(remove13_subset.apply(tuple, axis=1))]

# full_unique = pd.concat([all_unique, filtered_peaks_unique])

# # Sort by column '1'
# one_to_one_peaks = full_unique.sort_values(by=1).reset_index(drop=True)
# one_to_one_peaks.to_csv(f'/home/azstephe/liverRegression/regression_liver/data/test_splits/oneToOnePeaks/{species}_mouse_newtest2.bed', header=None, sep='\t')
# one_to_one_peaks
##############################



In [46]:
df = both_df
# df = df[df.columns[10:20].tolist() + df.columns[0:10].tolist() + df.columns[20:].tolist()]
df # macaque mapped to mouse

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,chr1,134736277,134736545,peak35917,1000,.,8.15249,78.33947,75.70458,136,chr1,134736238,134736502,peak1925,-1,.,-1,-1,-1,170
1,chr1,136933877,136934529,peak49234,988,.,7.34949,50.37695,47.89619,233,chr1,136933878,136934506,peak1915,-1,.,-1,-1,-1,470
2,chr1,136933877,136934529,peak48014,988,.,7.42726,52.27309,49.78019,465,chr1,136933878,136934506,peak1915,-1,.,-1,-1,-1,470
3,chr1,136957486,136957905,peak57069,1000,.,6.24535,39.86447,37.45783,178,chr1,136957525,136958335,peak1911,-1,.,-1,-1,-1,686
4,chr1,136957486,136957905,peak57069,1000,.,6.24535,39.86447,37.45783,178,chr1,136957525,136958335,peak1912,-1,.,-1,-1,-1,294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
638,chr9,83428417,83429117,peak39667,1000,.,7.22237,68.63238,66.04608,468,chr9,83428486,83428673,peak13189,-1,.,-1,-1,-1,92
639,chr9,84353599,84354300,peak22510,1000,.,11.85278,134.85542,131.99551,427,chr9,84353887,84354139,peak13190,-1,.,-1,-1,-1,125
640,chr9,86944200,86944586,peak14033,1000,.,15.83668,211.19728,208.10689,209,chr9,86944202,86944914,peak13201,-1,.,-1,-1,-1,170
641,chr9,98539499,98540293,peak98237,1000,.,2.01315,5.61643,3.79291,81,chr9,98539266,98540104,peak13217,-1,.,-1,-1,-1,207


In [32]:
both_df # macaque mapped to mouse

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,chr1,172410858,172411100,peak35917,-1,.,-1,-1,-1,147,chr1,172410904,172411124,peak1638,1000,.,7.46433,43.51744,40.89712,106
1,chr1,170516072,170516711,peak49234,-1,.,-1,-1,-1,411,chr1,170516091,170516710,peak1628,1000,.,8.50367,53.94794,51.25016,170
2,chr1,170516072,170516711,peak48014,-1,.,-1,-1,-1,195,chr1,170516091,170516710,peak1628,1000,.,8.50367,53.94794,51.25016,170
3,chr1,170496291,170496665,peak57069,-1,.,-1,-1,-1,238,chr1,170495849,170496626,peak1624,1000,.,4.21304,18.17925,15.83690,124
4,chr1,170496291,170496665,peak57069,-1,.,-1,-1,-1,238,chr1,170495849,170496626,peak1625,1000,.,13.59614,130.75430,127.67420,530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
716,chr2,20990121,20990727,peak52293,-1,.,-1,-1,-1,138,chr2,20990090,20990342,peak11650,968,.,5.38566,24.74441,22.30925,119
717,chr2,20990121,20990727,peak79505,-1,.,-1,-1,-1,464,chr2,20990090,20990342,peak11650,968,.,5.38566,24.74441,22.30925,119
718,chr2,21280878,21281191,peak21070,-1,.,-1,-1,-1,131,chr2,21280669,21281287,peak11662,1000,.,3.64482,13.91373,11.65236,97
719,chr2,21280878,21281191,peak21070,-1,.,-1,-1,-1,131,chr2,21280669,21281287,peak11661,1000,.,17.12707,187.82730,184.54929,325


In [44]:
df = both_df
# df = df[df.columns[10:20].tolist() + df.columns[0:10].tolist() + df.columns[20:].tolist()]
df # macaque mapped to mouse

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,chr1,134736277,134736545,peak35917,1000,.,8.15249,78.33947,75.70458,136,chr1,134736238,134736502,peak1925,-1,.,-1,-1,-1,170
1,chr1,136933877,136934529,peak49234,988,.,7.34949,50.37695,47.89619,233,chr1,136933878,136934506,peak1915,-1,.,-1,-1,-1,470
2,chr1,136933877,136934529,peak48014,988,.,7.42726,52.27309,49.78019,465,chr1,136933878,136934506,peak1915,-1,.,-1,-1,-1,470
3,chr1,136957486,136957905,peak57069,1000,.,6.24535,39.86447,37.45783,178,chr1,136957525,136958335,peak1911,-1,.,-1,-1,-1,686
4,chr1,136957486,136957905,peak57069,1000,.,6.24535,39.86447,37.45783,178,chr1,136957525,136958335,peak1912,-1,.,-1,-1,-1,294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
638,chr9,83428417,83429117,peak39667,1000,.,7.22237,68.63238,66.04608,468,chr9,83428486,83428673,peak13189,-1,.,-1,-1,-1,92
639,chr9,84353599,84354300,peak22510,1000,.,11.85278,134.85542,131.99551,427,chr9,84353887,84354139,peak13190,-1,.,-1,-1,-1,125
640,chr9,86944200,86944586,peak14033,1000,.,15.83668,211.19728,208.10689,209,chr9,86944202,86944914,peak13201,-1,.,-1,-1,-1,170
641,chr9,98539499,98540293,peak98237,1000,.,2.01315,5.61643,3.79291,81,chr9,98539266,98540104,peak13217,-1,.,-1,-1,-1,207


In [12]:
mouse_qn

Unnamed: 0,0,1,2,3,4
0,chr1,3368827,3369327,peak1,2.251052
1,chr1,3369075,3369575,peak2,1.339439
2,chr1,3416045,3416545,peak3,3.034834
3,chr1,3416436,3416936,peak4,1.349763
4,chr1,3423147,3423647,peak5,2.917047
...,...,...,...,...,...
22037,chrX,169903661,169904161,peak22037,2.146267
22038,chrX,169904084,169904584,peak22038,1.171128
22039,chrX,169906007,169906507,peak22040,1.404790
22040,chrX,169906208,169906708,peak22041,1.965940
