In [71]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook
tqdm_notebook().pandas()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

In [72]:
from bs4 import BeautifulSoup
import requests
import time

In [73]:
panther_names = {}

In [74]:
def get_panther_name(pantherid):
    if pantherid in panther_names.keys():
        return panther_names[pantherid];
    else: 
        r = requests.get("http://pantherdb.org/panther/family.do?clsAccession={}".format(pantherid))
        soup = BeautifulSoup(r.text, 'html.parser')
        family_name = soup.find(id="mainBody").table.tr.find("td",class_="mainText").text.strip()
        panther_names[pantherid] = family_name
        time.sleep(1)
        return family_name

In [75]:
panther_names

{}

In [76]:
datasets = [
    ("Gaublomme_GSE75109_TPM_clean", "clean_panther4march","geomean"),
    ("Gaublomme_GSE75110_TPM_clean","clean_panther4march","geomean"),
    ("Gaublomme_GSE75111_TPM_clean","clean_panther4march","geomean"),
#     "somatosensory_rpkm_suppl",
    ("kakadarov_tpm","clean_panther4march","geomean"),
    ("somatosensory_converted_into_tpm","clean_panther4march","geomean"),
    ("Dopaminergic_TPM_clean","clean_panther4march","geomean"),
    ("Rbp4_positive_cells","clean_panther4march","geomean"),
    ("Cheng_ES_TPM","clean_panther4march","geomean"),
    ("Alveolar_cells_Type_II_Merged_Batches","clean_panther4march","3max"),
    ("Alveolar_cells_Type_I_Merged_Batches","clean_panther4march","3max"),
#     ("Alveolar_cells_both_types","clean_panther4march","3max"),
    ("klein","clean_panther4march","3max"),
    ("hepat_TPM_yang_clean","clean_panther4march","geomean"),
    ("Yu_First_wave_endocrine_cells","clean_panther4march","geomean"),
#     ("lcl_european","HGNC_families"),
#     ("lcl_african","HGNC_families"),
#     ("cd4_zheng","HGNC_families"),
]



family_to_gene = pd.concat([
    pd.read_csv("clean_panther4march.csv", index_col="family_id"),
    pd.read_csv("HGNC_families.csv", index_col="family_id")
], sort=False)




In [77]:
# family_to_gene.loc[1492.0]

In [78]:
# pd.options.display.max_colwidth = 1000
pd.options.display.max_seq_items = 25

# Family-wise

* T-cell receptor beta-chain is present in all three Gaublomme sets (exclusive)
* Multiple histone families are conserved in all three Gaublomme sets (co-occurrent)
* Pcdh-clustered family is not shown as conserved (it is not exclusive in somatosensory and dopaminergic)
* A lot of Zinc finger proteins are conserved between all datasets (co-occurrent)

In [79]:
family_dfs = {x: pd.read_csv("{}/{}/results/family_IC.csv".format(x,y), index_col="family_id").dropna() for x, y, _ in datasets}

In [80]:
def get_lower(df):
    return df[df.ic <= df.ic.quantile(0.025)].index.values

def get_upper(df):
    return df[df.ic >= df.ic.quantile(0.975)].index.values

def get_intersection(l1, l2):
    return set(l1).intersection(l2)

In [81]:
dataset_names = [x[0] for x in datasets]

In [82]:
# Overlap display formaters
def format_family_names(x):
    if x is None or len(x) == 0:
        return ""
    _x = list(x.copy())
    s = "\n".join(_x[:10])
    if(len(_x) > 10):
        s += "\n+{} families".format(len(_x)-10)
    return s

def format_gene_names(x):
    if x is None or len(x) == 0:
        return ""
    
    s = ""
    for genes in x[:5]:
        genes_s = ",".join(genes[:5])
        if (len(genes) > 5):
            genes_s += " +{} genes".format(len(genes)-5)
        s += genes_s + "\n"
    if(len(x) > 5):
        s += "+{} families".format(len(x)-5)
    return s.strip()
    

In [83]:
family_overlap_lower = pd.DataFrame([[None if x == y else get_intersection(get_lower(family_dfs[x]),get_lower(family_dfs[y])) for y, _, _ in datasets] for x, _, _ in datasets], index=dataset_names, columns=dataset_names)

In [84]:
pd.options.display.max_colwidth = 1000
pd.options.display.max_seq_items = 5
writer = pd.ExcelWriter("overlap_family_lower.xlsx")
# display(family_overlap_lower.applymap(lambda x: "" if x is None else ",".join(list(x))))
family_overlap_lower.applymap(lambda x: "" if x is None else ",".join(list(x))).to_csv("overlap_family_lower_raw.csv")
display(family_overlap_lower.applymap(format_family_names))
display(family_overlap_lower.progress_applymap(lambda x: [get_panther_name(_x) for _x in x] if x is not None else None).applymap(format_family_names))
display(family_overlap_lower.progress_applymap(lambda x: [family_to_gene.loc[np.int(family) if isinstance(family, np.float64) else family].gene_symbol for family in x] if x is not None else []).applymap(format_gene_names))
display(family_overlap_lower.progress_applymap(lambda x: len(x) if x is not None else np.nan))

family_overlap_lower.applymap(format_family_names).to_excel(writer,"by IDs")
family_overlap_lower.applymap(lambda x: [get_panther_name(_x) for _x in x] if x is not None else None).applymap(format_family_names).to_excel(writer,"by family names")
family_overlap_lower.applymap(lambda x: [family_to_gene.loc[np.int(family) if isinstance(family, np.float64) else family].gene_symbol for family in x] if x is not None else []).applymap(format_gene_names).to_excel(writer,"by genes")
family_overlap_lower.applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"count by families")
writer.save()

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,,PTHR23268\nPTHR11636,PTHR24089\nPTHR23268\nPTHR19375,PTHR12587\nPTHR11453,PTHR11453\nPTHR19375,,,,,,PTHR13771,,
Gaublomme_GSE75110_TPM_clean,PTHR23268\nPTHR11636,,PTHR12570\nPTHR23268,PTHR45615\nPTHR11969,PTHR12570\nPTHR42884,,PTHR11969,PTHR45615,,PTHR12268,,PTHR12570,PTHR10901
Gaublomme_GSE75111_TPM_clean,PTHR24089\nPTHR23268\nPTHR19375,PTHR12570\nPTHR23268,,,PTHR18952\nPTHR12570\nPTHR25465\nPTHR19375,,,PTHR18952,,PTHR13715,,PTHR18952\nPTHR12570\nPTHR23122,PTHR23122
kakadarov_tpm,PTHR12587\nPTHR11453,PTHR45615\nPTHR11969,,,PTHR11453,,PTHR11969,PTHR45615,PTHR10361,,,PTHR10361,PTHR45627
somatosensory_converted_into_tpm,PTHR11453\nPTHR19375,PTHR12570\nPTHR42884,PTHR18952\nPTHR12570\nPTHR25465\nPTHR19375,PTHR11453,,,PTHR10075,PTHR18952,,,,PTHR18952\nPTHR12570,PTHR10269
Dopaminergic_TPM_clean,,,,,,,PTHR12844,PTHR14002,PTHR24543,PTHR24061,,,
Rbp4_positive_cells,,PTHR11969,,PTHR11969,PTHR10075,PTHR12844,,PTHR11890,PTHR13388,,,,PTHR19384
Cheng_ES_TPM,,PTHR45615,PTHR18952,PTHR45615,PTHR18952,PTHR14002,PTHR11890,,PTHR14132\nPTHR14949,,,PTHR18952\nPTHR14949\nPTHR16024,PTHR14132\nPTHR19443
Alveolar_cells_Type_II_Merged_Batches,,,,PTHR10361,,PTHR24543,PTHR13388,PTHR14132\nPTHR14949,,PTHR13869\nPTHR13817,,PTHR14949\nPTHR10361\nPTHR24241,PTHR11733\nPTHR13817\nPTHR14132
Alveolar_cells_Type_I_Merged_Batches,,PTHR12268,PTHR13715,,,PTHR24061,,,PTHR13869\nPTHR13817,,,,PTHR13817


HBox(children=(FloatProgress(value=0.0, max=169.0), HTML(value='')))

KeyboardInterrupt: 

In [None]:
family_overlap_upper = pd.DataFrame([[None if x == y else get_intersection(get_upper(family_dfs[x]),get_upper(family_dfs[y])) for y, _, _ in datasets] for x, _, _ in datasets], index=dataset_names, columns=dataset_names)

In [None]:
pd.options.display.max_colwidth = 1000
pd.options.display.max_seq_items = 5
writer = pd.ExcelWriter("overlap_family_upper.xlsx")
display(family_overlap_upper.applymap(format_family_names))
display(family_overlap_upper.progress_applymap(lambda x: [get_panther_name(_x) for _x in x] if x is not None else None).applymap(format_family_names))
display(family_overlap_upper.progress_applymap(lambda x: [family_to_gene.loc[np.int(family) if isinstance(family, np.float64) else family].gene_symbol for family in x] if x is not None else []).applymap(format_gene_names))
display(family_overlap_upper.progress_applymap(lambda x: len(x) if x is not None else np.nan))

family_overlap_upper.applymap(format_family_names).to_excel(writer,"by IDs")
family_overlap_upper.applymap(lambda x: [get_panther_name(_x) for _x in x] if x is not None else None).applymap(format_family_names).to_excel(writer,"by family names")
family_overlap_upper.applymap(lambda x: [family_to_gene.loc[np.int(family) if isinstance(family, np.float64) else family].gene_symbol for family in x] if x is not None else []).applymap(format_gene_names).to_excel(writer,"by genes")
family_overlap_upper.applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"count by families")
writer.save()

In [None]:
# print("Pcdh clustered, IC vs. 2.5 percentile")
# display("somatosensory_converted_into_tpm")
# display(family_dfs["somatosensory_converted_into_tpm"].loc["PTHR24028_clustered"].ic)
# display(family_dfs["somatosensory_converted_into_tpm"].ic.quantile(0.025))

# # display("somatosensory_rpkm_suppl")
# # display(family_dfs["somatosensory_rpkm_suppl"].loc["PTHR24028_clustered"].ic)
# # display(family_dfs["somatosensory_rpkm_suppl"].ic.quantile(0.025))

# display("dopaminergic")
# display(family_dfs["dopaminergic"].loc["PTHR24028_clustered"].ic)
# display(family_dfs["dopaminergic"].ic.quantile(0.025))

# display("Rbp4_positive_cells")
# display(family_dfs["Rbp4_positive_cells"].loc["PTHR24028_clustered"].ic)
# display(family_dfs["Rbp4_positive_cells"].ic.quantile(0.025))

# Chromosome-wise

* Some Pcdh stretches are overlapping between different sets of neurons (exclusive)
* The highest overlap of the co-occurrent in Gaublomme sets (majorly comprised of histone genes) 

In [85]:
from tqdm import tqdm_notebook 

In [86]:
chr_names = list(range(1,20)) + ["X", "Y"]
chrs = {x: pd.read_csv("somatosensory_converted_into_tpm/intermediate/chr{}_filtered.csv".format(x)) for x in chr_names}

In [87]:
def load_chromosome_wise(dataset, dich_type):
#     dich_type = "geomean"
    chr_names = list(range(1,20)) + ["X", "Y"]
    ic_dfs = [pd.read_csv("{}/results/{}/stage1_chr{}_IC.csv".format(dataset,dich_type,x)) for x in chr_names]
    chrs = {x: pd.read_csv("{}/intermediate/chr{}_filtered.csv".format(dataset, x)) for x in chr_names}
    filtered_genes = pd.read_csv("{}/intermediate/{}/filtered_dichotomised_genes.csv".format(dataset, dich_type))
    for i, name in zip(range(len(ic_dfs)), chr_names):
        ic_dfs[i].loc[:,"chromosome"] = name

    ic_df = pd.concat(ic_dfs, sort=False).reset_index(drop=True)
    return ic_df[ic_df.n_genes >= ic_df.stretch * 6 / 7]

In [88]:
def get_genes(start, end, chromosome):
    return chrs[chromosome].loc[start:end, "Name"].values

def get_all_genes(l):
    if l is None:
        return set()
    return sorted(set(np.concatenate([get_genes(*x) for x in l])))

In [89]:
chromosome_dfs = {x: load_chromosome_wise(x, dich_type) for x, _, dich_type in tqdm_notebook(datasets)}

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




In [90]:
chromosome_dfs["Gaublomme_GSE75109_TPM_clean"]

Unnamed: 0,start,end,stretch,n_genes,obs_var,pb_var,ic,mean_expression,chromosome
29,29.0,36.0,7.0,6.0,0.488687,0.420475,1.162226,1.402878,1
30,30.0,37.0,7.0,6.0,0.231988,0.239429,0.968925,0.640288,1
127,127.0,134.0,7.0,6.0,0.996664,0.875006,1.139036,2.057554,1
128,128.0,135.0,7.0,6.0,1.211135,0.980281,1.235499,2.187050,1
129,129.0,136.0,7.0,6.0,1.134397,0.903576,1.255452,1.625899,1
...,...,...,...,...,...,...,...,...,...
69616,701.0,715.0,14.0,12.0,1.050360,0.928627,1.131089,2.791367,X
69850,935.0,949.0,14.0,12.0,1.937963,1.499301,1.292577,2.597122,X
69851,936.0,950.0,14.0,13.0,2.191012,1.660163,1.319758,2.798561,X
69852,937.0,951.0,14.0,13.0,2.191012,1.660163,1.319758,2.798561,X


In [91]:
def get_lower(df):
#     print(df[df.ic <= df.ic.quantile(0.025)].shape[0])
    slices = []
    for stretch in [7, 14, 21]:
        _slice = df[df.stretch == stretch]
#         print(_slice[_slice.ic <= _slice.ic.quantile(0.025)].shape[0])
        slices += [_slice[_slice.ic <= _slice.ic.quantile(0.025)]]
    return pd.concat(slices).loc[:, ["start", "end", "chromosome"]].values

def get_upper(df):
    slices = []
    for stretch in [7, 14, 21]:
        _slice = df[df.stretch == stretch]
        slices += [_slice[_slice.ic >= _slice.ic.quantile(0.975)]]
    return pd.concat(slices).loc[:, ["start", "end", "chromosome"]].values

def get_intersection(l1, l2):
    intersection = []
    for x in l1:
        for y in l2:
            if np.all(x == y):
                intersection += [x]
    if len(intersection) == 0:
        return None
    return np.stack(intersection)


In [92]:
dataset_names = [x for x, _, _ in datasets]

In [93]:
chromosome_overlap_lower = pd.DataFrame([[None if x == y else get_intersection(get_lower(chromosome_dfs[x]),get_lower(chromosome_dfs[y])) for y in dataset_names] for x in dataset_names], index=dataset_names, columns=dataset_names)

In [94]:
def format_stretches(x):
    if x is None or len(x) == 0:
        return ""
    _x = list(x.copy())
    s = "\n".join([str(__x) for __x in _x[:5]])
    if(len(_x) > 5):
        s += "\n+{} stretches".format(len(_x)-5)
    return s

def format_stretch_genes(x):
    if x is None or len(x) == 0:
        return ""
    _x = list(x.copy())
    s = "\n".join([str(__x) for __x in _x[:25]])
    if(len(_x) > 25):
        s += "\n+{} genes".format(len(_x)-25)
    return s


In [95]:
import pickle
with open("overlap_chromosome_lower_raw.pickle", "wb") as f:
    pickle.dump(chromosome_overlap_lower, f)

writer = pd.ExcelWriter("overlap_chromosome_lower.xlsx")
chromosome_overlap_lower.applymap(get_all_genes).applymap(format_stretch_genes).to_excel(writer,"by genes")
chromosome_overlap_lower.applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"by count (stretches)")
chromosome_overlap_lower.applymap(get_all_genes).applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"by count (genes)")
chromosome_overlap_lower.applymap(format_stretches).to_excel(writer,"by locations")
writer.save()

In [96]:
chromosome_overlap_lower.applymap(format_stretches)

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,,[814.0 821.0 3],,[68.0 75.0 10],,,,,,[68.0 75.0 10]\n[366.0 380.0 13],,,[53.0 60.0 'X']
Gaublomme_GSE75110_TPM_clean,[814.0 821.0 3],,[769.0 776.0 5]\n[97.0 104.0 8]\n[242.0 256.0 6]\n[244.0 258.0 6],[791.0 798.0 3],,[639.0 646.0 11],,,,,[96.0 103.0 19],,[465.0 472.0 13]
Gaublomme_GSE75111_TPM_clean,,[769.0 776.0 5]\n[97.0 104.0 8]\n[242.0 256.0 6]\n[244.0 258.0 6],,,,[655.0 662.0 'X'],,,,[1990.0 1997.0 2]\n[1186.0 1193.0 5]\n[382.0 389.0 13]\n[603.0 610.0 15]\n[946.0 953.0 17],,,
kakadarov_tpm,[68.0 75.0 10],[791.0 798.0 3],,,[752.0 759.0 2],[531.0 538.0 18]\n[461.0 475.0 17],[326.0 333.0 7],,[427.0 434.0 1]\n[154.0 161.0 2]\n[1872.0 1879.0 2]\n[290.0 297.0 5]\n[623.0 630.0 9]\n+7 stretches,[34.0 41.0 6]\n[343.0 350.0 6]\n[68.0 75.0 10]\n[591.0 598.0 14]\n[897.0 904.0 17]\n+2 stretches,[478.0 485.0 2]\n[372.0 379.0 4]\n[362.0 369.0 7]\n[363.0 370.0 7]\n[393.0 400.0 7]\n+7 stretches,,[1239.0 1246.0 9]\n[927.0 934.0 10]\n[627.0 634.0 11]
somatosensory_converted_into_tpm,,,,[752.0 759.0 2],,[1458.0 1465.0 2]\n[932.0 939.0 7]\n[202.0 209.0 18]\n[203.0 210.0 18]\n[341.0 348.0 'X']\n+1 stretches,[398.0 405.0 3]\n[203.0 210.0 18]\n[204.0 211.0 18]\n[205.0 212.0 18]\n[206.0 213.0 18]\n+13 stretches,[591.0 598.0 2],[632.0 639.0 14]\n[1124.0 1131.0 17]\n[622.0 636.0 4],[1393.0 1400.0 11],[202.0 209.0 18]\n[203.0 210.0 18]\n[204.0 211.0 18]\n[199.0 213.0 18]\n[200.0 214.0 18]\n+10 stretches,,[1456.0 1463.0 2]\n[932.0 939.0 7]
Dopaminergic_TPM_clean,,[639.0 646.0 11],[655.0 662.0 'X'],[531.0 538.0 18]\n[461.0 475.0 17],[1458.0 1465.0 2]\n[932.0 939.0 7]\n[202.0 209.0 18]\n[203.0 210.0 18]\n[341.0 348.0 'X']\n+1 stretches,,[53.0 60.0 7]\n[203.0 210.0 18]\n[223.0 230.0 18]\n[224.0 231.0 18]\n[200.0 214.0 18]\n+3 stretches,,[202.0 209.0 2]\n[1060.0 1067.0 6]\n[531.0 538.0 18]\n[395.0 409.0 1]\n[1555.0 1569.0 11]\n+7 stretches,[1840.0 1847.0 7]\n[763.0 770.0 9]\n[213.0 234.0 18]\n[214.0 235.0 18]\n[215.0 236.0 18]\n+1 stretches,[403.0 410.0 1]\n[406.0 413.0 1]\n[201.0 208.0 18]\n[202.0 209.0 18]\n[203.0 210.0 18]\n+1 stretches,[700.0 707.0 10]\n[1287.0 1308.0 1],[400.0 407.0 1]\n[1038.0 1045.0 2]\n[932.0 939.0 7]\n[94.0 101.0 11]\n[704.0 711.0 'X']\n+5 stretches
Rbp4_positive_cells,,,,[326.0 333.0 7],[398.0 405.0 3]\n[203.0 210.0 18]\n[204.0 211.0 18]\n[205.0 212.0 18]\n[206.0 213.0 18]\n+13 stretches,[53.0 60.0 7]\n[203.0 210.0 18]\n[223.0 230.0 18]\n[224.0 231.0 18]\n[200.0 214.0 18]\n+3 stretches,,[535.0 542.0 1],[218.0 232.0 18]\n[222.0 236.0 18]\n[223.0 237.0 18],[535.0 542.0 1]\n[1444.0 1451.0 11]\n[560.0 567.0 19],[203.0 210.0 18]\n[204.0 211.0 18]\n[199.0 213.0 18]\n[200.0 214.0 18]\n[201.0 215.0 18]\n+4 stretches,,[979.0 986.0 5]
Cheng_ES_TPM,,,,,[591.0 598.0 2],,[535.0 542.0 1],,,[535.0 542.0 1],,,
Alveolar_cells_Type_II_Merged_Batches,,,,[427.0 434.0 1]\n[154.0 161.0 2]\n[1872.0 1879.0 2]\n[290.0 297.0 5]\n[623.0 630.0 9]\n+7 stretches,[632.0 639.0 14]\n[1124.0 1131.0 17]\n[622.0 636.0 4],[202.0 209.0 2]\n[1060.0 1067.0 6]\n[531.0 538.0 18]\n[395.0 409.0 1]\n[1555.0 1569.0 11]\n+7 stretches,[218.0 232.0 18]\n[222.0 236.0 18]\n[223.0 237.0 18],,,[113.0 120.0 1]\n[114.0 121.0 1]\n[429.0 436.0 1]\n[1298.0 1305.0 1]\n[1323.0 1330.0 6]\n+87 stretches,,[619.0 626.0 2]\n[620.0 627.0 2],[489.0 496.0 1]\n[675.0 682.0 7]\n[404.0 418.0 15]\n[214.0 228.0 18]\n[215.0 229.0 18]\n+2 stretches
Alveolar_cells_Type_I_Merged_Batches,[68.0 75.0 10]\n[366.0 380.0 13],,[1990.0 1997.0 2]\n[1186.0 1193.0 5]\n[382.0 389.0 13]\n[603.0 610.0 15]\n[946.0 953.0 17],[34.0 41.0 6]\n[343.0 350.0 6]\n[68.0 75.0 10]\n[591.0 598.0 14]\n[897.0 904.0 17]\n+2 stretches,[1393.0 1400.0 11],[1840.0 1847.0 7]\n[763.0 770.0 9]\n[213.0 234.0 18]\n[214.0 235.0 18]\n[215.0 236.0 18]\n+1 stretches,[535.0 542.0 1]\n[1444.0 1451.0 11]\n[560.0 567.0 19],[535.0 542.0 1],[113.0 120.0 1]\n[114.0 121.0 1]\n[429.0 436.0 1]\n[1298.0 1305.0 1]\n[1323.0 1330.0 6]\n+87 stretches,,,,[381.0 388.0 17]\n[452.0 466.0 16]\n[216.0 237.0 18]


In [97]:
for _x in range(chromosome_overlap_lower.shape[0]):
    for _y in range(chromosome_overlap_lower.shape[1]):
        x = (chromosome_overlap_lower.iloc[_x,_y])
        if x is None:
            continue
        x = pd.DataFrame(np.hstack([x, np.reshape(x[:,1] - x[:,0], (-1,1))]), columns=["a", "b", "chr", "size"])
        x = x.sort_values("size", ascending=False)
        for i in range(x.shape[0]):
            y = x.iloc[i]
            z = x.loc[(x.chr == y.chr) & (x.a <= y.a) & (x.b >= y.b) & (x.size < y.size)]
            if(z.shape[0] != 0):
                print(_x,_y,i)
                print(z)
                break

In [98]:

chromosome_overlap_lower.applymap(get_all_genes).applymap(format_stretch_genes)

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,,Aknad1\nFam102b\nFndc7\nHenmt1\nPrpf38b\nSlc25a24\nSlc25a54\nStxbp3,,4930444F02Rik\nAbracl\nArfgef3\nCcdc28a\nEct2l\nHebp2\nNhsl1\nReps1,,,,,,1700029N11Rik\n4930444F02Rik\nA330076C08Rik\nAbracl\nArfgef3\nAtxn1\nCcdc28a\nCd83\nDtnbp1\nEct2l\nGfod1\nGmpr\nHebp2\nJarid2\nMcur1\nMylip\nNhsl1\nNol7\nRanbp9\nReps1\nRnf182\nSirt5\nTbc1d7,,,Eras\nGata1\nGlod5\nHdac6\nPcsk1n\nSuv39h1\nWas\nWdr13
Gaublomme_GSE75110_TPM_clean,Aknad1\nFam102b\nFndc7\nHenmt1\nPrpf38b\nSlc25a24\nSlc25a54\nStxbp3,,Acacb\nAlkbh2\nArhgef10\nCln8\nCoprs\nDlgap2\nErich1\nFbxo25\nFoxn4\nKbtbd11\nKctd10\nMyo1h\nTdrp\nTrbv12-1\nTrbv12-2\nTrbv13-1\nTrbv13-2\nTrbv13-3\nTrbv14\nTrbv15\nTrbv16\nTrbv17\nTrbv19\nTrbv20\nTrbv21\n+8 genes,Amigo1\nAmpd2\nAtxn7l2\nCyb561d1\nGnai3\nGnat2\nGpr61\nGstm4,,Atp1b2\nDnah2\nEfnb3\nKdm6b\nShbg\nTmem88\nTrp53\nWrap53,,,,,1700020D05Rik\nAp5b1\nCfl1\nEfemp2\nMus81\nOvol1\nRnaseh2c\nSnx32,,Eif4e1b\nFgfr4\nHk3\nSncb\nTspan17\nUimc1\nUnc5a\nZfp346
Gaublomme_GSE75111_TPM_clean,,Acacb\nAlkbh2\nArhgef10\nCln8\nCoprs\nDlgap2\nErich1\nFbxo25\nFoxn4\nKbtbd11\nKctd10\nMyo1h\nTdrp\nTrbv12-1\nTrbv12-2\nTrbv13-1\nTrbv13-2\nTrbv13-3\nTrbv14\nTrbv15\nTrbv16\nTrbv17\nTrbv19\nTrbv20\nTrbv21\n+8 genes,,,,2610002M06Rik\n2810403D21Rik\nBrwd3\nGpr174\nItm2a\nP2ry10b\nTbx22\nTent5d,,,,Amz1\nArfrp1\nBrat1\nCap2\nCard11\nCd70\nCrb3\nDennd1c\nDennd6b\nFam8a1\nGmeb2\nGna12\nGrifin\nHdac10\nHelz2\nIqce\nKdm1b\nKhsrp\nKif13a\nLfng\nLime1\nMapk11\nMapk12\nMir6958\nMir7118\n+15 genes,,,
kakadarov_tpm,4930444F02Rik\nAbracl\nArfgef3\nCcdc28a\nEct2l\nHebp2\nNhsl1\nReps1,Amigo1\nAmpd2\nAtxn7l2\nCyb561d1\nGnai3\nGnat2\nGpr61\nGstm4,,,4930443O20Rik\nAplnr\nLrrc55\nOlfr987\nOlfr988\nP2rx3\nSsrp1\nTnks1bp1,Atp5a1\nCyp4f13\nCyp4f14\nCyp4f15\nCyp4f16\nCyp4f17\nCyp4f37\nCyp4f39\nCyp4f40\nEpg5\nF830208F22Rik\nPglyrp2\nPstpip2\nSetbp1\nSiglec15\nSlc14a1\nSlc14a2\nZfp472\nZfp799\nZfp811\nZfp870\nZfp871\nZfp952,Bloc1s3\nExoc3l2\nGemin7\nMark4\nNkpd1\nPpp1r37\nTrappc6a\nZfp296,,1110038F14Rik\n1700025B11Rik\n1700072B07Rik\n1700109G14Rik\n1700109K24Rik\n4930426L09Rik\n4930447M23Rik\n4930487D11Rik\n4930513D17Rik\n4930515B02Rik\n4933415F23Rik\n6720483E21Rik\nAkap12\nAmer2\nApol10a\nApol10b\nApol11a\nApol11b\nApol6\nApol7a\nApol7b\nApol7c\nApol7e\nApol8\nApol9a\n+113 genes,1110038F14Rik\n1700007K13Rik\n1700025B11Rik\n1700109K24Rik\n4930444F02Rik\nA430035B10Rik\nAA545190\nAbracl\nAk8\nApol10a\nApol10b\nApol11a\nApol11b\nApol6\nApol7a\nApol7b\nApol7c\nApol7e\nApol8\nApol9a\nApol9b\nArfgef3\nBcl2l2\nCcdc28a\nCel\n+52 genes,1700028J19Rik\n2410002F23Rik\nKlk1\nKlk1b11\nKlk1b16\nKlk1b21\nKlk1b22\nKlk1b24\nKlk1b26\nKlk1b27\nKlk1b3\nKlk1b4\nKlk1b5\nKlk1b9\nMup17\nMup18\nMup19\nMup20\nMup21\nMup3\nMup5\nNlrp4e\nOlfr341\nOlfr342\nOlfr344\n+36 genes,,Alox12b\nAlox8\nAloxe3\nArhgap9\nCntrob\nEif1b\nEntpd3\nGli1\nGucy2e\nHes7\nInhbc\nInhbe\nKcnab3\nMobp\nMyrip\nNdufa4l2\nR3hdm2\nRpl14\nRpsa\nShmt2\nSlc25a38\nSnora62\nStac3\nTrappc1
somatosensory_converted_into_tpm,,,,4930443O20Rik\nAplnr\nLrrc55\nOlfr987\nOlfr988\nP2rx3\nSsrp1\nTnks1bp1,,4930550L24Rik\nAbcc6\nAbcc8\nAtp11c\nBmp2\nCcdc114\nCrls1\nEmp3\nF9\nFermt1\nFgf13\nGpr101\nHao1\nKcnj11\nLrrn4\nMcf2\nMcm8\nMir3090\nMir504\nNomo1\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\n+14 genes,Bglap\nBglap2\nBglap3\nDnd1\nHars\nHars2\nPaqr6\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb2\nPcdhb3\nPcdhga1\n+26 genes,Dpp4\nFap\nFign\nGca\nGcg\nIfih1\nKcnh7\nSlc4a10,4930480K15Rik\n4933400B14Rik\nAdcy4\nCideb\nFoxd2\nFoxd2os\nFshr\nGtf2a1l\nLhcgr\nLtb4r1\nLtb4r2\nNfatc4\nNop9\nNrxn1\nNynrin\nPpp1r21\nRipk3\nSkint1\nSkint10\nSkint11\nSkint2\nSkint3\nSkint4\nSkint5\nSkint6\n+6 genes,4930417O22Rik\nArl4d\nCfap97d1\nDhx8\nDusp3\nEtv4\nMeox1\nSost,Diaph1\nHars2\nHdac3\nMir6979\nOlfr3\nOlfr338\nOlfr339\nOlfr340\nOlfr341\nOlfr342\nOlfr344\nOlfr345\nOlfr346\nOlfr347\nOlfr348\nOlfr350\nOlfr351\nOlfr352\nOlfr353\nOlfr354\nOlfr355\nOlfr356\nOlfr357\nOlfr358\nOlfr360\n+44 genes,,Abcc6\nAbcc8\nBmp2\nCcdc114\nChgb\nCrls1\nEmp3\nFermt1\nKcnj11\nLrrn4\nMcm8\nMir3090\nNomo1\nSyngr4\nTmem143\nTrmt6
Dopaminergic_TPM_clean,,Atp1b2\nDnah2\nEfnb3\nKdm6b\nShbg\nTmem88\nTrp53\nWrap53,2610002M06Rik\n2810403D21Rik\nBrwd3\nGpr174\nItm2a\nP2ry10b\nTbx22\nTent5d,Atp5a1\nCyp4f13\nCyp4f14\nCyp4f15\nCyp4f16\nCyp4f17\nCyp4f37\nCyp4f39\nCyp4f40\nEpg5\nF830208F22Rik\nPglyrp2\nPstpip2\nSetbp1\nSiglec15\nSlc14a1\nSlc14a2\nZfp472\nZfp799\nZfp811\nZfp870\nZfp871\nZfp952,4930550L24Rik\nAbcc6\nAbcc8\nAtp11c\nBmp2\nCcdc114\nCrls1\nEmp3\nF9\nFermt1\nFgf13\nGpr101\nHao1\nKcnj11\nLrrn4\nMcf2\nMcm8\nMir3090\nMir504\nNomo1\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\n+14 genes,,Brsk1\nCox6b2\nFam71e2\nHspbp1\nKmt5c\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\n+16 genes,,1700063H04Rik\n1700092K14Rik\n4932435O22Rik\nAicda\nApobec1\nAtp5a1\nBcs1l\nBtbd17\nCd300a\nCd300c\nCd300c2\nCd300lb\nCd300ld\nCdk5r2\nCfap65\nCryba2\nCyp27a1\nDnaic2\nDppa3\nEntpd8\nEpg5\nF830208F22Rik\nFam166a\nFev\nGdf3\n+54 genes,Aldh1a2\nAqp9\nBcl7c\nCgnl1\nCtf1\nCtf2\nFbxl19\nHsd3b7\nLipc\nMir762\nMyzap\nOrai3\nPcdha12\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\nPcdhb15\nPcdhb16\nPcdhb17\nPcdhb18\n+16 genes,Cdk5r2\nCfap65\nCnppd1\nCryba2\nFev\nIhh\nMir375\nNhej1\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nSlc23a3\nVaultrc5\nWnt10a\nWnt6\n+1 genes,1500026H17Rik\n1700034H15Rik\n1700065J18Rik\nA130010J15Rik\nA730013G03Rik\nAno4\nCamk1g\nG0s2\nGas2l3\nHhat\nHsd11b1\nIrf6\nKcnh1\nLamb3\nMir205\nMir3473c\nNek2\nNr1h4\nRcor3\nRd3\nScyl2\nSertad4\nSlc17a8\nSlc30a1\nSlc5a8\n+5 genes,1700042O10Rik\n4930512M02Rik\nA530088E08Rik\nAbcc6\nAbcc8\nAccs\nAccsl\nAlkbh3\nAlkbh3os1\nAlx4\nArl13a\nBcs1l\nC1qtnf6\nCard10\nCatip\nCcdc114\nCd82\nCdc42ep1\nCdk5r2\nCnot9\nCryba2\nCstf2\nCtdsp1\nCyp27a1\nCyp4f13\n+82 genes
Rbp4_positive_cells,,,,Bloc1s3\nExoc3l2\nGemin7\nMark4\nNkpd1\nPpp1r37\nTrappc6a\nZfp296,Bglap\nBglap2\nBglap3\nDnd1\nHars\nHars2\nPaqr6\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb2\nPcdhb3\nPcdhga1\n+26 genes,Brsk1\nCox6b2\nFam71e2\nHspbp1\nKmt5c\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\n+16 genes,,Arl4c\nGlrp1\nHjurp\nMroh2a\nPlatr5\nSpp2\nTrpm8\nUgt1a1,Pcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\nPcdhb15\nPcdhb16\nPcdhb17\nPcdhb18\nPcdhb19\nPcdhb20\nPcdhb21\nPcdhb22\nPcdhb3\nPcdhb4\nPcdhb5\nPcdhb6\nPcdhb7\nPcdhb8\nPcdhb9,1700023F06Rik\nAcbd4\nAldh18a1\nArhgap27\nArl4c\nCcnj\nE030044B06Rik\nEntpd1\nFmnl1\nGlrp1\nHexim1\nHexim2\nHjurp\nMap3k14\nMir8092\nMroh2a\nPlatr5\nSorbs1\nSpata32\nSpp2\nTctn3\nTrpm8\nUgt1a1\nZfp518a,Hars2\nPcdha1\nPcdha10\nPcdha11\nPcdha12\nPcdha2\nPcdha3\nPcdha4\nPcdha5\nPcdha6\nPcdha7\nPcdha8\nPcdha9\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb2\nPcdhb3\nPcdhb4\nVaultrc5\nZmat2,,Fzd10\nFzd10os\nGlt1d1\nPiwil1\nRan\nRimbp2\nStx2\nTmem132d
Cheng_ES_TPM,,,,,Dpp4\nFap\nFign\nGca\nGcg\nIfih1\nKcnh7\nSlc4a10,,Arl4c\nGlrp1\nHjurp\nMroh2a\nPlatr5\nSpp2\nTrpm8\nUgt1a1,,,Arl4c\nGlrp1\nHjurp\nMroh2a\nPlatr5\nSpp2\nTrpm8\nUgt1a1,,,
Alveolar_cells_Type_II_Merged_Batches,,,,1110038F14Rik\n1700025B11Rik\n1700072B07Rik\n1700109G14Rik\n1700109K24Rik\n4930426L09Rik\n4930447M23Rik\n4930487D11Rik\n4930513D17Rik\n4930515B02Rik\n4933415F23Rik\n6720483E21Rik\nAkap12\nAmer2\nApol10a\nApol10b\nApol11a\nApol11b\nApol6\nApol7a\nApol7b\nApol7c\nApol7e\nApol8\nApol9a\n+113 genes,4930480K15Rik\n4933400B14Rik\nAdcy4\nCideb\nFoxd2\nFoxd2os\nFshr\nGtf2a1l\nLhcgr\nLtb4r1\nLtb4r2\nNfatc4\nNop9\nNrxn1\nNynrin\nPpp1r21\nRipk3\nSkint1\nSkint10\nSkint11\nSkint2\nSkint3\nSkint4\nSkint5\nSkint6\n+6 genes,1700063H04Rik\n1700092K14Rik\n4932435O22Rik\nAicda\nApobec1\nAtp5a1\nBcs1l\nBtbd17\nCd300a\nCd300c\nCd300c2\nCd300lb\nCd300ld\nCdk5r2\nCfap65\nCryba2\nCyp27a1\nDnaic2\nDppa3\nEntpd8\nEpg5\nF830208F22Rik\nFam166a\nFev\nGdf3\n+54 genes,Pcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\nPcdhb15\nPcdhb16\nPcdhb17\nPcdhb18\nPcdhb19\nPcdhb20\nPcdhb21\nPcdhb22\nPcdhb3\nPcdhb4\nPcdhb5\nPcdhb6\nPcdhb7\nPcdhb8\nPcdhb9,,,1110038F14Rik\n1600012P17Rik\n1700001L19Rik\n1700003F12Rik\n1700007K13Rik\n1700008K24Rik\n1700020L24Rik\n1700025B11Rik\n1700025C18Rik\n1700028D13Rik\n1700034G24Rik\n1700063H04Rik\n1700067P10Rik\n1700095B10Rik\n1700101I11Rik\n1700109K24Rik\n1700122O11Rik\n2310001H17Rik\n2310002F09Rik\n2310065F04Rik\n2810025M15Rik\n2900026A02Rik\n4833428L15Rik\n4922502D21Rik\n4930426D05Rik\n+586 genes,,Abcb11\nBbs5\nDhrs9\nFastkd1\nG6pc2\nKlhl41\nLrp2\nNostrin\nSpc25,2810459M11Rik\nArmc9\nB3gnt7\nBcs1l\nC1qtnf6\nCard10\nCatip\nCdc42ep1\nCdk5r2\nCnot9\nCryba2\nCtdsp1\nCyp27a1\nCyth4\nElfn2\nFev\nFxyd1\nFxyd3\nFxyd7\nGga1\nGpr55\nGramd1a\nHpn\nHtr2b\nIl2rb\n+52 genes
Alveolar_cells_Type_I_Merged_Batches,1700029N11Rik\n4930444F02Rik\nA330076C08Rik\nAbracl\nArfgef3\nAtxn1\nCcdc28a\nCd83\nDtnbp1\nEct2l\nGfod1\nGmpr\nHebp2\nJarid2\nMcur1\nMylip\nNhsl1\nNol7\nRanbp9\nReps1\nRnf182\nSirt5\nTbc1d7,,Amz1\nArfrp1\nBrat1\nCap2\nCard11\nCd70\nCrb3\nDennd1c\nDennd6b\nFam8a1\nGmeb2\nGna12\nGrifin\nHdac10\nHelz2\nIqce\nKdm1b\nKhsrp\nKif13a\nLfng\nLime1\nMapk11\nMapk12\nMir6958\nMir7118\n+15 genes,1110038F14Rik\n1700007K13Rik\n1700025B11Rik\n1700109K24Rik\n4930444F02Rik\nA430035B10Rik\nAA545190\nAbracl\nAk8\nApol10a\nApol10b\nApol11a\nApol11b\nApol6\nApol7a\nApol7b\nApol7c\nApol7e\nApol8\nApol9a\nApol9b\nArfgef3\nBcl2l2\nCcdc28a\nCel\n+52 genes,4930417O22Rik\nArl4d\nCfap97d1\nDhx8\nDusp3\nEtv4\nMeox1\nSost,Aldh1a2\nAqp9\nBcl7c\nCgnl1\nCtf1\nCtf2\nFbxl19\nHsd3b7\nLipc\nMir762\nMyzap\nOrai3\nPcdha12\nPcdhac1\nPcdhac2\nPcdhb1\nPcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\nPcdhb15\nPcdhb16\nPcdhb17\nPcdhb18\n+16 genes,1700023F06Rik\nAcbd4\nAldh18a1\nArhgap27\nArl4c\nCcnj\nE030044B06Rik\nEntpd1\nFmnl1\nGlrp1\nHexim1\nHexim2\nHjurp\nMap3k14\nMir8092\nMroh2a\nPlatr5\nSorbs1\nSpata32\nSpp2\nTctn3\nTrpm8\nUgt1a1\nZfp518a,Arl4c\nGlrp1\nHjurp\nMroh2a\nPlatr5\nSpp2\nTrpm8\nUgt1a1,1110038F14Rik\n1600012P17Rik\n1700001L19Rik\n1700003F12Rik\n1700007K13Rik\n1700008K24Rik\n1700020L24Rik\n1700025B11Rik\n1700025C18Rik\n1700028D13Rik\n1700034G24Rik\n1700063H04Rik\n1700067P10Rik\n1700095B10Rik\n1700101I11Rik\n1700109K24Rik\n1700122O11Rik\n2310001H17Rik\n2310002F09Rik\n2310065F04Rik\n2810025M15Rik\n2900026A02Rik\n4833428L15Rik\n4922502D21Rik\n4930426D05Rik\n+586 genes,,,,1810013A23Rik\nAbhd10\nAtg3\nBC016579\nBtla\nCcdc80\nCd200\nCd200r1\nCd200r2\nCd200r3\nCd200r4\nDef6\nFance\nGcsam\nPcdhb1\nPcdhb10\nPcdhb11\nPcdhb12\nPcdhb13\nPcdhb14\nPcdhb15\nPcdhb16\nPcdhb17\nPcdhb18\nPcdhb19\n+20 genes


In [99]:
chromosome_overlap_lower.applymap(lambda x: len(x) if x is not None else np.nan)

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,,1.0,,1.0,,,,,,2.0,,,1.0
Gaublomme_GSE75110_TPM_clean,1.0,,4.0,1.0,,1.0,,,,,1.0,,1.0
Gaublomme_GSE75111_TPM_clean,,4.0,,,,1.0,,,,5.0,,,
kakadarov_tpm,1.0,1.0,,,1.0,2.0,1.0,,12.0,7.0,12.0,,3.0
somatosensory_converted_into_tpm,,,,1.0,,6.0,18.0,1.0,3.0,1.0,15.0,,2.0
Dopaminergic_TPM_clean,,1.0,1.0,2.0,6.0,,8.0,,12.0,6.0,6.0,2.0,10.0
Rbp4_positive_cells,,,,1.0,18.0,8.0,,1.0,3.0,3.0,9.0,,1.0
Cheng_ES_TPM,,,,,1.0,,1.0,,,1.0,,,
Alveolar_cells_Type_II_Merged_Batches,,,,12.0,3.0,12.0,3.0,,,92.0,,2.0,7.0
Alveolar_cells_Type_I_Merged_Batches,2.0,,5.0,7.0,1.0,6.0,3.0,1.0,92.0,,,,3.0


In [100]:
chromosome_overlap_upper = pd.DataFrame([[None if x == y else get_intersection(get_upper(chromosome_dfs[x]),get_upper(chromosome_dfs[y])) for y in dataset_names] for x in dataset_names], index=dataset_names, columns=dataset_names)

In [101]:
writer = pd.ExcelWriter("overlap_chromosome_upper.xlsx")
chromosome_overlap_upper.applymap(get_all_genes).applymap(format_stretch_genes).to_excel(writer,"by genes")
chromosome_overlap_upper.applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"by count (stretches)")
chromosome_overlap_upper.applymap(get_all_genes).applymap(lambda x: len(x) if x is not None else np.nan).to_excel(writer,"by count (genes)")
chromosome_overlap_upper.applymap(format_stretches).to_excel(writer,"by locations")
writer.save()

In [102]:
# pd.options.display.max_colwidth = 1000
pd.options.display.max_seq_items = 100
chromosome_overlap_upper.applymap(get_all_genes)

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,{},"[9930111J21Rik1, 9930111J21Rik2, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Pom121l2, Prss16, Psme2b, Slc17a2, Tgtp1, Tgtp2, Trim38, Trim41, Trim7, Vmn1r188, Vmn1r189, Zfp184]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, Ampd1, Arrdc5, Bcas2, C130026I21Rik, Cchcr1, Csde1, Dennd2c, Fbxo36, Fem1a, Gm7609, H1f1, H1f2, H1f3, H1f4, H1f6, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Kdm4b, Mir6977, Nras, Olfr1395, Olfr1396, Olfr56, Plin3, Pom121l2, Pou5f1, Prss16, Psme2b, Psors1c2, Ptprs, Sike1, Slc16a14, Slc17a2, Sp100, Sp110, Sp140, Syt6, Tcf19, Tgtp1, Tgtp2, Ticam1, Trim33, Trim38, Trim41, Trim7, Uhrf1, Vmn1r188, Vmn1r189, Zfp184]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Fbxo36, Gm7609, Ifi47, Olfr1395, Olfr1396, Olfr56, Slc16a14, Sp100, Sp110, Sp140, Tgtp1, Tgtp2]","[H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Pou5f1, Tcf19]","[H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe]",{},"[3632454L22Rik, Armcx1, Armcx2, Armcx3, Armcx4, Armcx6, B230119M05Rik, Hnrnph2, Mir7093, Nxf2, Zmat1]","[A530032D15Rik, C130026I21Rik, Cchcr1, Ccl3, Ccl4, Ccl5, Ccl6, Ccl9, E230016K23Rik, Fbxo36, Gm7609, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Wfdc17, Wfdc18]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Cchcr1, Fbxo36, Gm7609, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Ifi47, Olfr1395, Olfr1396, Olfr56, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Psme2b, Tgtp1, Tgtp2, Trim7]","[H2ac7, H2ac8, H2bc6, H2bc7, H2bc8, H3c4, H3c6, H4c4]","[Ccl3, Ccl4, Ccl5, Ccl6, Ccl9, E230016K23Rik, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe, Ifi202b, Ifi203, Ifi204, Ifi207, Ifi208, Ifi209, Ifi211, Ifi213, Mndal, Wfdc17, Wfdc18]"
Gaublomme_GSE75110_TPM_clean,"[9930111J21Rik1, 9930111J21Rik2, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Pom121l2, Prss16, Psme2b, Slc17a2, Tgtp1, Tgtp2, Trim38, Trim41, Trim7, Vmn1r188, Vmn1r189, Zfp184]",{},"[9930111J21Rik1, 9930111J21Rik2, Aoc1, Doxl1, Doxl2, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Pom121l2, Prss16, Psme2b, Slc17a2, Tgtp1, Tgtp2, Tmem176a, Tmem176b, Trim38, Trim41, Trim7, Vmn1r188, Vmn1r189, Zfp184]","[9930111J21Rik1, 9930111J21Rik2, Ifi47, Olfr1395, Olfr1396, Olfr56, Tgtp1, Tgtp2]","[Aoc1, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, Tmem176a, Tmem176b]","[0610009E02Rik, Entr1, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe, Inpp5e, Mir6996, Notch1, Pmpca, Sec16a, Snapc4]",{},{},{},"[9930111J21Rik1, 9930111J21Rik2, Ifi47, Olfr1395, Olfr1396, Olfr56, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, Aacs, Bri3bp, Dhx37, Ifi47, Il18r1, Il18rap, Il1r1, Il1rl1, Il1rl2, Irgm1, Mfsd9, Mir12200, Ncor2, Olfr1395, Olfr1396, Olfr56, Psme2b, Scarb1, Slc9a2, Slc9a4, Tgtp1, Tgtp2, Tmem132b, Trim7, Ubc]","[H2ac7, H2ac8, H2bc6, H2bc7, H2bc8, H3c4, H3c6, H4c4]","[H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe]"
Gaublomme_GSE75111_TPM_clean,"[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, Ampd1, Arrdc5, Bcas2, C130026I21Rik, Cchcr1, Csde1, Dennd2c, Fbxo36, Fem1a, Gm7609, H1f1, H1f2, H1f3, H1f4, H1f6, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Kdm4b, Mir6977, Nras, Olfr1395, Olfr1396, Olfr56, Plin3, Pom121l2, Pou5f1, Prss16, Psme2b, Psors1c2, Ptprs, Sike1, Slc16a14, Slc17a2, Sp100, Sp110, Sp140, Syt6, Tcf19, Tgtp1, Tgtp2, Ticam1, Trim33, Trim38, Trim41, Trim7, Uhrf1, Vmn1r188, Vmn1r189, Zfp184]","[9930111J21Rik1, 9930111J21Rik2, Aoc1, Doxl1, Doxl2, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac11, H2ac12, H2ac18, H2ac19, H2ac20, H2ac4, H2ac6, H2ac7, H2ac8, H2bc11, H2bc12, H2bc18, H2bc21, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c13, H3c14, H3c15, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c14, H4c2, H4c3, H4c4, H4c6, H4c9, Hfe, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Pom121l2, Prss16, Psme2b, Slc17a2, Tgtp1, Tgtp2, Tmem176a, Tmem176b, Trim38, Trim41, Trim7, Vmn1r188, Vmn1r189, Zfp184]",{},"[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Cab39l, Cdadc1, Fbxo36, Gabarapl1, Gm7609, Ifi47, Klrc1, Klrc2, Klrc3, Klrd1, Klre1, Klri1, Klrk1, Mir680-1, Olfr1395, Olfr1396, Olfr56, Phf11a, Phf11b, Phf11c, Phf11d, Setdb2, Shisa2, Slc16a14, Sp100, Sp110, Sp140, Tgtp1, Tgtp2]","[Aoc1, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, Tmem176a, Tmem176b]","[H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe]",{},{},"[A530032D15Rik, A630001G21Rik, C130026I21Rik, Cab39l, Cchcr1, Cdadc1, Fbxo36, Gm7609, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Mir8096, Phf11a, Phf11b, Phf11c, Phf11d, Pou5f1, Psors1c2, Setdb2, Shisa2, Slc16a14, Sp100, Sp110, Sp140, Tcf19]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, A630001G21Rik, C130026I21Rik, Cchcr1, Fbxo36, Gm7609, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Ifi47, Mir8096, Olfr1395, Olfr1396, Olfr56, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, Ifi47, Irgm1, Olfr1395, Olfr1396, Olfr56, Psme2b, Tgtp1, Tgtp2, Trim7]","[H2ac7, H2ac8, H2bc6, H2bc7, H2bc8, H3c4, H3c6, H4c4]","[A530032D15Rik, A630001G21Rik, C130026I21Rik, Fam114a1, Gm7609, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H3c7, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe, Ifi202b, Ifi203, Ifi204, Ifi205, Ifi207, Ifi211, Klf3, Klhl5, Mir574, Mir8096, Mndal, Olfr433, Rfc1, Slc16a14, Sp100, Sp110, Sp140, Tlr1, Tlr6, Tmem156, Wdr19]"
kakadarov_tpm,"[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Fbxo36, Gm7609, Ifi47, Olfr1395, Olfr1396, Olfr56, Slc16a14, Sp100, Sp110, Sp140, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, Ifi47, Olfr1395, Olfr1396, Olfr56, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Cab39l, Cdadc1, Fbxo36, Gabarapl1, Gm7609, Ifi47, Klrc1, Klrc2, Klrc3, Klrd1, Klre1, Klri1, Klrk1, Mir680-1, Olfr1395, Olfr1396, Olfr56, Phf11a, Phf11b, Phf11c, Phf11d, Setdb2, Shisa2, Slc16a14, Sp100, Sp110, Sp140, Tgtp1, Tgtp2]",{},"[Abca5, Abca6, Abca8a, Abca8b, Abca9, Kcnj16, Kcnj2, Map2k6, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45]","[0610010F05Rik, 1700016G22Rik, 1700061J23Rik, 1700093K21Rik, 4933427E13Rik, 4933430M04Rik, 5730522E02Rik, AW209491, Adamts13, Adamts4, Adarb2, Ahsa2, Akr1c21, Akr1e1, Apoa2, Arid4b, Asphd1, B3galnt2, B4galt3, Bcl11a, Cct4, Cdipt, Cdiptos, Chrm3, Dedd, Dip2c, Doc2a, Fam161a, Fcer1g, Ggps1, Gtpbp4, Hecw1, Hirip3, Idi1, Idi2, Ino80e, Kctd13, Kif22, Klf6, Klhdc9, Larp4b, Maz, Med22, Mrpl32, Mvp, Ndufs2, Nectin4, Nit1, Nr1i3, Pagr1a, Papolg, Pex13, Pfdn2, Pfkp, Pitrm1, Ppox, Prrt2, Psma2, Pus10, Rel, Rexo4, Rpl7a, Ryr2, Sez6l2, Stkld1, Surf1, Surf2, Surf4, Surf6, Taok2, Tbce, Tmem219, Tomm40l, Ufc1, Usp21, Usp34, Wdr37, Xpo1, Zmynd11]","[1700012H19Rik, 1700023C21Rik, 2610035D17Rik, 2810047C21Rik1, 4732490B19Rik, Abca5, Abca6, Abca8a, Abca8b, Abca9, BC006965, Cog1, D11Wsu47e, Fam104a, Kcnj16, Kcnj2, Map2k6, Slc39a11, Sox9, Sstr2, Vmn2r30, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45, Vmn2r46, Vmn2r47]","[1700020D14Rik, Ddx3y, Eif2s3y, H2al2c, Kdm5d, Uba1y, Usp9y, Uty, Zfy1, Zfy2]","[1700016G22Rik, 1700029P11Rik, 1700047I17Rik2, 1700129C05Rik, 2700097O09Rik, 4930506C21Rik, A530032D15Rik, A930024E05Rik, AW209491, Aco2, Actn2, Adarb2, Akr1c21, Akr1e1, Anapc5, Anapc7, Apba1, Arfip2, Arid4b, Arpc1a, Arpc1b, Arpc3, Atp2a2, Atp5j2, B3galnt2, Baz1a, Bud31, C130026I21Rik, Cab39l, Camkk2, Ccdc47, Ccdc63, Ccr6, Cdadc1, Cfl2, Chadl, Chrm3, Cpsf4, Csdc2, Cux2, Dchs1, Ddx42, Dip2c, Dnajb7, Dnhd1, Ep300, Fam122a, Fam177a, Fam189a2, Fam216a, Fbxo36, Fgf9, Fgfr1op, Ftsj3, Fxn, Ggps1, Gm7609, Gpn3, Gtpbp4, Hecw1, Hvcn1, Idi1, Idi2, Ift81, Ilk, Kdm2b, Klf6, L3mbtl2, Larp4b, Limd2, Map3k3, Mir8115, Morn3, Mpc1, Mrpl32, Mtr, Myl2, Orai1, P2rx4, P2rx7, Pdap1, Pfkp, Pgm5, Phf11a, Phf11b, Phf11c, Phf11d, Phf5a, Pip5k1b, Pitrm1, Pmm1, Polr3h, Ppp1cc, Pptc7, Prr18, Psma2, Psmc5, Ptcd1, Rad9b, Rangap1, ...]","[1700018F24Rik, 1700047I17Rik2, 1700129C05Rik, 2700097O09Rik, 9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, Aco2, Actn2, Adarb2, Arpc1a, Arpc1b, Atp5j2, Baz1a, Bud31, C130026I21Rik, Cab39l, Cdadc1, Cfl2, Chadl, Chrm3, Cpsf4, Dip2c, Fam177a, Fbxo36, Fgf9, Gm7609, Gtpbp4, Idi1, Idi2, Ifi47, Klf6, Kpna7, L3mbtl2, Larp4b, Mtr, Olfr1395, Olfr1396, Olfr56, Pdap1, Pfkp, Phf11a, Phf11b, Phf11c, Phf11d, Phf5a, Pitrm1, Ptcd1, Rangap1, Rcbtb1, Ryr2, Setdb2, Slc16a14, Snx6, Sp100, Sp110, Sp140, Srp54a, Srp54b, Tef, Tgtp1, Tgtp2, Tob2, Wdr37, Zc3h7b, Zkscan14, Zkscan5, Zmynd11]","[1700018F24Rik, 1700022I11Rik, 2310002L09Rik, 4930429F11Rik, 4933408B17Rik, 4933416M06Rik, 5330411J11Rik, 5730455P16Rik, 5930438M14Rik, 6030471H07Rik, 9130204L05Rik, 9930111J21Rik1, 9930111J21Rik2, A430085M09Rik, AV039307, Acvr1, Acvr1c, Adal, Adamtsl1, Adap2, Ak6, Alkbh5, Apc, Arpc1a, Arpc1b, Atad5, Atp5j2, Atp8b5, Atpaf2, Baz2b, Bdp1, Birc6, Bnc2, Brd8, Bud31, C030013C21Rik, Capn3, Catsper2, Ccdc125, Ccdc148, Ccdc171, Ccnb1, Ccndbp1, Cd302, Cdan1, Cdc23, Cdc25c, Cdc42ep3, Cdk5r1, Cdk7, Cebpz, Cebpzos, Cenph, Cer1, Ckmt1, Cntln, Cpsf4, Crim1, Crlf3, Cytip, Dapl1, Dhrs7b, Dmac1, Dnajb5, Dpy30, Drg2, Eif2ak2, Ell3, Epb42, Ermn, Fam13b, Fam214b, Fam53c, Fam98a, Fancg, Fbxw13, Fbxw14, Fbxw15, Fbxw16, Fbxw19, Fbxw20, Fbxw22, Fbxw28, Fez2, Flii, Frem1, Frmd3, Galnt5, Gfra3, Gid4, Gpatch11, Gtf2h2, Haus2, Heatr5b, Ifi47, Kdm4c, Kif20a, Lcmt2, Lelp1, Llgl1, ...]","[1700001J03Rik, Ces1a, Ces1b, Ces1c, Ces1d, Ces1e, Ces1f, Ces1g, Ces1h, Ces5a, Cyp3a11, Cyp3a16, Cyp3a25, Cyp3a41a, Cyp3a41b, Cyp3a44, Cyp3a57, Cyp3a59, Zscan25]","[1700019G24Rik, Asb4, Ddx3y, Dlx6, Dlx6os1, Dlx6os2, Dync1i1, Eif2s3y, H2al2c, Kdm5d, Pdk4, Peg10, Pon1, Pon2, Pon3, Ppp1r9a, Sem1, Sgce, Slc25a13, Uba1y, Usp9y, Uty, Zfy1, Zfy2]"
somatosensory_converted_into_tpm,"[H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Pou5f1, Tcf19]","[Aoc1, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, Tmem176a, Tmem176b]","[Aoc1, Gimap1, Gimap3, Gimap5, Gimap6, Gimap7, Tmem176a, Tmem176b]","[Abca5, Abca6, Abca8a, Abca8b, Abca9, Kcnj16, Kcnj2, Map2k6, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45]",{},"[Cntn2, Golt1a, Lrrn2, Mdm4, Nfasc, Pik3c2b, Plekha6, Ppp1r15b, Serpina11, Serpina12, Serpina1a, Serpina1b, Serpina1c, Serpina1d, Serpina1e, Serpina1f, Serpina3a, Serpina3b, Serpina3c, Serpina3f, Serpina3g, Serpina3i, Serpina5, Serpina9, Tmem81]","[1700086L19Rik, 2210039B01Rik, 6720489N17Rik, Aopep, Dbpht2, Fbp1, Fbp2, Gphb5, Hif1a, Kcnh5, Mir713, Prkch, Rhoj, Snapc1, Syt16, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45, Zfp808, Zfp934, Zfp935]",{},"[Efcab9, H2-D1, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Hba-a1, Hba-a2, Hba-x, Hbq1a, Hbq1b, Pou5f1, Sh3pxd2b, Stk10, Tcf19, Ubtd2]","[3930402G23Rik, 4833411C07Rik, 9530052E02Rik, Col4a1, Col4a2, E230013L22Rik, Efcab9, H2-D1, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Hba-a1, Hba-a2, Hba-x, Hbq1a, Hbq1b, Irs2, Naxd, Pou5f1, Rab20, Sh3pxd2b, Stk10, Tcf19, Ubtd2]","[1700012B07Rik, 1700012H19Rik, 1700023C21Rik, 1700096J18Rik, 9430091E24Rik, Abca5, Abca6, Abca8a, Abca8b, Abca9, Amz2, Arsg, B3galt1, BC006965, C1qtnf5, Cbl, Ccdc153, Fa2h, Fam20a, Galnt3, Glg1, Gna13, Kcnj16, Kcnj2, Ldhd, Map2k6, Mcam, Mfrp, Mir7224, Mlkl, Prkar1a, Rfwd3, Rgs9, Rnf26, Scn1a, Scn7a, Scn9a, Slc16a6, Sox9, Thy1, Ttc21b, Usp2, Wdr59, Wipi1, Xirp2, Znrf1]","[Serpina11, Serpina12, Serpina1a, Serpina1c, Serpina1d, Serpina1e, Serpina3a, Serpina5, Serpina9]","[1700092C10Rik, 4922502D21Rik, A230070E04Rik, Adam28, Adam7, Adamdec1, Asb4, Casd1, Clec12a, Clec12b, Clec1a, Clec1b, Clec7a, Clec9a, Ifi27l2b, Ms4a4a, Ms4a4b, Ms4a4c, Ms4a4d, Ms4a5, Ms4a6b, Ms4a6c, Ms4a6d, Ms4a7, Nefl, Nefm, Olr1, Peg10, Pon1, Pon2, Pon3, Ppp1r9a, Ppp4r4, Serpina10, Serpina11, Serpina12, Serpina16, Serpina1a, Serpina1b, Serpina1c, Serpina1d, Serpina1e, Serpina1f, Serpina3a, Serpina3b, Serpina3c, Serpina3f, Serpina3g, Serpina3i, Serpina5, Serpina6, Serpina9, Sgce, Stc1]"
Dopaminergic_TPM_clean,"[H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe]","[0610009E02Rik, Entr1, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe, Inpp5e, Mir6996, Notch1, Pmpca, Sec16a, Snapc4]","[H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe]","[0610010F05Rik, 1700016G22Rik, 1700061J23Rik, 1700093K21Rik, 4933427E13Rik, 4933430M04Rik, 5730522E02Rik, AW209491, Adamts13, Adamts4, Adarb2, Ahsa2, Akr1c21, Akr1e1, Apoa2, Arid4b, Asphd1, B3galnt2, B4galt3, Bcl11a, Cct4, Cdipt, Cdiptos, Chrm3, Dedd, Dip2c, Doc2a, Fam161a, Fcer1g, Ggps1, Gtpbp4, Hecw1, Hirip3, Idi1, Idi2, Ino80e, Kctd13, Kif22, Klf6, Klhdc9, Larp4b, Maz, Med22, Mrpl32, Mvp, Ndufs2, Nectin4, Nit1, Nr1i3, Pagr1a, Papolg, Pex13, Pfdn2, Pfkp, Pitrm1, Ppox, Prrt2, Psma2, Pus10, Rel, Rexo4, Rpl7a, Ryr2, Sez6l2, Stkld1, Surf1, Surf2, Surf4, Surf6, Taok2, Tbce, Tmem219, Tomm40l, Ufc1, Usp21, Usp34, Wdr37, Xpo1, Zmynd11]","[Cntn2, Golt1a, Lrrn2, Mdm4, Nfasc, Pik3c2b, Plekha6, Ppp1r15b, Serpina11, Serpina12, Serpina1a, Serpina1b, Serpina1c, Serpina1d, Serpina1e, Serpina1f, Serpina3a, Serpina3b, Serpina3c, Serpina3f, Serpina3g, Serpina3i, Serpina5, Serpina9, Tmem81]",{},{},{},"[1700016G22Rik, AW209491, Adarb2, Aftph, Ahctf1, Akr1c21, Akr1e1, Arid4b, B3galnt2, Catsperd, Cdc42bpa, Chrm3, Cnst, Coq8a, Cox20, Desi2, Dip2c, Efcab2, Ggps1, Gtpbp4, Hecw1, Hnrnpu, Idi1, Idi2, Itpkb, Kif26b, Kif28, Klf6, Larp4b, Lgalsl, Lonp1, Micos13, Mir1933, Mrpl32, Mtr, Peli1, Pfkp, Pitrm1, Psen2, Psma2, Ranbp3, Rpl36, Ryr2, Safb, Safb2, Sccpdh, Sertad2, Slc1a4, Smyd3, Tbce, Tfb2m, Ugp2, Vmac, Vps54, Wdr37, Zmynd11]","[1700016G22Rik, Adarb2, Adck2, Aftph, Agk, Braf, Ccdc71, Chrm3, Dip2c, E330009J07Rik, Gtpbp4, Idi1, Idi2, Impdh2, Klf6, Lamb2, Larp4b, Lgalsl, Mir12206, Mir1933, Mrps33, Mtr, Ndufaf3, Ndufb2, Peli1, Pfkp, Pitrm1, Qars, Qrich1, Ryr2, Sertad2, Slc1a4, Tmem178b, Ugp2, Usp19, Vps54, Wdr37, Wee2, Zmynd11]","[0610012G03Rik, 4930429F11Rik, 4931419H13Rik, 9130221F21Rik, Aftph, Akap9, Alg5, Ankib1, Atg4b, Bex6, Bok, Brpf1, Ccdc169, Ccna1, Cdc42ep3, Cebpz, Cebpzos, Cep19, Cep68, Chmp3, Cpne9, Crim1, Cyp51, D2hgdh, Dclk1, Dtymk, Eif2ak2, Exosc8, Farp2, Fez2, Frem2, Fzd1, Gpatch11, Gt(ROSA)26Sor, Heatr5b, Immt, Ing5, Kdm3a, Krit1, Lgalsl, Lhfpl4, Lrrd1, Mab21l1, Mir12194, Mir1933, Mrpl35, Mterf1a, Mterf1b, Mtmr14, Nbea, Ncbp2, Ndufaf7, Nhlrc3, Nrros, Pak2, Peli1, Pigx, Polr1a, Postn, Prkd3, Proser1, Ptcd3, Qpct, Reep1, Rfxap, Rnf103, Senp5, Sept2, Sertad2, Sertm1, Setd5, Slc1a4, Smad9, Sohlh2, Spg20, Srgap3, Stk25, Stoml3, Strn, Sult6b1, Supt20, Thap4, Thumpd3, Trpc4, Ufm1, Ugp2, Vit, Vps54]","[Serpina11, Serpina12, Serpina1a, Serpina1c, Serpina1d, Serpina1e, Serpina5, Serpina9]","[A230072E10Rik, Fam120c, Fgd1, Gnl3l, H1f1, H1f2, H1f3, H1f4, H1f6, H2ac4, H2ac6, H2ac7, H2ac8, H2bc3, H2bc4, H2bc6, H2bc7, H2bc8, H3c1, H3c2, H3c3, H3c4, H3c6, H4c1, H4c2, H4c3, H4c4, H4c6, Hfe, Huwe1, Phf8, Serpina11, Serpina12, Serpina1a, Serpina1b, Serpina1c, Serpina1d, Serpina1e, Serpina1f, Serpina3a, Serpina3b, Serpina3c, Serpina3f, Serpina3g, Serpina3i, Serpina5, Serpina9, Trim38, Tsr2, Wnk3]"
Rbp4_positive_cells,{},{},{},"[1700012H19Rik, 1700023C21Rik, 2610035D17Rik, 2810047C21Rik1, 4732490B19Rik, Abca5, Abca6, Abca8a, Abca8b, Abca9, BC006965, Cog1, D11Wsu47e, Fam104a, Kcnj16, Kcnj2, Map2k6, Slc39a11, Sox9, Sstr2, Vmn2r30, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45, Vmn2r46, Vmn2r47]","[1700086L19Rik, 2210039B01Rik, 6720489N17Rik, Aopep, Dbpht2, Fbp1, Fbp2, Gphb5, Hif1a, Kcnh5, Mir713, Prkch, Rhoj, Snapc1, Syt16, Vmn2r31, Vmn2r32, Vmn2r33, Vmn2r34, Vmn2r35, Vmn2r36, Vmn2r37, Vmn2r38, Vmn2r39, Vmn2r40, Vmn2r41, Vmn2r42, Vmn2r43, Vmn2r44, Vmn2r45, Zfp808, Zfp934, Zfp935]",{},{},{},"[1700122H20Rik, Aph1b, Aph1c, Car12, Chd1, Ciao2a, Csnk1g1, Dapk2, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Ezr, Fbxl22, Herc1, LOC102640673, Lactb, Lix1, Lnpep, Mir190a, Mir692-1, Oaz2, Pclaf, Ppib, Rab8b, Rgmb, Riok2, Rps27l, Snx1, Snx22, Sytl3, Tln2, Tmem181a, Tpm1, Trip4, Usp3, Vmn2r90, Zfp609, Zfp960, Zfp97]","[1700122H20Rik, 1810041H14Rik, 5330411J11Rik, B930095G15Rik, Baz2b, Ccdc148, Cd302, Dapl1, Dock9, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Ezr, Farp1, Ipo5, Itgb6, Ly75, March7, Mir692-1, Pkp4, Pla2r1, Psmd14, Rap2a, Rbms1, Slc15a1, Stk24, Sytl3, Tanc1, Tank, Tbr1, Tmem181a, Wdsub1]","[1500012K07Rik, 1700122H20Rik, 4931408D14Rik, 4933401D09Rik, 5330411J11Rik, A330032B11Rik, Acvr1, Acvr1c, Agbl1, Akap13, Aph1b, Aph1c, Baz2b, Btaf1, Car12, Ccdc148, Cd302, Chd1, Ciao2a, Cpeb3, Csnk1g1, Cytip, Dapk2, Dapl1, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, E430016F16Rik, Ermn, Ezr, Fbxl22, Fgfbp3, Galnt5, Gpd2, Herc1, Ide, Itgb6, Klhl25, LOC102640673, Lactb, Lix1, Lnpep, Ly75, March5, March7, Mir190a, Mir692-1, Mrpl46, Mrps11, Ntrk3, Oaz2, Pclaf, Pkp4, Pla2r1, Ppib, Psmd14, Rab8b, Rbms1, Rgmb, Riok2, Rps27l, Snx1, Snx22, Sv2b, Sytl3, Tanc1, Tank, Tbr1, Tln2, Tmem181a, Tnks2, Tpm1, Trip4, Upp2, Usp3, Wdsub1, Zfp609, Zfp960, Zfp97]",{},"[Afg3l2, Asic4, B430212C06Rik, Chmp1b, Chpf, Cidea, Epha4, Gnal, Impa2, Inha, Mc4r, Mir6343, Mir6352, Mppe1, Obsl1, Pmaip1, Slc4a3, Stk11ip, Tmem198, Tubb6]"
Cheng_ES_TPM,"[3632454L22Rik, Armcx1, Armcx2, Armcx3, Armcx4, Armcx6, B230119M05Rik, Hnrnph2, Mir7093, Nxf2, Zmat1]",{},{},"[1700020D14Rik, Ddx3y, Eif2s3y, H2al2c, Kdm5d, Uba1y, Usp9y, Uty, Zfy1, Zfy2]",{},{},{},{},"[DXBay18, Mir2137, Pnma3, Pnma5, Spin2d, Xlr3a, Xlr3b, Xlr4a, Xlr4b, Xlr5a, Xlr5b]","[B4galnt4, DXBay18, Ifitm1, Ifitm2, Ifitm3, Ifitm5, Ifitm6, Mir2137, Nlrp6, Pgghg, Pkp3, Pnma3, Pnma5, Spin2d, Xlr3a, Xlr3b, Xlr4a, Xlr4b, Xlr5a, Xlr5b, Zfp185]",{},{},"[DXBay18, Ddx3y, Eif2s3y, H2al2c, Kdm5d, Spin2d, Uba1y, Usp9y, Uty, Xlr3a, Xlr3b, Xlr4a, Xlr4b, Xlr5a, Xlr5b, Zfy1, Zfy2]"
Alveolar_cells_Type_II_Merged_Batches,"[A530032D15Rik, C130026I21Rik, Cchcr1, Ccl3, Ccl4, Ccl5, Ccl6, Ccl9, E230016K23Rik, Fbxo36, Gm7609, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Wfdc17, Wfdc18]",{},"[A530032D15Rik, A630001G21Rik, C130026I21Rik, Cab39l, Cchcr1, Cdadc1, Fbxo36, Gm7609, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Mir8096, Phf11a, Phf11b, Phf11c, Phf11d, Pou5f1, Psors1c2, Setdb2, Shisa2, Slc16a14, Sp100, Sp110, Sp140, Tcf19]","[1700016G22Rik, 1700029P11Rik, 1700047I17Rik2, 1700129C05Rik, 2700097O09Rik, 4930506C21Rik, A530032D15Rik, A930024E05Rik, AW209491, Aco2, Actn2, Adarb2, Akr1c21, Akr1e1, Anapc5, Anapc7, Apba1, Arfip2, Arid4b, Arpc1a, Arpc1b, Arpc3, Atp2a2, Atp5j2, B3galnt2, Baz1a, Bud31, C130026I21Rik, Cab39l, Camkk2, Ccdc47, Ccdc63, Ccr6, Cdadc1, Cfl2, Chadl, Chrm3, Cpsf4, Csdc2, Cux2, Dchs1, Ddx42, Dip2c, Dnajb7, Dnhd1, Ep300, Fam122a, Fam177a, Fam189a2, Fam216a, Fbxo36, Fgf9, Fgfr1op, Ftsj3, Fxn, Ggps1, Gm7609, Gpn3, Gtpbp4, Hecw1, Hvcn1, Idi1, Idi2, Ift81, Ilk, Kdm2b, Klf6, L3mbtl2, Larp4b, Limd2, Map3k3, Mir8115, Morn3, Mpc1, Mrpl32, Mtr, Myl2, Orai1, P2rx4, P2rx7, Pdap1, Pfkp, Pgm5, Phf11a, Phf11b, Phf11c, Phf11d, Phf5a, Pip5k1b, Pitrm1, Pmm1, Polr3h, Ppp1cc, Pptc7, Prr18, Psma2, Psmc5, Ptcd1, Rad9b, Rangap1, ...]","[Efcab9, H2-D1, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Hba-a1, Hba-a2, Hba-x, Hbq1a, Hbq1b, Pou5f1, Sh3pxd2b, Stk10, Tcf19, Ubtd2]","[1700016G22Rik, AW209491, Adarb2, Aftph, Ahctf1, Akr1c21, Akr1e1, Arid4b, B3galnt2, Catsperd, Cdc42bpa, Chrm3, Cnst, Coq8a, Cox20, Desi2, Dip2c, Efcab2, Ggps1, Gtpbp4, Hecw1, Hnrnpu, Idi1, Idi2, Itpkb, Kif26b, Kif28, Klf6, Larp4b, Lgalsl, Lonp1, Micos13, Mir1933, Mrpl32, Mtr, Peli1, Pfkp, Pitrm1, Psen2, Psma2, Ranbp3, Rpl36, Ryr2, Safb, Safb2, Sccpdh, Sertad2, Slc1a4, Smyd3, Tbce, Tfb2m, Ugp2, Vmac, Vps54, Wdr37, Zmynd11]","[1700122H20Rik, Aph1b, Aph1c, Car12, Chd1, Ciao2a, Csnk1g1, Dapk2, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Ezr, Fbxl22, Herc1, LOC102640673, Lactb, Lix1, Lnpep, Mir190a, Mir692-1, Oaz2, Pclaf, Ppib, Rab8b, Rgmb, Riok2, Rps27l, Snx1, Snx22, Sytl3, Tln2, Tmem181a, Tpm1, Trip4, Usp3, Vmn2r90, Zfp609, Zfp960, Zfp97]","[DXBay18, Mir2137, Pnma3, Pnma5, Spin2d, Xlr3a, Xlr3b, Xlr4a, Xlr4b, Xlr5a, Xlr5b]",{},"[0610009B22Rik, 0610030E20Rik, 1110004F10Rik, 1110046J04Rik, 1600025M17Rik, 1700003G18Rik, 1700010I14Rik, 1700013H16Rik, 1700016G22Rik, 1700029M20Rik, 1700047I17Rik2, 1700047M11Rik, 1700061J23Rik, 1700065L07Rik, 1700093K21Rik, 1700094D03Rik, 1700104L18Rik, 1700120C14Rik, 1700122H20Rik, 1700129C05Rik, 2310005A03Rik, 2310043O21Rik, 2700097O09Rik, 3300005D01Rik, 3830403N18Rik, 4632404H12Rik, 4930432M17Rik, 4930447A16Rik, 4930452A19Rik, 4930471L23Rik, 4930502E18Rik, 4930506C21Rik, 4930517M08Rik, 4930568D16Rik, 4930578M01Rik, 4930583K01Rik, 4931406H21Rik, 4931417E11Rik, 4932414J04Rik, 4933406I18Rik, 4933411E08Rik, 4933413J09Rik, 4933431G14Rik, 4933434E20Rik, 6530411M01Rik, 8430426J06Rik, 9130019O22Rik, 9130221F21Rik, 9330159M07Rik, 9430037G07Rik, 9530003J23Rik, 9530068E07Rik, A530032D15Rik, A630001G21Rik, A730020M07Rik, A730082K24Rik, AI182371, AW822252, Abca4, Abcd3, Acaa1a, Acaa1b, Aco2, Acss2, Actl9, Actn2, Acvr2b, Adamts10, Adar, Adarb2, Adgre4, Aff4, Aftph, Ahcy, Ahsa2, Akap6, Aldo...","[1300017J02Rik, 1700047I17Rik2, 1700047M11Rik, 1700122H20Rik, 3300005D01Rik, 4732471J01Rik, 4930432M17Rik, 4930471L23Rik, 4930506C21Rik, 4930567K20Rik, 4930568D16Rik, 4932413F04Rik, 4933413J09Rik, 5830418P13Rik, A630066F11Rik, AI182371, Abca4, Abcd3, Acad11, Ackr4, Acpp, Acy3, Adgb, Aftph, Aip, Amelx, Amotl2, Anapc13, Anapc16, Anxa11, Aph1b, Aph1c, Arf4, Arhgap29, Arhgap6, Arpc1a, Arpc1b, Ascc1, Atp5j2, B230208H11Rik, BB019430, Bfsp2, Bud31, Cabp2, Capn2, Capn8, Car12, Casc4, Catsper2, Ccdc121, Ccdc138, Ccr6, Cdk2ap2, Cdv3, Ceacam1, Ceacam2, Cep63, Cgn, Chd1, Ciao2a, Ckmt1, Cnfn, Cnih3, Cnih4, Cntrl, Cphx1, Cphx2, Cphx3, Cpsf4, Csnk1g1, Ctdspl2, Cutal, Cxcl17, Dapk2, Ddit4, Degs1, Dennd6a, Dlgap1, Dnajb12, Dnajc13, Dnttip2, Doc2g, Duxbl1, Duxbl2, Duxbl3, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Edar, Egfl6, Eif3j1, Ell3, Emilin2, Enah, Ephb1, Epm2a, Erich4, Ezr, F3, ...]",{},"[4930471L23Rik, A530032D15Rik, A630001G21Rik, Ahnak, Asrgl1, Best1, C130026I21Rik, Ccl3, Ccl4, Ccl5, Ccl6, Ccl9, DXBay18, Dlgap1, E230016K23Rik, Eef1g, Emilin2, F8a, Fads3, Fth1, Gm7609, Heatr6, Incenp, Lpin2, Mir1195, Mir2137, Mir8096, Myl12a, Myl12b, Myom1, Ndc80, Olfr651, Pnma3, Pnma5, Rab3il1, Scgb1a1, Slc16a14, Smchd1, Snord92, Sp100, Sp110, Sp140, Spdya, Spin2d, Tgif1, Trim12a, Trim12c, Trim30a, Trim30b, Trim30d, Trim34a, Trim34b, Trim5, Trmt61b, Wdr43, Wfdc17, Wfdc18, Wfdc21, Xlr3a, Xlr3b, Xlr3c, Xlr4a, Xlr4b, Xlr4c, Xlr5a, Xlr5b, Xlr5c, Zfp185, Zfp275, Zfp92]"
Alveolar_cells_Type_I_Merged_Batches,"[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, C130026I21Rik, Cchcr1, Fbxo36, Gm7609, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Ifi47, Olfr1395, Olfr1396, Olfr56, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, Ifi47, Olfr1395, Olfr1396, Olfr56, Tgtp1, Tgtp2]","[9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, A630001G21Rik, C130026I21Rik, Cchcr1, Fbxo36, Gm7609, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Ifi47, Mir8096, Olfr1395, Olfr1396, Olfr56, Pou5f1, Psors1c2, Slc16a14, Sp100, Sp110, Sp140, Tcf19, Tgtp1, Tgtp2]","[1700018F24Rik, 1700047I17Rik2, 1700129C05Rik, 2700097O09Rik, 9930111J21Rik1, 9930111J21Rik2, A530032D15Rik, Aco2, Actn2, Adarb2, Arpc1a, Arpc1b, Atp5j2, Baz1a, Bud31, C130026I21Rik, Cab39l, Cdadc1, Cfl2, Chadl, Chrm3, Cpsf4, Dip2c, Fam177a, Fbxo36, Fgf9, Gm7609, Gtpbp4, Idi1, Idi2, Ifi47, Klf6, Kpna7, L3mbtl2, Larp4b, Mtr, Olfr1395, Olfr1396, Olfr56, Pdap1, Pfkp, Phf11a, Phf11b, Phf11c, Phf11d, Phf5a, Pitrm1, Ptcd1, Rangap1, Rcbtb1, Ryr2, Setdb2, Slc16a14, Snx6, Sp100, Sp110, Sp140, Srp54a, Srp54b, Tef, Tgtp1, Tgtp2, Tob2, Wdr37, Zc3h7b, Zkscan14, Zkscan5, Zmynd11]","[3930402G23Rik, 4833411C07Rik, 9530052E02Rik, Col4a1, Col4a2, E230013L22Rik, Efcab9, H2-D1, H2-Q1, H2-Q10, H2-Q2, H2-Q4, H2-Q6, H2-Q7, Hba-a1, Hba-a2, Hba-x, Hbq1a, Hbq1b, Irs2, Naxd, Pou5f1, Rab20, Sh3pxd2b, Stk10, Tcf19, Ubtd2]","[1700016G22Rik, Adarb2, Adck2, Aftph, Agk, Braf, Ccdc71, Chrm3, Dip2c, E330009J07Rik, Gtpbp4, Idi1, Idi2, Impdh2, Klf6, Lamb2, Larp4b, Lgalsl, Mir12206, Mir1933, Mrps33, Mtr, Ndufaf3, Ndufb2, Peli1, Pfkp, Pitrm1, Qars, Qrich1, Ryr2, Sertad2, Slc1a4, Tmem178b, Ugp2, Usp19, Vps54, Wdr37, Wee2, Zmynd11]","[1700122H20Rik, 1810041H14Rik, 5330411J11Rik, B930095G15Rik, Baz2b, Ccdc148, Cd302, Dapl1, Dock9, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Ezr, Farp1, Ipo5, Itgb6, Ly75, March7, Mir692-1, Pkp4, Pla2r1, Psmd14, Rap2a, Rbms1, Slc15a1, Stk24, Sytl3, Tanc1, Tank, Tbr1, Tmem181a, Wdsub1]","[B4galnt4, DXBay18, Ifitm1, Ifitm2, Ifitm3, Ifitm5, Ifitm6, Mir2137, Nlrp6, Pgghg, Pkp3, Pnma3, Pnma5, Spin2d, Xlr3a, Xlr3b, Xlr4a, Xlr4b, Xlr5a, Xlr5b, Zfp185]","[0610009B22Rik, 0610030E20Rik, 1110004F10Rik, 1110046J04Rik, 1600025M17Rik, 1700003G18Rik, 1700010I14Rik, 1700013H16Rik, 1700016G22Rik, 1700029M20Rik, 1700047I17Rik2, 1700047M11Rik, 1700061J23Rik, 1700065L07Rik, 1700093K21Rik, 1700094D03Rik, 1700104L18Rik, 1700120C14Rik, 1700122H20Rik, 1700129C05Rik, 2310005A03Rik, 2310043O21Rik, 2700097O09Rik, 3300005D01Rik, 3830403N18Rik, 4632404H12Rik, 4930432M17Rik, 4930447A16Rik, 4930452A19Rik, 4930471L23Rik, 4930502E18Rik, 4930506C21Rik, 4930517M08Rik, 4930568D16Rik, 4930578M01Rik, 4930583K01Rik, 4931406H21Rik, 4931417E11Rik, 4932414J04Rik, 4933406I18Rik, 4933411E08Rik, 4933413J09Rik, 4933431G14Rik, 4933434E20Rik, 6530411M01Rik, 8430426J06Rik, 9130019O22Rik, 9130221F21Rik, 9330159M07Rik, 9430037G07Rik, 9530003J23Rik, 9530068E07Rik, A530032D15Rik, A630001G21Rik, A730020M07Rik, A730082K24Rik, AI182371, AW822252, Abca4, Abcd3, Acaa1a, Acaa1b, Aco2, Acss2, Actl9, Actn2, Acvr2b, Adamts10, Adar, Adarb2, Adgre4, Aff4, Aftph, Ahcy, Ahsa2, Akap6, Aldo...",{},"[1700018F24Rik, 1700047I17Rik2, 1700047M11Rik, 1700122H20Rik, 3300005D01Rik, 4930429F24Rik, 4930432M17Rik, 4930471L23Rik, 4930506C21Rik, 4930568D16Rik, 4933413J09Rik, 9930111J21Rik1, 9930111J21Rik2, A730020M07Rik, AI182371, Abca4, Abca7, Abcd3, Abhd3, Aftph, Alg14, Anxa11, Arf4, Arhgap29, Arhgap45, Arid3a, Armc4, Arpc1a, Arpc1b, Atp13a1, Atp5j2, Bcar3, Bckdhb, Bud31, Capn2, Capn8, Casc4, Cbarp, Ccdc121, Ccdc185, Ccny, Ccr6, Cd109, Cetn1, Cgn, Cnih3, Cnih4, Cnn2, Cnn3, Cntrl, Col12a1, Colec12, Cox7a2, Cphx1, Cphx2, Cphx3, Cpsf4, Ctbp1, Ctdspl2, Cutal, Cyp3a16, Cyp3a57, D430036J16Rik, Degs1, Dennd6a, Depdc5, Dlgap1, Dnttip2, Duxbl1, Duxbl2, Duxbl3, Dynlt1a, Dynlt1b, Dynlt1c, Dynlt1f, Eef1a1, Eif3j1, Elovl4, Emilin2, Enah, Esco1, Ezr, F3, F630042J09Rik, Fam177a, Fam53a, Fbxo28, Fbxw2, Fgfr1op, Fgfr3, Fgfr4, Filip1, Fnbp1l, Fndc1, Fosl2, Frmd5, Fzd8, Gapvd1, Gclm, Gjd4, ...]",{},"[4930471L23Rik, A530032D15Rik, A630001G21Rik, C130026I21Rik, DXBay18, Emilin2, F8a, Gm7609, Lpin2, Mir1195, Mir2137, Mir8096, Myl12a, Myl12b, Myom1, Olfr651, Pnma3, Pnma5, Slc16a14, Sp100, Sp110, Sp140, Spin2d, Tgif1, Trim12a, Trim12c, Trim30a, Trim30b, Trim30d, Trim34a, Trim34b, Trim5, Xlr3a, Xlr3b, Xlr3c, Xlr4a, Xlr4b, Xlr4c, Xlr5a, Xlr5b, Xlr5c, Zfp185, Zfp275, Zfp92]"


In [103]:
chromosome_overlap_upper.applymap(lambda x: len(x) if x is not None else np.nan)

Unnamed: 0,Gaublomme_GSE75109_TPM_clean,Gaublomme_GSE75110_TPM_clean,Gaublomme_GSE75111_TPM_clean,kakadarov_tpm,somatosensory_converted_into_tpm,Dopaminergic_TPM_clean,Rbp4_positive_cells,Cheng_ES_TPM,Alveolar_cells_Type_II_Merged_Batches,Alveolar_cells_Type_I_Merged_Batches,klein,hepat_TPM_yang_clean,Yu_First_wave_endocrine_cells
Gaublomme_GSE75109_TPM_clean,,34.0,52.0,2.0,1.0,16.0,,4.0,5.0,5.0,3.0,1.0,18.0
Gaublomme_GSE75110_TPM_clean,34.0,,37.0,1.0,1.0,10.0,,,,1.0,5.0,1.0,12.0
Gaublomme_GSE75111_TPM_clean,52.0,37.0,,5.0,1.0,16.0,,,6.0,6.0,3.0,1.0,20.0
kakadarov_tpm,2.0,1.0,5.0,,2.0,12.0,18.0,3.0,23.0,12.0,40.0,5.0,3.0
somatosensory_converted_into_tpm,1.0,1.0,1.0,2.0,,6.0,5.0,,4.0,6.0,6.0,2.0,17.0
Dopaminergic_TPM_clean,16.0,10.0,16.0,12.0,6.0,,,,8.0,4.0,11.0,1.0,11.0
Rbp4_positive_cells,,,,18.0,5.0,,,,5.0,6.0,10.0,,4.0
Cheng_ES_TPM,4.0,,,3.0,,,,,3.0,6.0,,,3.0
Alveolar_cells_Type_II_Merged_Batches,5.0,,6.0,23.0,4.0,8.0,5.0,3.0,,324.0,71.0,,20.0
Alveolar_cells_Type_I_Merged_Batches,5.0,1.0,6.0,12.0,6.0,4.0,6.0,6.0,324.0,,67.0,,14.0
