# Aim of the script

In this script, all the reads coming from the cutadapt steps are processed in order to generate tables linking each cell to its enhancer. <br>
- First, it converts fastq reads to pandas dataframe.
- Second, it removes all rows whose enhancer barcode is not one of the true enhancer barcodes. It also removes the rows whose cell barcode is not among those found in the selected clustering version.
- Third, it computes for each cell barcode the different enhancer barcodes found and their respective frequency.
- Last, it elects the enhancer for each cell based on this frequency. <br>

Because it is quite heavy to run, a shortcut to reload the processed dataframes is present in the script after step 1

The input files are stored in the **cutadapt_output** folder. <br> 
As this script can generate tables for either the full or the reduced version of the clustering, and as you can work with either all three replicates merged or individually, the outputs are stored in different folders.

## Loading packages

In [1]:
from collections import Counter
import collections
from Bio import SeqIO
from Bio import AlignIO
from Bio import Align
import pandas
import numpy
import matplotlib.pyplot as plt
import csv

## Loading cells and enhancers dictionaries

**dico_enhancers** is a table containing the 25 true enhancer barcodes. <br>
**dico_cells** are tables containing the cell barcodes present after the Seurat analysis, with either the full or the reduced clustering.

In [2]:
# Reference tables: the enhancer barcodes (name, sequence) and the cell barcodes
# exported for the full and the reduced clustering.
dico_enhancers = pandas.read_csv(
    "dico_enhancers_python.tsv",
    sep="\t",
    header=None,
    names=["noms", "seq"],
)
dico_cells_full = pandas.read_csv("../R_analyses/id_cells_6_int_full_clusters.csv")
dico_cells_reduced = pandas.read_csv("../R_analyses/id_cells_6_int_reduced_clusters.csv")

## Step1 - Converting fastq reads to pandas dataframe

In [3]:
# Parse the trimmed enhancer-barcode reads of library S6.1 and cache them as a
# TSV (read id, enhancer barcode) so that the FASTQ does not need to be parsed again.
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
enhancer_barcodes = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.1/E_BC.fastq", "fastq"):
    read_ids.append(record.id)
    enhancer_barcodes.append(str(record.seq))
df_id_enhancer = pandas.DataFrame({'id': read_ids, 'enhancer_BC': enhancer_barcodes})
df_id_enhancer.to_csv('preprocessed_tables/DF_reads_ID_enhancer_6_1.tsv', sep='\t', index=False)

In [4]:
# Parse the trimmed enhancer-barcode reads of library S6.2 and cache them as a
# TSV (read id, enhancer barcode) so that the FASTQ does not need to be parsed again.
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
enhancer_barcodes = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.2/E_BC.fastq", "fastq"):
    read_ids.append(record.id)
    enhancer_barcodes.append(str(record.seq))
df_id_enhancer = pandas.DataFrame({'id': read_ids, 'enhancer_BC': enhancer_barcodes})
df_id_enhancer.to_csv('preprocessed_tables/DF_reads_ID_enhancer_6_2.tsv', sep='\t', index=False)

In [8]:
# Parse the trimmed enhancer-barcode reads of library S6.3 and cache them as a
# TSV (read id, enhancer barcode) so that the FASTQ does not need to be parsed again.
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
enhancer_barcodes = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.3/E_BC.fastq", "fastq"):
    read_ids.append(record.id)
    enhancer_barcodes.append(str(record.seq))
df_id_enhancer = pandas.DataFrame({'id': read_ids, 'enhancer_BC': enhancer_barcodes})
df_id_enhancer.to_csv('preprocessed_tables/DF_reads_ID_enhancer_6_3.tsv', sep='\t', index=False)

In [6]:
# Parse the trimmed cell-barcode/UMI reads of library S6.1 and cache them as a
# TSV (read id, cell barcode, UMI).
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
cell_barcodes = []
umis = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.1/C_BC.fastq", "fastq"):
    read_seq = str(record.seq)
    read_ids.append(record.id)
    # The first 16 bases are stored as the cell barcode, tagged with the
    # replicate so barcodes of different replicates stay distinct; the
    # remaining bases are stored as the UMI.
    cell_barcodes.append("rep1_" + read_seq[:16])
    umis.append(read_seq[16:])
df_id_cell_umi = pandas.DataFrame({'id': read_ids, 'cell_BC': cell_barcodes, 'UMI': umis})
df_id_cell_umi.to_csv('preprocessed_tables/DF_reads_ID_cell_UMI_6_1.tsv', sep="\t", index=False)

In [7]:
# Parse the trimmed cell-barcode/UMI reads of library S6.2 and cache them as a
# TSV (read id, cell barcode, UMI).
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
cell_barcodes = []
umis = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.2/C_BC.fastq", "fastq"):
    read_seq = str(record.seq)
    read_ids.append(record.id)
    # The first 16 bases are stored as the cell barcode, tagged with the
    # replicate so barcodes of different replicates stay distinct; the
    # remaining bases are stored as the UMI.
    cell_barcodes.append("rep2_" + read_seq[:16])
    umis.append(read_seq[16:])
df_id_cell_umi = pandas.DataFrame({'id': read_ids, 'cell_BC': cell_barcodes, 'UMI': umis})
df_id_cell_umi.to_csv('preprocessed_tables/DF_reads_ID_cell_UMI_6_2.tsv', sep="\t", index=False)

In [9]:
# Parse the trimmed cell-barcode/UMI reads of library S6.3 and cache them as a
# TSV (read id, cell barcode, UMI).
# The accumulators are deliberately not called `id`, which would shadow the
# builtin for the rest of the kernel session.
read_ids = []
cell_barcodes = []
umis = []
for record in SeqIO.parse("../data/cutadapt_output/trimming_libS6.3/C_BC.fastq", "fastq"):
    read_seq = str(record.seq)
    read_ids.append(record.id)
    # The first 16 bases are stored as the cell barcode, tagged with the
    # replicate so barcodes of different replicates stay distinct; the
    # remaining bases are stored as the UMI.
    cell_barcodes.append("rep3_" + read_seq[:16])
    umis.append(read_seq[16:])
df_id_cell_umi = pandas.DataFrame({'id': read_ids, 'cell_BC': cell_barcodes, 'UMI': umis})
df_id_cell_umi.to_csv('preprocessed_tables/DF_reads_ID_cell_UMI_6_3.tsv', sep="\t", index=False)

### Shortcut 1

In [10]:
# Reload the tables written in step 1: one enhancer table and one
# cell-barcode/UMI table per replicate.
df_id_enhancer_6_1 = pandas.read_csv("preprocessed_tables/DF_reads_ID_enhancer_6_1.tsv", sep="\t")
df_id_enhancer_6_2 = pandas.read_csv("preprocessed_tables/DF_reads_ID_enhancer_6_2.tsv", sep="\t")
df_id_enhancer_6_3 = pandas.read_csv("preprocessed_tables/DF_reads_ID_enhancer_6_3.tsv", sep="\t")

df_id_cell_umi_6_1 = pandas.read_csv("preprocessed_tables/DF_reads_ID_cell_UMI_6_1.tsv", sep="\t")
df_id_cell_umi_6_2 = pandas.read_csv("preprocessed_tables/DF_reads_ID_cell_UMI_6_2.tsv", sep="\t")
df_id_cell_umi_6_3 = pandas.read_csv("preprocessed_tables/DF_reads_ID_cell_UMI_6_3.tsv", sep="\t")

## Step 2 - removing rows based on known cells and enhancer barcodes

In [11]:
# Per replicate, join the enhancer barcode and the cell barcode/UMI of each
# read on the shared read id (inner join, the pandas default).
df_id_enhancer_cell_umi_6_1 = df_id_enhancer_6_1.merge(df_id_cell_umi_6_1, on="id")
df_id_enhancer_cell_umi_6_2 = df_id_enhancer_6_2.merge(df_id_cell_umi_6_2, on="id")
df_id_enhancer_cell_umi_6_3 = df_id_enhancer_6_3.merge(df_id_cell_umi_6_3, on="id")

Removing the rows with a wrong enhancer barcode

In [None]:
# Keep only the reads whose enhancer barcode is one of the reference sequences
# listed in dico_enhancers.
df_id_enhancer_cell_umi_6_1 = df_id_enhancer_cell_umi_6_1.loc[
    df_id_enhancer_cell_umi_6_1["enhancer_BC"].isin(dico_enhancers["seq"])
]
df_id_enhancer_cell_umi_6_2 = df_id_enhancer_cell_umi_6_2.loc[
    df_id_enhancer_cell_umi_6_2["enhancer_BC"].isin(dico_enhancers["seq"])
]
df_id_enhancer_cell_umi_6_3 = df_id_enhancer_cell_umi_6_3.loc[
    df_id_enhancer_cell_umi_6_3["enhancer_BC"].isin(dico_enhancers["seq"])
]

Removing the rows with a cell barcode not found in the Seurat analysis. Here you can select whether you are working on the full or the reduced clustering.

In [None]:
# Full clustering: keep only the reads whose cell barcode is listed in
# dico_cells_full (column "x").
df_id_enhancer_cell_umi_sc_cells_6_1_full = df_id_enhancer_cell_umi_6_1.loc[
    df_id_enhancer_cell_umi_6_1["cell_BC"].isin(dico_cells_full["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_2_full = df_id_enhancer_cell_umi_6_2.loc[
    df_id_enhancer_cell_umi_6_2["cell_BC"].isin(dico_cells_full["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_3_full = df_id_enhancer_cell_umi_6_3.loc[
    df_id_enhancer_cell_umi_6_3["cell_BC"].isin(dico_cells_full["x"].tolist())
]

In [None]:
# Reduced clustering: keep only the reads whose cell barcode is listed in
# dico_cells_reduced (column "x").
df_id_enhancer_cell_umi_sc_cells_6_1_reduced = df_id_enhancer_cell_umi_6_1.loc[
    df_id_enhancer_cell_umi_6_1["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_2_reduced = df_id_enhancer_cell_umi_6_2.loc[
    df_id_enhancer_cell_umi_6_2["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_3_reduced = df_id_enhancer_cell_umi_6_3.loc[
    df_id_enhancer_cell_umi_6_3["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]

## Step 3 - Listing the enhancer barcodes found for each cell

For this function you have to choose both the Seurat clustering version (full/reduced) and how to process the samples (individual/merged). <br>
The function will then generate a table summarizing the enhancer information for each cell barcode.

In [None]:
def step3(seurat_clustering_version="full",library_processing="individual"):
    """Elect one enhancer per cell barcode and write the summary table(s).

    Parameters
    ----------
    seurat_clustering_version : str
        "full" or "reduced"; selects which filtered read tables
        (`df_id_enhancer_cell_umi_sc_cells_6_*_full` / `_reduced`) are used.
    library_processing : str
        "individual" processes the three replicates separately (one output
        file each); "merged" concatenates them first (a single output file).

    Election rule, applied to the enhancer barcodes read for a cell barcode:
    - exactly one distinct barcode: kept when it has more than 5 reads; the
      second-enhancer columns are filled with "null" / 0;
    - two or more: kept when the most frequent barcode has at least 10 times
      the reads of the second most frequent one.
    Cells that do not satisfy the rule are left out of the table.

    Side effects
    ------------
    Writes "output/wrapped_table_<version>_<processing>_<j>.tsv", j starting
    at 1, one file per processed table.

    Raises
    ------
    ValueError
        If either argument is not one of the accepted values.
    KeyError
        If an elected barcode is absent from `dico_enhancers['seq']`.
    """
    # NOTE: conditions must be combined with `and`, never `&`: `&` binds tighter
    # than `==`, so `a == "x" & b == "y"` evaluates `"x" & b` and raises TypeError.
    if seurat_clustering_version=="full":
        replicate_tables=[df_id_enhancer_cell_umi_sc_cells_6_1_full,df_id_enhancer_cell_umi_sc_cells_6_2_full,df_id_enhancer_cell_umi_sc_cells_6_3_full]
    elif seurat_clustering_version=="reduced":
        replicate_tables=[df_id_enhancer_cell_umi_sc_cells_6_1_reduced,df_id_enhancer_cell_umi_sc_cells_6_2_reduced,df_id_enhancer_cell_umi_sc_cells_6_3_reduced]
    else:
        raise ValueError("seurat_clustering_version must be 'full' or 'reduced', got "+repr(seurat_clustering_version))
    if library_processing=="individual":
        list_obj=replicate_tables
    elif library_processing=="merged":
        list_obj=[pandas.concat(replicate_tables)]
    else:
        raise ValueError("library_processing must be 'individual' or 'merged', got "+repr(library_processing))

    # Barcode sequence -> enhancer name, built once instead of one table scan per row.
    seq_to_name=dict(zip(dico_enhancers['seq'],dico_enhancers['noms']))
    columns=['cell_BC','list_enhancer_BC','nb_enhancer_BC','1st_enhancer_BC','freq_1st_enhancer_BC','2nd_enhancer_BC','freq_2nd_enhancer_BC']
    for j,reads in enumerate(list_obj,start=1):
        rows=[]
        # groupby(sort=False) visits the cell barcodes in order of first
        # appearance and avoids re-scanning the whole table for every cell.
        for cell_BC,enhancer_reads in reads.groupby('cell_BC',sort=False)['enhancer_BC']:
            freq_seq_enhancers=Counter(enhancer_reads)
            nb=len(freq_seq_enhancers)
            # Ascending by read count: winner last, runner-up second to last.
            sorted_freq_seq=sorted(freq_seq_enhancers.items(),key=lambda item:item[1])
            first_elem=sorted_freq_seq[-1]
            if nb==1:
                if first_elem[1]>5:
                    rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],"null",0])
            else:
                second_elem=sorted_freq_seq[-2]
                if first_elem[1]>=second_elem[1]*10:
                    rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],second_elem[0],second_elem[1]])
        # Build the table once rather than growing it row by row.
        df_cell_enhancer_freq=pandas.DataFrame(rows,columns=columns)
        df_cell_enhancer_freq['nom_1st_enhancer']=[seq_to_name[bc] for bc in df_cell_enhancer_freq['1st_enhancer_BC']]
        # The "null" placeholder of single-enhancer cells has no reference entry;
        # it keeps the name "null" unless dico_enhancers defines one for it.
        df_cell_enhancer_freq['nom_2nd_enhancer']=[seq_to_name.get(bc,"null") for bc in df_cell_enhancer_freq['2nd_enhancer_BC']]
        df_cell_enhancer_freq.to_csv("output/wrapped_table_"+seurat_clustering_version+"_"+library_processing+"_"+str(j)+".tsv",sep='\t',index=False)

## Step 4 - output formatting

This last step is necessary to generate the table used by Seurat and novoSpaRc

In [None]:
def step4(seurat_clustering_version="full",library_processing="individual"):
    """Select the filtered read table(s) for the requested configuration.

    NOTE(review): only the table selection is implemented. No output
    formatting is performed yet: the function writes nothing and returns None.

    Parameters
    ----------
    seurat_clustering_version : str
        "full" or "reduced"; selects the `_full` or `_reduced` filtered tables.
    library_processing : str
        "individual" keeps the three replicate tables separate; "merged"
        concatenates them into a single table.

    Raises
    ------
    ValueError
        If either argument is not one of the accepted values.
    """
    # NOTE: conditions must be combined with `and`, never `&`: `&` binds tighter
    # than `==`, so `a == "x" & b == "y"` evaluates `"x" & b` and raises TypeError.
    if seurat_clustering_version=="full":
        replicate_tables=[df_id_enhancer_cell_umi_sc_cells_6_1_full,df_id_enhancer_cell_umi_sc_cells_6_2_full,df_id_enhancer_cell_umi_sc_cells_6_3_full]
    elif seurat_clustering_version=="reduced":
        replicate_tables=[df_id_enhancer_cell_umi_sc_cells_6_1_reduced,df_id_enhancer_cell_umi_sc_cells_6_2_reduced,df_id_enhancer_cell_umi_sc_cells_6_3_reduced]
    else:
        raise ValueError("seurat_clustering_version must be 'full' or 'reduced', got "+repr(seurat_clustering_version))
    if library_processing=="individual":
        list_obj=replicate_tables
    elif library_processing=="merged":
        list_obj=[pandas.concat(replicate_tables)]
    else:
        raise ValueError("library_processing must be 'individual' or 'merged', got "+repr(library_processing))
    # TODO: turn `list_obj` into the tables used by Seurat and novoSpaRc.


### Shortcut 2

In [3]:
# Reload the per-replicate election tables of the full clustering from disk.
big_table_6_1 = pandas.read_csv("individual_full/wrapped_table_full_version_rep1.tsv", sep="\t")
big_table_6_2 = pandas.read_csv("individual_full/wrapped_table_full_version_rep2.tsv", sep="\t")
big_table_6_3 = pandas.read_csv("individual_full/wrapped_table_full_version_rep3.tsv", sep="\t")

In [4]:
# Per replicate, count how many cells elected each enhancer name.
freq_enhancers_6_1 = Counter(big_table_6_1["nom_1st_enhancer"])
freq_enhancers_6_2 = Counter(big_table_6_2["nom_1st_enhancer"])
freq_enhancers_6_3 = Counter(big_table_6_3["nom_1st_enhancer"])

In [5]:
# For each replicate, write one row per elected enhancer holding the
# comma-separated cell barcodes that elected it.
list_big=[big_table_6_1,big_table_6_2,big_table_6_3]
list_freq=[freq_enhancers_6_1,freq_enhancers_6_2,freq_enhancers_6_3]
for i,(big_table,freq_enhancers) in enumerate(zip(list_big,list_freq)):
    rows=[]
    for cle in freq_enhancers.keys():
        list_cell=big_table.loc[big_table['nom_1st_enhancer']==cle,'cell_BC']
        # Join the barcodes directly: stripping brackets, quotes and spaces from
        # the repr of a list would also strip those characters from the data.
        rows.append([cle,",".join(map(str,list_cell))])
    df_list_cells_with_enhancer=pandas.DataFrame(rows,columns=["enhancer","liste"])
    df_list_cells_with_enhancer.to_csv('individual_full/lists_cells_for_each_enhancer_full_version_6_'+str(i+1)+'.tsv',sep='\t',index=False)

## Now we do the same but for the reduced version

In [17]:
# Reduced clustering: keep only the reads whose cell barcode is listed in
# dico_cells_reduced (column "x").
df_id_enhancer_cell_umi_sc_cells_6_1 = df_id_enhancer_cell_umi_6_1.loc[
    df_id_enhancer_cell_umi_6_1["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_2 = df_id_enhancer_cell_umi_6_2.loc[
    df_id_enhancer_cell_umi_6_2["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]
df_id_enhancer_cell_umi_sc_cells_6_3 = df_id_enhancer_cell_umi_6_3.loc[
    df_id_enhancer_cell_umi_6_3["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]

In [18]:
# Elect one enhancer per cell barcode for each replicate (reduced clustering)
# and write one summary table per replicate.
# Election rule:
# - a cell with a single distinct enhancer barcode is kept when that barcode has
#   more than 5 reads (second-enhancer columns are filled with "null" / 0);
# - a cell with several is kept when the most frequent barcode has at least
#   10 times the reads of the second most frequent one.
# Barcode sequence -> enhancer name, built once instead of one table scan per row.
seq_to_name=dict(zip(dico_enhancers['seq'],dico_enhancers['noms']))
for j,reads in enumerate([df_id_enhancer_cell_umi_sc_cells_6_1,df_id_enhancer_cell_umi_sc_cells_6_2,df_id_enhancer_cell_umi_sc_cells_6_3],start=1):
    rows=[]
    # groupby(sort=False) visits the cell barcodes in order of first appearance
    # and avoids re-scanning the whole table for every cell.
    for cell_BC,enhancer_reads in reads.groupby('cell_BC',sort=False)['enhancer_BC']:
        freq_seq_enhancers=Counter(enhancer_reads)
        nb=len(freq_seq_enhancers)
        # Ascending by read count: winner last, runner-up second to last.
        sorted_freq_seq=sorted(freq_seq_enhancers.items(),key=lambda item:item[1])
        first_elem=sorted_freq_seq[-1]
        if nb==1:
            # Written as nested tests on purpose: `nb == 1 & count > 5` is parsed
            # as `nb == (1 & count) > 5`, which can never be true.
            if first_elem[1]>5:
                rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],"null",0])
        else:
            second_elem=sorted_freq_seq[-2]
            if first_elem[1]>=second_elem[1]*10:
                rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],second_elem[0],second_elem[1]])
    # Build the table once rather than growing it row by row.
    df_cell_enhancer_freq=pandas.DataFrame(rows,columns=['cell_BC','list_enhancer_BC','nb_enhancer_BC','1st_enhancer_BC','freq_1st_enhancer_BC','2nd_enhancer_BC','freq_2nd_enhancer_BC'])
    df_cell_enhancer_freq['nom_1st_enhancer']=[seq_to_name[bc] for bc in df_cell_enhancer_freq['1st_enhancer_BC']]
    # The "null" placeholder of single-enhancer cells has no reference entry;
    # it keeps the name "null" unless dico_enhancers defines one for it.
    df_cell_enhancer_freq['nom_2nd_enhancer']=[seq_to_name.get(bc,"null") for bc in df_cell_enhancer_freq['2nd_enhancer_BC']]
    df_cell_enhancer_freq.to_csv("individual_reduced/wrapped_table_reduced_version_rep"+str(j)+".tsv",sep='\t',index=False)

### Shortcut 2 bis

In [6]:
# Reload the per-replicate election tables of the reduced clustering from disk.
big_table_6_1 = pandas.read_csv("individual_reduced/wrapped_table_reduced_version_rep1.tsv", sep="\t")
big_table_6_2 = pandas.read_csv("individual_reduced/wrapped_table_reduced_version_rep2.tsv", sep="\t")
big_table_6_3 = pandas.read_csv("individual_reduced/wrapped_table_reduced_version_rep3.tsv", sep="\t")

In [7]:
# Per replicate, count how many cells elected each enhancer name.
freq_enhancers_6_1 = Counter(big_table_6_1["nom_1st_enhancer"])
freq_enhancers_6_2 = Counter(big_table_6_2["nom_1st_enhancer"])
freq_enhancers_6_3 = Counter(big_table_6_3["nom_1st_enhancer"])

In [8]:
# For each replicate, write one row per elected enhancer holding the
# comma-separated cell barcodes that elected it.
list_big=[big_table_6_1,big_table_6_2,big_table_6_3]
list_freq=[freq_enhancers_6_1,freq_enhancers_6_2,freq_enhancers_6_3]
for i,(big_table,freq_enhancers) in enumerate(zip(list_big,list_freq)):
    rows=[]
    for cle in freq_enhancers.keys():
        list_cell=big_table.loc[big_table['nom_1st_enhancer']==cle,'cell_BC']
        # Join the barcodes directly: stripping brackets, quotes and spaces from
        # the repr of a list would also strip those characters from the data.
        rows.append([cle,",".join(map(str,list_cell))])
    df_list_cells_with_enhancer=pandas.DataFrame(rows,columns=["enhancer","liste"])
    df_list_cells_with_enhancer.to_csv('individual_reduced/lists_cells_for_each_enhancer_reduced_version_6_'+str(i+1)+'.tsv',sep='\t',index=False)

## Last we do the same after merging all three datasets together

In [14]:
# Stack the three replicate read tables into a single table.
df_id_enhancer_cell_umi_merged = pandas.concat(
    [
        df_id_enhancer_cell_umi_6_1,
        df_id_enhancer_cell_umi_6_2,
        df_id_enhancer_cell_umi_6_3,
    ]
)

In [15]:
# Full clustering: keep only the merged reads whose cell barcode is listed in
# dico_cells_full (column "x").
df_id_enhancer_cell_umi_sc_cells_merged = df_id_enhancer_cell_umi_merged.loc[
    df_id_enhancer_cell_umi_merged["cell_BC"].isin(dico_cells_full["x"].tolist())
]

In [16]:
# Elect one enhancer per cell barcode on the merged replicates (full
# clustering) and write the summary table.
# Election rule:
# - a cell with a single distinct enhancer barcode is kept when that barcode has
#   more than 5 reads (second-enhancer columns are filled with "null" / 0);
# - a cell with several is kept when the most frequent barcode has at least
#   10 times the reads of the second most frequent one.
rows=[]
# groupby(sort=False) visits the cell barcodes in order of first appearance
# and avoids re-scanning the whole table for every cell.
for cell_BC,enhancer_reads in df_id_enhancer_cell_umi_sc_cells_merged.groupby('cell_BC',sort=False)['enhancer_BC']:
    freq_seq_enhancers=Counter(enhancer_reads)
    nb=len(freq_seq_enhancers)
    # Ascending by read count: winner last, runner-up second to last.
    sorted_freq_seq=sorted(freq_seq_enhancers.items(),key=lambda item:item[1])
    first_elem=sorted_freq_seq[-1]
    if nb==1:
        # Written as nested tests on purpose: `nb == 1 & count > 5` is parsed
        # as `nb == (1 & count) > 5`, which can never be true.
        if first_elem[1]>5:
            rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],"null",0])
    else:
        second_elem=sorted_freq_seq[-2]
        if first_elem[1]>=second_elem[1]*10:
            rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],second_elem[0],second_elem[1]])
# Build the table once rather than growing it row by row.
df_cell_enhancer_freq=pandas.DataFrame(rows,columns=['cell_BC','list_enhancer_BC','nb_enhancer_BC','1st_enhancer_BC','freq_1st_enhancer_BC','2nd_enhancer_BC','freq_2nd_enhancer_BC'])
# Barcode sequence -> enhancer name, built once instead of one table scan per row.
seq_to_name=dict(zip(dico_enhancers['seq'],dico_enhancers['noms']))
df_cell_enhancer_freq['nom_1st_enhancer']=[seq_to_name[bc] for bc in df_cell_enhancer_freq['1st_enhancer_BC']]
# The "null" placeholder of single-enhancer cells has no reference entry;
# it keeps the name "null" unless dico_enhancers defines one for it.
df_cell_enhancer_freq['nom_2nd_enhancer']=[seq_to_name.get(bc,"null") for bc in df_cell_enhancer_freq['2nd_enhancer_BC']]
df_cell_enhancer_freq.to_csv('merged_full/wrapped_table_full_version_merged.tsv',sep='\t',index=False)

In [25]:
# Reload the merged election table of the full clustering from disk.
df_cell_enhancer_freq = pandas.read_csv(
    "merged_full/wrapped_table_full_version_merged.tsv",
    sep="\t",
)

In [17]:
# Count how many cells elected each enhancer name in the merged table.
freq_enhancers_merged = Counter(df_cell_enhancer_freq["nom_1st_enhancer"])

In [18]:
# Write one row per elected enhancer holding the comma-separated cell barcodes
# that elected it (merged replicates, full clustering).
rows=[]
for cle in freq_enhancers_merged.keys():
    list_cell=df_cell_enhancer_freq.loc[df_cell_enhancer_freq['nom_1st_enhancer']==cle,'cell_BC']
    # Join the barcodes directly: stripping brackets, quotes and spaces from
    # the repr of a list would also strip those characters from the data.
    rows.append([cle,",".join(map(str,list_cell))])
df_list_cells_with_enhancer=pandas.DataFrame(rows,columns=["enhancer","liste"])
df_list_cells_with_enhancer.to_csv('merged_full/lists_cells_for_each_enhancer_full_version_merged.tsv',sep='\t',index=False)

In [19]:
# Reduced clustering: keep only the merged reads whose cell barcode is listed
# in dico_cells_reduced (column "x").
df_id_enhancer_cell_umi_sc_cells_merged = df_id_enhancer_cell_umi_merged.loc[
    df_id_enhancer_cell_umi_merged["cell_BC"].isin(dico_cells_reduced["x"].tolist())
]

In [20]:
# Elect one enhancer per cell barcode on the merged replicates (reduced
# clustering) and write the summary table.
# Election rule:
# - a cell with a single distinct enhancer barcode is kept when that barcode has
#   more than 5 reads (second-enhancer columns are filled with "null" / 0);
# - a cell with several is kept when the most frequent barcode has at least
#   10 times the reads of the second most frequent one.
rows=[]
# groupby(sort=False) visits the cell barcodes in order of first appearance
# and avoids re-scanning the whole table for every cell.
for cell_BC,enhancer_reads in df_id_enhancer_cell_umi_sc_cells_merged.groupby('cell_BC',sort=False)['enhancer_BC']:
    freq_seq_enhancers=Counter(enhancer_reads)
    nb=len(freq_seq_enhancers)
    # Ascending by read count: winner last, runner-up second to last.
    sorted_freq_seq=sorted(freq_seq_enhancers.items(),key=lambda item:item[1])
    first_elem=sorted_freq_seq[-1]
    if nb==1:
        # Written as nested tests on purpose: `nb == 1 & count > 5` is parsed
        # as `nb == (1 & count) > 5`, which can never be true.
        if first_elem[1]>5:
            rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],"null",0])
    else:
        second_elem=sorted_freq_seq[-2]
        if first_elem[1]>=second_elem[1]*10:
            rows.append([cell_BC,freq_seq_enhancers.keys(),nb,first_elem[0],first_elem[1],second_elem[0],second_elem[1]])
# Build the table once rather than growing it row by row.
df_cell_enhancer_freq=pandas.DataFrame(rows,columns=['cell_BC','list_enhancer_BC','nb_enhancer_BC','1st_enhancer_BC','freq_1st_enhancer_BC','2nd_enhancer_BC','freq_2nd_enhancer_BC'])
# Barcode sequence -> enhancer name, built once instead of one table scan per row.
seq_to_name=dict(zip(dico_enhancers['seq'],dico_enhancers['noms']))
df_cell_enhancer_freq['nom_1st_enhancer']=[seq_to_name[bc] for bc in df_cell_enhancer_freq['1st_enhancer_BC']]
# The "null" placeholder of single-enhancer cells has no reference entry;
# it keeps the name "null" unless dico_enhancers defines one for it.
df_cell_enhancer_freq['nom_2nd_enhancer']=[seq_to_name.get(bc,"null") for bc in df_cell_enhancer_freq['2nd_enhancer_BC']]
df_cell_enhancer_freq.to_csv('merged_reduced/wrapped_table_reduced_version_merged.tsv',sep='\t',index=False)

In [30]:
# Reload the merged election table of the reduced clustering from disk.
df_cell_enhancer_freq = pandas.read_csv(
    "merged_reduced/wrapped_table_reduced_version_merged.tsv",
    sep="\t",
)

In [21]:
# Count how many cells elected each enhancer name in the merged table.
freq_enhancers_merged = Counter(df_cell_enhancer_freq["nom_1st_enhancer"])

In [22]:
# Write one row per elected enhancer holding the comma-separated cell barcodes
# that elected it (merged replicates, reduced clustering).
rows=[]
for cle in freq_enhancers_merged.keys():
    list_cell=df_cell_enhancer_freq.loc[df_cell_enhancer_freq['nom_1st_enhancer']==cle,'cell_BC']
    # Join the barcodes directly: stripping brackets, quotes and spaces from
    # the repr of a list would also strip those characters from the data.
    rows.append([cle,",".join(map(str,list_cell))])
df_list_cells_with_enhancer=pandas.DataFrame(rows,columns=["enhancer","liste"])
df_list_cells_with_enhancer.to_csv('merged_reduced/lists_cells_for_each_enhancer_reduced_version_merged.tsv',sep='\t',index=False)