In [None]:
import scanpy as sc
from scipy import io
import pandas as pd
import numpy as np
import os

## Give the locations of required files and assign your name of interest
- file_path: Location of the main annData which contains the raw counts
- gRNA_path: Location of the guide RNA AnnData 
- crispr_index_path: the location of a csv file which has information about gRNA sequences (the file that we used for cumulus is a good example)

In [2]:
file_path = "/gstore/scratch/u/ghaffars/Dataset/sublib4/raw_qc.h5ad"
Name = 'sublib4'
gRNA_path = "/gstore/scratch/u/ghaffars/Dataset/sublib4/gRNA_qc.h5ad"
crispr_index_path = "/gstore/home/ghaffars/Cumulus/crc_ngs5704/Cumulus/Split_1/cellranger/crispr_index.csv"

In [3]:
Dataset_HOME = os.path.join('/gstore/scratch/u/ghaffars/scMaGeck', Name)
folderName=os.path.join(Dataset_HOME, f'matrix_files_'+Name)
meta_path = os.path.join(Dataset_HOME, f'metadata_'+Name+'.csv')
barcode_name= os.path.join(Dataset_HOME, f'barcode_rec_'+Name+'.txt')
genes_list = os.path.join(Dataset_HOME, f'all_genes_'+Name+'.csv')

## Read the AnnData

In [4]:
bdata = sc.read(file_path)

In [5]:
bdata

AnnData object with n_obs × n_vars = 446413 × 36603
    obs: 'Sample', 'Barcode', 'DemuxType_crispr', 'DemuxAssignment_crispr', 'DemuxType_hashing', 'DemuxAssignment_hashing', 'cellline', 'timepoint', 'HTO', 'NGS_ID', 'Biological_replicate', '10Xrun', 'sublibrary', 'gRNA_library_MOI', 'gene_symbol', 'class', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'qc_pass', 'S_score', 'G2M_score', 'phase', 'SCN_class'
    var: 'Symbol'
    layers: 'counts'

In [6]:
bdata.obs['gene_symbol'].nunique()

5117

In [7]:
bdata.obs['gene_symbol'] = bdata.obs['gene_symbol'].apply(lambda x:x.split('_')[0])

In [8]:
bdata.obs['gene_symbol'].nunique()

5117

In [9]:
bdata.obs['gene_symbol'].value_counts().reset_index()[:10]

Unnamed: 0,index,gene_symbol
0,NTC,11595
1,TCF7L2,447
2,HBE1,173
3,ZNF385D,168
4,OVCH1,168
5,CLDN7,166
6,NAT1,159
7,TP53BP2,158
8,OR8D2,156
9,TVP23B,151


In [10]:
!mkdir -p "$folderName"

In [11]:
with open(folderName + '/barcodes.tsv', 'w') as f:
    for item in bdata.obs_names:
        f.write(item + '\n')
        
with open(folderName + '/features.tsv', 'w') as f:
    for item in ['\t'.join([x,x,'Gene Expression']) for x in bdata.var_names]:
        f.write(item + '\n')
        
io.mmwrite(folderName +'/matrix', bdata.X.T)

bdata.obs.to_csv(meta_path)

In [12]:
!ls -p "$folderName"

barcodes.tsv  features.tsv  matrix.mtx


In [13]:
!gzip "$folderName"/*

In [14]:
bdata

AnnData object with n_obs × n_vars = 446413 × 36603
    obs: 'Sample', 'Barcode', 'DemuxType_crispr', 'DemuxAssignment_crispr', 'DemuxType_hashing', 'DemuxAssignment_hashing', 'cellline', 'timepoint', 'HTO', 'NGS_ID', 'Biological_replicate', '10Xrun', 'sublibrary', 'gRNA_library_MOI', 'gene_symbol', 'class', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'qc_pass', 'S_score', 'G2M_score', 'phase', 'SCN_class'
    var: 'Symbol'
    layers: 'counts'

## Generate gRNA count matrix

In [15]:
gdata = sc.read(gRNA_path)

In [16]:
gdata

AnnData object with n_obs × n_vars = 446413 × 20872
    obs: 'Sample', 'Barcode', 'DemuxType_crispr', 'DemuxAssignment_crispr', 'DemuxType_hashing', 'DemuxAssignment_hashing', 'cellline', 'timepoint', 'HTO', 'NGS_ID', 'Biological_replicate', '10Xrun', 'sublibrary', 'gRNA_library_MOI', 'gene_symbol', 'class', 'qc_pass'
    layers: 'counts'

In [17]:
gdata.obs

Unnamed: 0,Sample,Barcode,DemuxType_crispr,DemuxAssignment_crispr,DemuxType_hashing,DemuxAssignment_hashing,cellline,timepoint,HTO,NGS_ID,Biological_replicate,10Xrun,sublibrary,gRNA_library_MOI,gene_symbol,class,qc_pass
SAM24443678_rep1-AAACCCAAGGAATCGC,SAM24443678,AAACCCAAGGAATCGC,singlet,ENSG00000135093_3,singlet,SAM24439045,DLD1,day5,HTO-7,NGS5570,REPLICATE_3,1,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,USP30,target,True
SAM24443678_rep1-AAACCCAAGTCTTCCC,SAM24443678,AAACCCAAGTCTTCCC,singlet,ENSG00000181915_1,singlet,SAM24439044,DLD1,day5,HTO-5,NGS5570,REPLICATE_2,1,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,ADO,target,True
SAM24443678_rep1-AAACCCACACAGCTGC,SAM24443678,AAACCCACACAGCTGC,singlet,ENSG00000288711_4,singlet,SAM24439044,DLD1,day5,HTO-4,NGS5570,REPLICATE_2,1,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,ENSG00000288711,target,True
SAM24443678_rep1-AAACCCACATGAATAG,SAM24443678,AAACCCACATGAATAG,singlet,ENSG00000103365_2,singlet,SAM24439045,DLD1,day5,HTO-18,NGS5570,REPLICATE_3,1,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,GGA2,target,True
SAM24443678_rep1-AAACCCATCGACCATA,SAM24443678,AAACCCATCGACCATA,singlet,ENSG00000139190_2,singlet,SAM24439045,DLD1,day5,HTO-7,NGS5570,REPLICATE_3,1,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,VAMP1,target,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAM24449114_rep12-TTTGTTGCACCTGCGA,SAM24449114,TTTGTTGCACCTGCGA,singlet,ENSG00000189068_2,singlet,SAM24439044,DLD1,day5,HTO-6,NGS5704,REPLICATE_2,12,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,VSTM1,target,True
SAM24449114_rep12-TTTGTTGCATTGCTTT,SAM24449114,TTTGTTGCATTGCTTT,singlet,ENSG00000278646_1,singlet,SAM24439043,DLD1,day5,HTO-12,NGS5704,REPLICATE_1,12,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,ENSG00000278646,target,True
SAM24449114_rep12-TTTGTTGGTATGGGAC,SAM24449114,TTTGTTGGTATGGGAC,singlet,ENSG00000110723_2,singlet,SAM24439043,DLD1,day5,HTO-11,NGS5704,REPLICATE_1,12,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,EXPH5,target,True
SAM24449114_rep12-TTTGTTGGTATTTCTC,SAM24449114,TTTGTTGGTATTTCTC,singlet,ENSG00000167840_3,singlet,SAM24439044,DLD1,day5,HTO-15,NGS5704,REPLICATE_2,12,GMTY210:cropseq.crisprko.cas9.human.lib4.conce...,0.4,ZNF232,target,True


In [18]:
COL = gdata.obs.columns.to_list()

In [19]:
tmp = gdata.obs.drop(columns=COL)

In [20]:
df = bdata.obs.join(tmp)

In [21]:
df

Unnamed: 0,Sample,Barcode,DemuxType_crispr,DemuxAssignment_crispr,DemuxType_hashing,DemuxAssignment_hashing,cellline,timepoint,HTO,NGS_ID,...,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,qc_pass,S_score,G2M_score,phase,SCN_class
SAM24449114_rep1-AAACCCAAGACTGTTC,SAM24449114,AAACCCAAGACTGTTC,singlet,ENSG00000138767_2,singlet,SAM24439044,DLD1,day5,HTO-14,NGS5704,...,11768,193,1.640041,2172,18.456832,True,-1.879157,-1.713006,G1,Untreatedclus_DLD1
SAM24449114_rep1-AAACCCAAGAGAACCC,SAM24449114,AAACCCAAGAGAACCC,singlet,ENSG00000081386_1,singlet,SAM24439044,DLD1,day5,HTO-6,NGS5704,...,4553,50,1.098177,1212,26.619811,True,-0.465551,-0.736520,G1,Untreatedclus_DLD1
SAM24449114_rep1-AAACCCAAGATGCGAC,SAM24449114,AAACCCAAGATGCGAC,singlet,ENSG00000180228_3,singlet,SAM24439045,DLD1,day5,HTO-7,NGS5704,...,9155,314,3.429820,2143,23.407974,True,-0.290915,-0.803445,G1,Untreatedclus_DLD1
SAM24449114_rep1-AAACCCAAGCCTATCA,SAM24449114,AAACCCAAGCCTATCA,singlet,NTC_1819,singlet,SAM24439043,DLD1,day5,HTO-2,NGS5704,...,9456,324,3.426396,1990,21.044839,True,-0.524886,-0.383549,G1,Untreatedclus_DLD1
SAM24449114_rep1-AAACCCAAGCGCTTCG,SAM24449114,AAACCCAAGCGCTTCG,singlet,ENSG00000198933_4,singlet,SAM24439043,DLD1,day5,HTO-11,NGS5704,...,9437,474,5.022783,1714,18.162552,True,0.119865,-0.565891,S,Untreatedclus_DLD1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAM24443678_rep24-TTTGTTGGTATGAGGC,SAM24443678,TTTGTTGGTATGAGGC,singlet,ENSG00000185024_4,singlet,SAM24439045,DLD1,day5,HTO-8,NGS5570,...,5415,305,5.632502,993,18.337950,True,-0.594545,-0.008441,G1,Untreatedclus_DLD1
SAM24443678_rep24-TTTGTTGGTTTACGTG,SAM24443678,TTTGTTGGTTTACGTG,singlet,ENSG00000073282_4,singlet,SAM24439044,DLD1,day5,HTO-15,NGS5570,...,8771,126,1.436552,1507,17.181621,True,-0.381656,1.300689,G2M,Untreatedclus_DLD1
SAM24443678_rep24-TTTGTTGTCAGACCCG,SAM24443678,TTTGTTGTCAGACCCG,singlet,ENSG00000169946_1,singlet,SAM24439044,DLD1,day5,HTO-15,NGS5570,...,8692,333,3.831109,1544,17.763461,True,0.138666,-0.371232,S,Untreatedclus_DLD1
SAM24443678_rep24-TTTGTTGTCCTTATAC,SAM24443678,TTTGTTGTCCTTATAC,singlet,ENSG00000089775_3,singlet,SAM24439043,DLD1,day5,HTO-10,NGS5570,...,8220,468,5.693431,1311,15.948905,True,-0.163334,-0.185702,G1,Untreatedclus_DLD1


In [22]:
gdata.obs = df.copy()

In [23]:
gdata.obs['DemuxAssignment_crispr']=gdata.obs['DemuxAssignment_crispr'].apply(lambda x:x.split(',')[0])

In [24]:
gdata.obs[gdata.obs['DemuxAssignment_crispr'].str.contains(',')]

Unnamed: 0,Sample,Barcode,DemuxType_crispr,DemuxAssignment_crispr,DemuxType_hashing,DemuxAssignment_hashing,cellline,timepoint,HTO,NGS_ID,...,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,qc_pass,S_score,G2M_score,phase,SCN_class


## Read sgRNA info from crispr_index

In [25]:
sgRNA = pd.read_csv(crispr_index_path,header=None) 

In [26]:
sgRNA.columns= ['sgseq','DemuxAssignment_crispr','crispr']

In [27]:
sgRNA = sgRNA[['sgseq','DemuxAssignment_crispr']]

In [28]:
sgRNA

Unnamed: 0,sgseq,DemuxAssignment_crispr
0,ATAGTAAGGTGCAGAGCGC,ENSG00000000457_1
1,ACTGAGCGAGTACAGCCCC,ENSG00000000457_2
2,ATCCCGCTGTACTGCAAGA,ENSG00000000457_3
3,CTTGTACTGTGGAAGCGGA,ENSG00000000457_4
4,TGGTGTGCTTTAGAAATCC,ENSG00000000460_1
...,...,...
20867,CGTATTATCGCCGGTGGTT,NTC_2013
20868,GTCGGTCGCTCGATCGTAA,NTC_2014
20869,CGCCGTACGTACGCCGGAT,NTC_2015
20870,GCGCGCCCGGATACTATAC,NTC_2016


In [29]:
DF = gdata.obs.reset_index().merge(sgRNA, how='left', on='DemuxAssignment_crispr').set_index('index')

In [30]:
DF

Unnamed: 0_level_0,Sample,Barcode,DemuxType_crispr,DemuxAssignment_crispr,DemuxType_hashing,DemuxAssignment_hashing,cellline,timepoint,HTO,NGS_ID,...,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,qc_pass,S_score,G2M_score,phase,SCN_class,sgseq
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SAM24449114_rep1-AAACCCAAGACTGTTC,SAM24449114,AAACCCAAGACTGTTC,singlet,ENSG00000138767_2,singlet,SAM24439044,DLD1,day5,HTO-14,NGS5704,...,193,1.640041,2172,18.456832,True,-1.879157,-1.713006,G1,Untreatedclus_DLD1,GAGCCTAAGTACATCACTT
SAM24449114_rep1-AAACCCAAGAGAACCC,SAM24449114,AAACCCAAGAGAACCC,singlet,ENSG00000081386_1,singlet,SAM24439044,DLD1,day5,HTO-6,NGS5704,...,50,1.098177,1212,26.619811,True,-0.465551,-0.736520,G1,Untreatedclus_DLD1,AACTCTGTGAACCGACGTC
SAM24449114_rep1-AAACCCAAGATGCGAC,SAM24449114,AAACCCAAGATGCGAC,singlet,ENSG00000180228_3,singlet,SAM24439045,DLD1,day5,HTO-7,NGS5704,...,314,3.429820,2143,23.407974,True,-0.290915,-0.803445,G1,Untreatedclus_DLD1,CCTGGGAAAGGGTATATTC
SAM24449114_rep1-AAACCCAAGCCTATCA,SAM24449114,AAACCCAAGCCTATCA,singlet,NTC_1819,singlet,SAM24439043,DLD1,day5,HTO-2,NGS5704,...,324,3.426396,1990,21.044839,True,-0.524886,-0.383549,G1,Untreatedclus_DLD1,TCTCGCGCTTACGTTGCGA
SAM24449114_rep1-AAACCCAAGCGCTTCG,SAM24449114,AAACCCAAGCGCTTCG,singlet,ENSG00000198933_4,singlet,SAM24439043,DLD1,day5,HTO-11,NGS5704,...,474,5.022783,1714,18.162552,True,0.119865,-0.565891,S,Untreatedclus_DLD1,TCCCGGAGACCCCTCGCTT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAM24443678_rep24-TTTGTTGGTATGAGGC,SAM24443678,TTTGTTGGTATGAGGC,singlet,ENSG00000185024_4,singlet,SAM24439045,DLD1,day5,HTO-8,NGS5570,...,305,5.632502,993,18.337950,True,-0.594545,-0.008441,G1,Untreatedclus_DLD1,AGCCAGAATGCATGACTTC
SAM24443678_rep24-TTTGTTGGTTTACGTG,SAM24443678,TTTGTTGGTTTACGTG,singlet,ENSG00000073282_4,singlet,SAM24439044,DLD1,day5,HTO-15,NGS5570,...,126,1.436552,1507,17.181621,True,-0.381656,1.300689,G2M,Untreatedclus_DLD1,CGAATCAAATGACTAGGAG
SAM24443678_rep24-TTTGTTGTCAGACCCG,SAM24443678,TTTGTTGTCAGACCCG,singlet,ENSG00000169946_1,singlet,SAM24439044,DLD1,day5,HTO-15,NGS5570,...,333,3.831109,1544,17.763461,True,0.138666,-0.371232,S,Untreatedclus_DLD1,AAGTTTGCCGCTAGGGACA
SAM24443678_rep24-TTTGTTGTCCTTATAC,SAM24443678,TTTGTTGTCCTTATAC,singlet,ENSG00000089775_3,singlet,SAM24439043,DLD1,day5,HTO-10,NGS5570,...,468,5.693431,1311,15.948905,True,-0.163334,-0.185702,G1,Untreatedclus_DLD1,ATAGCTGAATATGTCAGGT


In [31]:
with open(barcode_name, 'w') as f:
    print('cell\tbarcode\tsgrna\tgene\tread_count\tumi_count',file=f)
    for i in range(gdata.X.shape[0]):
        
        cell_bc = DF.index[i]
        sg = DF['DemuxAssignment_crispr'].iloc[i]
        sgseq = DF['sgseq'].iloc[i]
        geneid  = DF['gene_symbol'].iloc[i]
        j = gdata.var.index.get_loc(gdata.var.index[gdata.var.index==DF['DemuxAssignment_crispr'].iloc[i]][0])
        umi_count = gdata.X[i,j]
        
        
        print('\t'.join([cell_bc,sg,sgseq,geneid,str(rcount),str(umi_count)]),file=f)

In [32]:
BARCODE_REC= pd.read_csv(barcode_name, delimiter='\t')

In [33]:
BARCODE_REC['gene'].nunique()

5117

In [34]:
BARCODE_REC

Unnamed: 0,cell,barcode,sgrna,gene,read_count,umi_count
0,SAM24449114_rep1-AAACCCAAGACTGTTC,ENSG00000138767_2,GAGCCTAAGTACATCACTT,CNOT6L,0,0
1,SAM24449114_rep1-AAACCCAAGAGAACCC,ENSG00000081386_1,AACTCTGTGAACCGACGTC,ZNF510,0,0
2,SAM24449114_rep1-AAACCCAAGATGCGAC,ENSG00000180228_3,CCTGGGAAAGGGTATATTC,PRKRA,0,0
3,SAM24449114_rep1-AAACCCAAGCCTATCA,NTC_1819,TCTCGCGCTTACGTTGCGA,NTC,0,0
4,SAM24449114_rep1-AAACCCAAGCGCTTCG,ENSG00000198933_4,TCCCGGAGACCCCTCGCTT,TBKBP1,0,0
...,...,...,...,...,...,...
446408,SAM24443678_rep24-TTTGTTGGTATGAGGC,ENSG00000185024_4,AGCCAGAATGCATGACTTC,BRF1,0,0
446409,SAM24443678_rep24-TTTGTTGGTTTACGTG,ENSG00000073282_4,CGAATCAAATGACTAGGAG,TP63,0,0
446410,SAM24443678_rep24-TTTGTTGTCAGACCCG,ENSG00000169946_1,AAGTTTGCCGCTAGGGACA,ZFPM2,0,0
446411,SAM24443678_rep24-TTTGTTGTCCTTATAC,ENSG00000089775_3,ATAGCTGAATATGTCAGGT,ZBTB25,0,0


In [36]:
GENES = BARCODE_REC.drop_duplicates(['gene']).reset_index(drop=True)

In [37]:
GENES

Unnamed: 0,cell,barcode,sgrna,gene,read_count,umi_count
0,SAM24449114_rep1-AAACCCAAGACTGTTC,ENSG00000138767_2,GAGCCTAAGTACATCACTT,CNOT6L,0,0
1,SAM24449114_rep1-AAACCCAAGAGAACCC,ENSG00000081386_1,AACTCTGTGAACCGACGTC,ZNF510,0,0
2,SAM24449114_rep1-AAACCCAAGATGCGAC,ENSG00000180228_3,CCTGGGAAAGGGTATATTC,PRKRA,0,0
3,SAM24449114_rep1-AAACCCAAGCCTATCA,NTC_1819,TCTCGCGCTTACGTTGCGA,NTC,0,0
4,SAM24449114_rep1-AAACCCAAGCGCTTCG,ENSG00000198933_4,TCCCGGAGACCCCTCGCTT,TBKBP1,0,0
...,...,...,...,...,...,...
5112,SAM24449114_rep10-AACGAAAGTTGCCTAA,ENSG00000103035_3,CACACTGAGCAGCACCAGG,PSMD7,0,0
5113,SAM24449114_rep10-AGCTTCCAGGGCATGT,ENSG00000181222_1,TGGTCGTGTCCGGAGCTAA,POLR2A,0,0
5114,SAM24449114_rep10-CACGTTCTCCAGCAAT,ENSG00000154473_4,CTCCACTCCAGGCATGCGT,BUB3,0,0
5115,SAM24449114_rep12-ACGATGTGTCCGAAGA,ENSG00000249209_3,GCTCTGCTGGAAGGTACAA,ENSG00000249209,0,0


In [38]:
GENES.to_csv(genes_list, index=False)

### Optional: List of genes from energy Distance to process

In [2]:
ED = pd.read_csv("/gstore/project/crc_recursion_2/Sublib2_normcontrol/DS000015963/Energy_Distance/aggregated_energy_distace.csv")

In [11]:
GENES_list = ED[ED['phenotype_@0.05']=='Yes']['gene_symbol'].unique().tolist()

In [12]:
len(GENES_list)

1272

In [13]:
GENES = ED[ED['phenotype_@0.05']=='Yes'].drop_duplicates(['gene_symbol']).reset_index(drop=True)

In [14]:
GENES

Unnamed: 0,gene_symbol,key,energy_distance_pvalue,energy_distance_statistic,phenotype_@0.05,cell_counts_per_gene,-log10pval,log10stat,iteration
0,RPLP2,perGene_pca_sphered,0.001,0.231738,Yes,52.0,3.00000,-0.635002,10.0
1,FER1L5,perGene_pca_centered,0.001,0.399202,Yes,100.0,3.00000,-0.398807,5.0
2,YY1,perGene_pca_sphered,0.001,0.176572,Yes,118.0,3.00000,-0.753078,9.0
3,LSM3,perGene_pca_sphered,0.001,0.191618,Yes,90.0,3.00000,-0.717565,10.0
4,PRPF4,perGene_pca_centered,0.001,1.475709,Yes,43.0,3.00000,0.169001,6.0
...,...,...,...,...,...,...,...,...,...
1267,DHCR24,perGene_pca_sphered,0.050,0.066310,Yes,108.0,1.30103,-1.178421,1.0
1268,EPCAM,perGene_pca_sphered,0.050,0.059743,Yes,119.0,1.30103,-1.223715,5.0
1269,ADGRG5,perGene_pca_sphered,0.050,0.053979,Yes,139.0,1.30103,-1.267771,3.0
1270,ANKRD18B,perGene_pca_sphered,0.050,0.065793,Yes,106.0,1.30103,-1.181819,10.0


In [26]:
GENES.to_csv("genes_list_from_ED.csv", index=False)