In [46]:
import pandas as pd

In [47]:
# Goal: explore the cluster-informative genes that are expressed at the
# synaptic terminal.
# The genes that explain the differences between clusters were generated
# earlier and saved, one per line, at
# ../data/03_ICIM_analysis/KC_genes_ICIM.txt

with open("../data/03_ICIM_analysis/KC_genes_ICIM.txt", 'r') as icim_file:
    # Surrounding whitespace (including the newline) is stripped from each entry.
    gene_list = [raw_entry.strip() for raw_entry in icim_file]

In [1]:
# Using the gene list pulled above, a Gene Ontology analysis was run with
# http://www.geneontology.org/page/go-enrichment-analysis
#
# That analysis produced a list of 18 genes expressed at synaptic terminals.
# These 18 genes are used below to visualize their expression levels in each
# of the defined clusters.
#
# List of synaptic genes from the GO analysis:
# ../data/04_synaptic_gene_expression_profiling/pantherGeneList.txt
#
# The file is read as tab-separated text; the gene symbol is taken from the
# second column of each line.

synap_gene_list = []
with open("../data/04_synaptic_gene_expression_profiling/pantherGeneList.txt", 'r') as f:
    for line_number, line in enumerate(f, start=1):
        stripped = line.strip()
        # A blank line (e.g. a trailing newline at the end of the export)
        # carries no gene and would otherwise fail on the column lookup.
        if not stripped:
            continue
        fields = stripped.split('\t')
        # Fail with the offending line instead of a bare IndexError.
        if len(fields) < 2:
            raise ValueError(
                "pantherGeneList.txt line {}: expected at least 2 tab-separated "
                "columns, got {!r}".format(line_number, stripped)
            )
        synap_gene_list.append(fields[1].strip())

In [3]:
# Display the gene symbols parsed from pantherGeneList.txt
synap_gene_list

['Rim',
 'DAT',
 'Pgk',
 'Arf79F',
 'veli',
 'Pka-R1',
 'Rop',
 'Rbp',
 'Syx1A',
 'nAChRalpha3',
 'CaMKII',
 'gammaSnap1',
 'Sytalpha',
 'CanB',
 'dlg1',
 'sesB',
 'nAChRalpha7',
 'VGlut']

In [49]:
# Load the per-cell table from the ICIM analysis step, one row per cell,
# keyed by the CellID column.
df = pd.read_csv(
    "../data/03_ICIM_analysis/KC_ICIM_TSNE_data.csv",
    index_col='CellID',
)

In [50]:
# Load the filtered Kenyon-cell expression matrix: rows are keyed by the
# gene `symbol` column and every other column is one cell ID.
# (Values are log2-transformed according to the file name.)
count = pd.read_csv(
    "../data/02_filtered_kenyon_cells/CLEAN_LOG2TRANSFORM_kenyon_cells.csv",
    index_col='symbol',
)

In [51]:
# Preview the first rows of the expression matrix (genes x cells)
count.head()

Unnamed: 0_level_0,ACATACGAGGGCTTCC-DGRP-551_0d_r1,ACCCACTTCACTCTTA-DGRP-551_0d_r1,ACCGTAAAGATAGTCA-DGRP-551_0d_r1,ACTTACTAGTGGTAAT-DGRP-551_0d_r1,ACTTGTTCATGGTTGT-DGRP-551_0d_r1,ACTTTCATCAATAAGG-DGRP-551_0d_r1,AGATCTGCAACAACCT-DGRP-551_0d_r1,AGATCTGTCTACTCAT-DGRP-551_0d_r1,AGCGGTCCATTTCACT-DGRP-551_0d_r1,AGCTCTCGTTTGACTG-DGRP-551_0d_r1,...,GGCAATTCATGGATGG-w1118_15d_r1,GTCCTCAGTTGCGCAC-w1118_15d_r1,GTGCAGCGTACCGTAT-w1118_15d_r1,TATGCCCTCTATGTGG-w1118_15d_r1,TCATTACAGAGGTTGC-w1118_15d_r1,TCGAGGCAGCTATGCT-w1118_15d_r1,TTCTACATCAGTGTTG-w1118_15d_r1,AAGTCTGTCTGGTTCC-w1118_30d_r1,ACATACGTCGGATGGA-w1118_30d_r1,GAAATGACAAGACACG-w1118_30d_r1
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
128up,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140up,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14-3-3epsilon,3.459432,2.807355,3.321928,3.459432,2.807355,2.584963,3.0,3.0,3.321928,3.0,...,2.584963,1.584963,1.0,2.584963,1.0,1.0,2.807355,2.0,0.0,2.0
14-3-3zeta,5.754888,6.686501,6.409391,5.554589,6.189825,6.33985,6.686501,6.930737,3.169925,5.906891,...,4.523562,2.321928,3.169925,4.807355,3.459432,3.0,4.523562,3.321928,2.807355,4.321928
18SrRNA:CR41548,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
# Keep only the rows for the synaptic genes, in the order of synap_gene_list,
# with every cell column retained.
pull_count_table = count.loc[synap_gene_list, :]

In [53]:
# Preview the first rows of the synaptic-gene subset of the expression matrix
pull_count_table.head()

Unnamed: 0_level_0,ACATACGAGGGCTTCC-DGRP-551_0d_r1,ACCCACTTCACTCTTA-DGRP-551_0d_r1,ACCGTAAAGATAGTCA-DGRP-551_0d_r1,ACTTACTAGTGGTAAT-DGRP-551_0d_r1,ACTTGTTCATGGTTGT-DGRP-551_0d_r1,ACTTTCATCAATAAGG-DGRP-551_0d_r1,AGATCTGCAACAACCT-DGRP-551_0d_r1,AGATCTGTCTACTCAT-DGRP-551_0d_r1,AGCGGTCCATTTCACT-DGRP-551_0d_r1,AGCTCTCGTTTGACTG-DGRP-551_0d_r1,...,GGCAATTCATGGATGG-w1118_15d_r1,GTCCTCAGTTGCGCAC-w1118_15d_r1,GTGCAGCGTACCGTAT-w1118_15d_r1,TATGCCCTCTATGTGG-w1118_15d_r1,TCATTACAGAGGTTGC-w1118_15d_r1,TCGAGGCAGCTATGCT-w1118_15d_r1,TTCTACATCAGTGTTG-w1118_15d_r1,AAGTCTGTCTGGTTCC-w1118_30d_r1,ACATACGTCGGATGGA-w1118_30d_r1,GAAATGACAAGACACG-w1118_30d_r1
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Rim,1.0,2.0,2.321928,1.584963,1.584963,1.584963,2.584963,0.0,2.321928,1.0,...,1.584963,0.0,2.0,2.807355,2.0,1.584963,0.0,0.0,1.0,2.321928
DAT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.584963,0.0,0.0
Pgk,2.321928,2.584963,2.0,1.584963,1.0,2.584963,1.0,1.0,1.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Arf79F,2.0,2.807355,1.0,1.584963,2.0,2.321928,2.321928,2.321928,1.0,2.321928,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,2.0,1.0
veli,1.0,2.321928,2.0,1.584963,1.584963,1.584963,0.0,1.0,2.321928,2.321928,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# Helper to pull out the expression count of one gene for one cell
def pull_down_count(row, gene):
    """Return the expression value of ``gene`` for the cell held in ``row``.

    Written for use with ``df.apply(pull_down_count, gene=..., axis=1)``.
    The bulk column fill below no longer calls it, but it is kept for
    single-cell lookups.

    Parameters
    ----------
    row : pandas.Series
        One row of ``df``; its ``name`` is used as the cell ID.
    gene : str
        Row label to look up in ``pull_count_table``.

    Returns
    -------
    The entry of ``pull_count_table`` at ``[gene, cell]``.
    """
    cell = row.name
    return pull_count_table.loc[gene, cell]


# Fail early, naming the offenders, if any cell in df has no matching column
# in the count table (the per-cell lookup would raise KeyError for these too).
missing_cells = df.index.difference(pull_count_table.columns)
if len(missing_cells) > 0:
    raise KeyError(
        "{} cell(s) in df are missing from pull_count_table, e.g. {}".format(
            len(missing_cells), list(missing_cells[:5])
        )
    )

# Add one expression column per synaptic gene.
# Selecting the gene's row for all cells in a single .loc call replaces a
# row-by-row df.apply that performed one scalar lookup per cell per gene.
for gene in synap_gene_list:
    # The selection is ordered by df.index, so the values can be assigned
    # positionally without a second index alignment.
    df[gene] = pull_count_table.loc[gene, df.index].values

In [55]:
# Preview df, which now has one added expression column per synaptic gene
df.head()

Unnamed: 0_level_0,Unnamed: 0,Age,Gender,Genotype,Replicate,nGene,nUMI,cell_type_id,is_kc,x,...,Syx1A,nAChRalpha3,CaMKII,gammaSnap1,Sytalpha,CanB,dlg1,sesB,nAChRalpha7,VGlut
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACATACGAGGGCTTCC-DGRP-551_0d_r1,0,0,Female,DGRP-551,DGRP-551_0d_Rep1,1328,3340.0,8.0,1,-16.732498,...,2.807355,0.0,2.321928,1.0,1.0,3.0,2.807355,3.584963,0.0,0.0
ACCCACTTCACTCTTA-DGRP-551_0d_r1,1,0,Female,DGRP-551,DGRP-551_0d_Rep1,1613,4580.0,8.0,1,-19.377689,...,3.321928,0.0,3.459432,0.0,1.584963,2.807355,3.321928,3.459432,1.0,1.0
ACCGTAAAGATAGTCA-DGRP-551_0d_r1,2,0,Male,DGRP-551,DGRP-551_0d_Rep1,1466,4349.0,22.0,1,-1.656786,...,3.584963,0.0,3.321928,0.0,0.0,4.0,3.0,2.321928,0.0,0.0
ACTTACTAGTGGTAAT-DGRP-551_0d_r1,3,0,Male,DGRP-551,DGRP-551_0d_Rep1,1174,2942.0,8.0,1,-15.413997,...,2.584963,0.0,2.584963,0.0,1.0,1.584963,3.169925,3.169925,1.0,0.0
ACTTGTTCATGGTTGT-DGRP-551_0d_r1,4,0,Male,DGRP-551,DGRP-551_0d_Rep1,1410,3620.0,8.0,1,-19.74316,...,3.321928,0.0,2.321928,1.0,1.0,2.321928,2.807355,3.70044,0.0,0.0


In [56]:
# Write the per-cell table, including the added gene expression columns,
# to the output folder for this step.
df.to_csv(
    "../data/04_synaptic_gene_expression_profiling/CLEAN_LOG2TRANSFORM_WITHEXPRESSION_kenyon_cells.csv"
)