In [None]:
import subprocess
from urllib.parse import urlencode

import numpy as np
import pandas as pd

In [None]:
# Setup: significance cut-offs, output location and the DESeq2 table to analyse

padj_threshold = 0.05                  # Cut-off applied to the adjusted p-value
abs_l2fc_threshold = np.log2(1.5)      # Cut-off on |log2FoldChange| (1.5x expressed on the log2 scale)

# NOTE(review): looks like a placeholder; no cell visible in this notebook reads it
outdir = 'aaaa'

# DESeq2 results table; adjacent literals are concatenated into a single path
deseq_data_file = (
    '../pipeline_results_stefan_rna_seq/deseq2/'
    'comparison1__COMPETENCE1_Sex__Not_competent_vs_Competent/'
    'Comparison_Competent_vs_Not_competent/'
    'Comparison_Competent_vs_Not_competent.deseq2_results.tsv'
)

In [None]:
# Load the tab-separated DESeq2 results table configured in the setup cell
print(f'Reading in DESeq2 data file: {deseq_data_file}')
deseq_data = pd.read_csv(
    deseq_data_file,
    sep='\t',
)
n_genes_reported = len(deseq_data)
print(f'{n_genes_reported} genes reported')

In [None]:
# Preview the first two rows of the DESeq2 table
deseq_data.head(2)

In [None]:
# Background (reference) gene set: every distinct 'region' value in the table,
# collected before any of the filtering done in later cells
region_column = deseq_data['region']
reference_genes_list = region_column.drop_duplicates().to_list()
n_reference_genes = len(reference_genes_list)
print(f'{n_reference_genes} background reference regulated genes')

In [None]:
# Format: express padj as -log10(padj) and keep only the three columns
# that the following cells read
minus_log10_padj = -np.log10(deseq_data['padj'])
columns_to_keep = ['region', 'log2FoldChange', 'minus_log10(padj)']
deseq_data = (
    deseq_data
    .assign(**{'minus_log10(padj)': minus_log10_padj})
    .loc[:, columns_to_keep]
)

In [None]:
# Drop every gene whose row holds at least one missing value
rows_before = deseq_data.shape[0]
print(f'{rows_before} genes BEFORE filter for NA')

filt = deseq_data.notna().all(axis=1)    # True where the row is complete
deseq_data = deseq_data[filt]

rows_after = deseq_data.shape[0]
print(f'{rows_after} genes AFTER filtering for NA')

In [None]:
# Filter for significant genes: collect the unique 'region' IDs that pass the
# adjusted p-value cut-off and the fold-change cut-off in either direction.

# assign() returns a new frame, so the column is not written onto the filtered
# slice produced by the NA-removal cell (plain column assignment there can
# raise pandas' SettingWithCopyWarning).
# NOTE(review): every gene is labelled 'NO' and no visible cell relabels the
# significant ones - confirm whether UP/DOWN labels were intended here.
deseq_data = deseq_data.assign(Significant='NO')

# The padj criterion is shared by both directions, so it is evaluated once.
# On the -log10 scale a larger value means a smaller adjusted p-value.
passes_padj = deseq_data['minus_log10(padj)'] >= -np.log10(padj_threshold)

is_up = passes_padj & (deseq_data['log2FoldChange'] >= abs_l2fc_threshold)
is_down = passes_padj & (deseq_data['log2FoldChange'] <= -abs_l2fc_threshold)

up_degs = deseq_data.loc[is_up, 'region'].drop_duplicates().to_list()
down_degs = deseq_data.loc[is_down, 'region'].drop_duplicates().to_list()

In [None]:
# Report how many genes were selected in each direction
for direction, degs in (('UP', up_degs), ('DOWN', down_degs)):
    print(f'{len(degs)} {direction} regulated genes')

In [None]:
# Comma-separated ID strings: the DOWN-regulated genes and the full list of
# reported genes
id_separator = ','
geneInputList = id_separator.join(down_degs)
refInputList = id_separator.join(reference_genes_list)

In [None]:
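# Disabled single-gene inputs - presumably a quick check of the PANTHER request; TODO confirm or delete
#geneInputList = up_degs[0]
#refInputList = down_degs[0]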


In [None]:
import os
import json


# Query the PANTHER over-representation service (test type FISHER, correction
# FDR) for the genes held in `geneInputList`, saving the JSON reply to
# pantherdb.json.
#
# The inputs (`geneInputList`, `organism`) are only read here, never
# overwritten, so this cell can be re-run without the parameter prefixes
# piling up.
#
# Equivalent request for a single gene:
# curl -X POST "https://pantherdb.org/services/oai/pantherdb/enrich/overrep?geneInputList=Q96PB1&organism=9606&annotDataSet=GO%3A0008150&enrichmentTestType=FISHER&correction=FDR" -H "accept: application/json"

fdr_threshold = 0.001    # Terms are written to the TSV only when their FDR is below this

# Input
#geneInputList = 'Q96PB1'
organism = '9606'    #Human
panther_url = 'https://pantherdb.org/services/oai/pantherdb/enrich/overrep'

# NOTE(review): an earlier cell builds a background list in `refInputList`, but
# this cell has always blanked it, so no custom background was ever sent and
# the reference branch below never ran. That behaviour is kept because changing
# the background changes the enrichment result. To use the custom background,
# set `panther_reference_ids = refInputList` - the list travels in the URL, so
# a very long list may exceed URL/argument length limits.
panther_reference_ids = ''

if not geneInputList:
    raise ValueError('geneInputList is empty: there are no genes to send to PANTHER')

# Make the request parameters
panther_query = {'geneInputList': geneInputList, 'organism': organism}
if panther_reference_ids:
    panther_query['refInputList'] = panther_reference_ids
    panther_query['refOrganism'] = organism    # Target and reference gene lists are from the same organism

# GO:0008150 is the biological_process branch of GO
# (molecular_function would be GO:0003674)
panther_query['annotDataSet'] = 'GO:0008150'
panther_query['enrichmentTestType'] = 'FISHER'
panther_query['correction'] = 'FDR'

# safe=',' keeps the ID separators literal; everything else is percent-encoded
request_url = panther_url + '?' + urlencode(panther_query, safe=',')

# Passing an argument list (no shell) avoids quoting problems in the IDs, and
# check=True raises CalledProcessError if curl exits with a non-zero status
curl_args = [
    'curl', '-X', 'POST', request_url,
    '-H', 'accept: application/json',
    '-o', 'pantherdb.json',
]

print('Searching Panther DB with command:\n' + ' '.join(curl_args))

subprocess.run(curl_args, check=True)


# Convert the PANTHER JSON reply (pantherdb.json) into pantherdb.tsv, keeping
# only terms whose FDR is below `fdr_threshold`.

# Parse the reply first, so a missing or malformed pantherdb.json is reported
# before pantherdb.tsv is created or overwritten
with open('pantherdb.json', 'r') as f:
    data = json.load(f)

if 'results' not in data:
    # Same exception type as the bare lookup would raise, but with the reply attached
    raise KeyError(f'"results" missing from pantherdb.json - PANTHER reply was: {data}')

tsv_columns = ['id', 'label', 'number_in_list', 'fold_enrichment', 'fdr']

# The with-blocks close both files, so no explicit close() calls are needed
with open('pantherdb.tsv', 'w') as f_out:
    f_out.write('\t'.join(tsv_columns) + '\n')

    # Iterating through the json list
    for record in data['results']['result']:
        # Unclassified entries will be missing an id and are skipped
        if record['fdr'] < fdr_threshold and 'id' in record['term']:
            fields = [
                record['term']['id'],
                record['term']['label'],
                record['number_in_list'],
                record['fold_enrichment'],
                record['fdr'],
            ]
            f_out.write('\t'.join(map(str, fields)) + '\n')


In [None]:
# Taxon ID 9606 (human), the same value the PANTHER query cell uses
organism = '9606' 


