# String API
---
get String identifiers
>Faster 

In [5]:

import os

os.chdir('/Users/tomdoyle/')


In [6]:
import requests ## python -m pip install requests

string_api_url = "https://version-11-5.string-db.org/api"
output_format = "tsv-no-header"
method = "get_string_ids"

# import gene names
gene_list =[]
gene_list_file = "Documents/University/Southampton/Course/BIOL6068-Research_Project/Data/58_gene_names.txt" #gene names list
gene_list = open(gene_list_file).read().splitlines()

# human NCBI identifier 9606
species_id = 9606

## Set parameters
params = {

    "identifiers" : "\r".join(gene_list), # your protein list
    "species" : species_id, # species NCBI identifier 
    "limit" : 1, # only one (best) identifier per input protein
    "echo_query" : 1, # see your input identifiers in the output
    "caller_identity" : "Research_Project" # your app name
}

## Construct URL
request_url = "/".join([string_api_url, output_format, method])

## Call STRING
results = requests.post(request_url, data=params)

## Read and parse the results
for line in results.text.strip().split("\n"):
    l = line.split("\t")
    input_identifier, string_identifier = l[0], l[2]
    print("Input:", input_identifier, "STRING:", string_identifier, sep="\t")

# saves string_id into a list
string_id = []
for line in results.text.strip().split("\n"):
    l = line.split("\t")
    string_identifier = l[2]
    string_id.append(string_identifier)
string_id

Input:	BIRC2	STRING:	9606.ENSP00000477613
Input:	BIRC3	STRING:	9606.ENSP00000263464
Input:	CARD18	STRING:	9606.ENSP00000436691
Input:	CARD6	STRING:	9606.ENSP00000254691
Input:	CARD8	STRING:	9606.ENSP00000375767
Input:	CARD9	STRING:	9606.ENSP00000360797
Input:	CASP1	STRING:	9606.ENSP00000433138
Input:	CASP5	STRING:	9606.ENSP00000376849
Input:	CASP8	STRING:	9606.ENSP00000351273
Input:	CCL11	STRING:	9606.ENSP00000302234
Input:	CCL13	STRING:	9606.ENSP00000225844
Input:	CCL2	STRING:	9606.ENSP00000225831
Input:	CCL7	STRING:	9606.ENSP00000367832
Input:	CCL8	STRING:	9606.ENSP00000378118
Input:	CHUK	STRING:	9606.ENSP00000359424
Input:	CXCL1	STRING:	9606.ENSP00000379110
Input:	CXCL2	STRING:	9606.ENSP00000427279
Input:	CXCL8	STRING:	9606.ENSP00000306512
Input:	ERBIN	STRING:	9606.ENSP00000426632
Input:	HSP90AA1	STRING:	9606.ENSP00000335153
Input:	HSP90AB1	STRING:	9606.ENSP00000360609
Input:	HSP90B1	STRING:	9606.ENSP00000299767
Input:	IKBKB	STRING:	9606.ENSP00000430684
Input:	IL18	STRING:	9606.ENSP

['9606.ENSP00000477613',
 '9606.ENSP00000263464',
 '9606.ENSP00000436691',
 '9606.ENSP00000254691',
 '9606.ENSP00000375767',
 '9606.ENSP00000360797',
 '9606.ENSP00000433138',
 '9606.ENSP00000376849',
 '9606.ENSP00000351273',
 '9606.ENSP00000302234',
 '9606.ENSP00000225844',
 '9606.ENSP00000225831',
 '9606.ENSP00000367832',
 '9606.ENSP00000378118',
 '9606.ENSP00000359424',
 '9606.ENSP00000379110',
 '9606.ENSP00000427279',
 '9606.ENSP00000306512',
 '9606.ENSP00000426632',
 '9606.ENSP00000335153',
 '9606.ENSP00000360609',
 '9606.ENSP00000299767',
 '9606.ENSP00000430684',
 '9606.ENSP00000280357',
 '9606.ENSP00000263341',
 '9606.ENSP00000385675',
 '9606.ENSP00000358335',
 '9606.ENSP00000215832',
 '9606.ENSP00000352157',
 '9606.ENSP00000333685',
 '9606.ENSP00000215659',
 '9606.ENSP00000211287',
 '9606.ENSP00000229795',
 '9606.ENSP00000263025',
 '9606.ENSP00000378974',
 '9606.ENSP00000394560',
 '9606.ENSP00000219596',
 '9606.ENSP00000226574',
 '9606.ENSP00000216797',
 '9606.ENSP00000312988',


# Get network interactions

## retieve experimental scores over 0.6


In [7]:
output_format = "tsv-no-header"
method = "network"

## Construct URL
request_url = "/".join([string_api_url, output_format, method])

# set confience_score between 0-1
confidence_score = 0.6

## Set parameters
my_genes = string_id

params = {

    "identifiers" : "%0d".join(my_genes), # your protein
    "species" : species_id, # species NCBI identifier 
    "caller_identity" : "Research_Project" # your app name
    

}

""" other params
"required_score" :	# threshold of significance to include a interaction, a number between 0 and 1000 (default depends on the network)
    "network_type" :	# network type: functional (default), physical
    "add_nodes"	: # adds a number of proteins with to the network based on their confidence score
    "show_query_node_labels" :	# when available use submitted names in the preferredName column when (0 or 1) (default:0)
    """

## Call STRING
response = requests.post(request_url, data=params)

for line in response.text.strip().split("\n"):

    l = line.strip().split("\t")
    p1, p2 = l[2], l[3]

    ## filter the interaction according to experimental score
    experimental_score = float(l[10])
    if experimental_score > confidence_score:
        print("\t".join([p1, p2, "experimentally confirmed (prob. %.3f)" % experimental_score]))
        

MAPK1	MAPK14	experimentally confirmed (prob. 0.874)
MAPK1	MAPK14	experimentally confirmed (prob. 0.874)
MAPK1	MAPK3	experimentally confirmed (prob. 0.887)
MAPK1	MAPK3	experimentally confirmed (prob. 0.887)
TAB1	MAPK11	experimentally confirmed (prob. 0.684)
TAB1	MAPK11	experimentally confirmed (prob. 0.684)
TAB1	TAB3	experimentally confirmed (prob. 0.863)
TAB1	TAB3	experimentally confirmed (prob. 0.863)
TAB1	TRAF6	experimentally confirmed (prob. 0.760)
TAB1	TRAF6	experimentally confirmed (prob. 0.760)
TAB1	MAP3K7	experimentally confirmed (prob. 0.980)
TAB1	MAP3K7	experimentally confirmed (prob. 0.980)
TAB1	MAPK14	experimentally confirmed (prob. 0.966)
TAB1	MAPK14	experimentally confirmed (prob. 0.966)
TAB1	TAB2	experimentally confirmed (prob. 0.897)
TAB1	TAB2	experimentally confirmed (prob. 0.897)
TAB1	XIAP	experimentally confirmed (prob. 0.879)
TAB1	XIAP	experimentally confirmed (prob. 0.879)
NFKBIA	NFKBIB	experimentally confirmed (prob. 0.877)
NFKBIA	NFKBIB	experimentally confirmed (p

## Get combined score. confidence >0.6

In [8]:
for line in response.text.strip().split("\n"):

    l = line.strip().split("\t")
    p1, p2 = l[2], l[3]

    ## filter the interaction according to experimental score
    combined_score = float(l[5])
    if combined_score > confidence_score:
        print("\t".join([p1, p2, "combined score (prob. %.3f)" % combined_score]))
        

TRIP6	RIPK2	combined score (prob. 0.840)
TRIP6	RIPK2	combined score (prob. 0.840)
MAPK13	MAPK10	combined score (prob. 0.639)
MAPK13	MAPK10	combined score (prob. 0.639)
MAPK13	MAPK9	combined score (prob. 0.640)
MAPK13	MAPK9	combined score (prob. 0.640)
MAPK13	MAPK8	combined score (prob. 0.640)
MAPK13	MAPK8	combined score (prob. 0.640)
MAPK13	CARD9	combined score (prob. 0.650)
MAPK13	CARD9	combined score (prob. 0.650)
MAPK13	TAB3	combined score (prob. 0.682)
MAPK13	TAB3	combined score (prob. 0.682)
MAPK13	NOD2	combined score (prob. 0.684)
MAPK13	NOD2	combined score (prob. 0.684)
MAPK13	TAB2	combined score (prob. 0.686)
MAPK13	TAB2	combined score (prob. 0.686)
MAPK13	NOD1	combined score (prob. 0.687)
MAPK13	NOD1	combined score (prob. 0.687)
MAPK13	RIPK2	combined score (prob. 0.745)
MAPK13	RIPK2	combined score (prob. 0.745)
MAPK13	TAB1	combined score (prob. 0.803)
MAPK13	TAB1	combined score (prob. 0.803)
MAPK13	MAPK3	combined score (prob. 0.816)
MAPK13	MAPK3	combined score (prob. 0.816)
MA

# Save output to dataframe

use `df.loc[len(df)]` to append list of equal length to end of df LIFESAVER


In [10]:
import pandas as pd

output_format = "tsv-no-header"
method = "network"

## Construct URL
request_url = "/".join([string_api_url, output_format, method])

# set confience_score between 0-1
confidence_score = 0.6

## Set parameters
my_genes = string_id

params = {
    "identifiers" : "%0d".join(my_genes), # your protein
    "species" : species_id, # species NCBI identifier 
    "caller_identity" : "Research_Project" # your app name
}

""" other params
"required_score" :	# threshold of significance to include a interaction, a number between 0 and 1000 (default depends on the network)
    "network_type" :	# network type: functional (default), physical
    "add_nodes"	: # adds a number of proteins with to the network based on their confidence score
    "show_query_node_labels" :	# when available use submitted names in the preferredName column when (0 or 1) (default:0)
    """

## Call STRING
response = requests.post(request_url, data=params)

# creates data frame with colume headins 
patient_df = pd.DataFrame(columns = ['stringId_A', 'stringId_B', 'preferredName_A', 'preferredName_B', 'ncbiTaxonId', 'score', 'nscore', 'fscore', 'pscore', 'ascore', 'escore', 'dscore', 'tscore'])

for line in response.text.strip().split("\n"):
    # seperates each line into list
    l = line.strip().split("\t")
    
    # adds each line to the data frame 
    patient_df.loc[len(patient_df)] = l
 
patient_df.head()


Unnamed: 0,stringId_A,stringId_B,preferredName_A,preferredName_B,ncbiTaxonId,score,nscore,fscore,pscore,ascore,escore,dscore,tscore
0,9606.ENSP00000200457,9606.ENSP00000222823,TRIP6,NOD1,9606,0.441,0,0,0,0.0,0.0,0.0,0.441
1,9606.ENSP00000200457,9606.ENSP00000222823,TRIP6,NOD1,9606,0.441,0,0,0,0.0,0.0,0.0,0.441
2,9606.ENSP00000200457,9606.ENSP00000384273,TRIP6,RELA,9606,0.53,0,0,0,0.059,0.27,0.0,0.37
3,9606.ENSP00000200457,9606.ENSP00000384273,TRIP6,RELA,9606,0.53,0,0,0,0.059,0.27,0.0,0.37
4,9606.ENSP00000200457,9606.ENSP00000220751,TRIP6,RIPK2,9606,0.84,0,0,0,0.079,0.078,0.8,0.171


# Enrichment analysis from string API

# Functional annotation from string API

# PPI enrichment 
Tests if your network has more interactions than expected

# Get string clusters for a patient 

# Test

In [4]:
import get_patient
import pandas as pd
from get_data import genepy_norm_loeuf

genepy_norm_loeuf.head()

Unnamed: 0_level_0,A1BG,A1CF,A2M,A2ML1,A3GALT2,A4GALT,A4GNT,AAAS,AACS,AADAC,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
Samid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AP0013,0.097623,0.027245,0.107498,0.011378,0.041579,0.028535,0.055493,0.0,0.0,0.0,...,0.0,0.026087,0.010998,0.0,0.008628,0.0,0.044287,0.0,0.340699,0.0
AP0030,0.134882,0.0,0.01883,0.064448,0.0,0.015138,0.02246,0.018639,0.0,0.004263,...,0.0,0.011772,0.007656,0.0,0.009848,0.0,0.03083,0.0,0.284207,0.0
AP0045,0.107196,0.0,0.025388,0.034014,0.0,0.007506,0.030282,0.0,0.0,0.121555,...,0.0,0.0,0.010323,0.0,0.008098,0.00952,0.0,0.0,0.014009,0.233638
AP0046,0.179652,0.0,0.025081,0.118953,0.014841,0.007415,0.029915,0.024826,0.0,0.0,...,0.0,0.0,0.010197,0.0,0.008,0.009405,0.048775,0.0,0.013839,0.0
AP0055,0.091878,0.0,0.038677,0.072722,0.00869,0.007527,0.08061,0.0252,0.0,0.121888,...,0.0,0.015916,0.010351,0.0,0.00812,0.009546,0.0,0.0,0.559744,0.234278


In [9]:
test1 = get_patient_as_Series(genepy_norm_loeuf, 1).sort_values(ascending=False).iloc[:2000].to_frame().T
test1
df1 = genepy_norm_loeuf

test2 = get_patient.get_patient_as_Series(df1, 0).sort_values(ascending=False)[:500]
test2

SPEN      3.715597
ZNF219    2.909842
MRC1      2.811599
RORB      2.447682
TTN       2.327598
            ...   
ALOX12    0.261297
DAGLA     0.259268
SCN1A     0.258925
NUP107    0.258824
PLB1      0.258447
Name: AP0013, Length: 500, dtype: float64

In [10]:
import panda
test1 = pd.read_table("../Data/GENEPY_JULY21_network_trial_NOD2disease.matrix",
                      index_col== 'Samid')

NameError: name 'index_col' is not defined