-
Notifications
You must be signed in to change notification settings - Fork 0
/
interface.py
142 lines (125 loc) · 5.42 KB
/
interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
"""
Analyse interface output from COCOMAPS to yield
residue type and group information
"""
import os
import csv
import raw_parser
import gpcr_variables
from gpcr_variables import DEFAULT_FOLDER, ALIGN_VECTORS, \
DIMERS, PROCESSED_RESULTS_FOLDER, \
STRUCTURAL_PDBS_FOLDER
__author__ = "A.J. Preto"
__email__ = "martinsgomes.jose@gmail.com"
__group__ = "Data-Driven Molecular Design"
__group_leader__ = "Irina S. Moreira"
__project__ = "GPCRs"
def total_counter(input_seq_list, to_count):
    """
    Count the entries of a sequence that differ from a given symbol.

    Elsewhere in this file it is called with '-' as the symbol, which
    gives the number of non-gap positions.

    :param input_seq_list: iterable of residue symbols
    :param to_count: the symbol to leave out of the count
    :return: number of entries that are not equal to to_count
    """
    return sum(1 for res in input_seq_list if res != to_count)
def counter(input_seq_list, to_count):
    """
    Count how many entries of a sequence equal a given symbol.

    :param input_seq_list: iterable of residue symbols
    :param to_count: the symbol to look for
    :return: number of entries equal to to_count
    """
    return sum(1 for res in input_seq_list if res == to_count)
def res_type_counter(input_seq_list):
    """
    Compute the percentage of each of the 20 standard residue types.

    The denominator is the number of entries different from '-', so
    symbols outside the 20 one-letter codes still count towards the
    total even though they are not reported.

    :param input_seq_list: iterable of one-letter residue codes, with
        '-' marking positions to ignore
    :return: dict mapping the full residue name to its percentage,
        stored as the string form of a float; every value is '0.0'
        when the input holds no entry other than '-'
    """
    res_name = {'A':'Alanine', 'R':'Arginine','D':'Aspartate',
                'N':'Asparagine','C':'Cysteine','E':'Glutamate','G':'Glycine',
                'Q':'Glutamine','H':'Histidine','I':'Isoleucine',
                'L':'Leucine','K':'Lysine','M':'Methionine',
                'F':'Phenylalanine','P':'Proline',
                'S':'Serine','T':'Threonine','W':'Tryptophan',
                'Y':'Tyrosine','V':'Valine'}
    # Order in which the residues are inserted into the returned dict.
    res_list = ['G','A','V','L','I','M','S','T','C','P','N','Q','K','R','H','D','E','F','Y','W']

    # One pass gathers both the per-residue counts and the denominator,
    # instead of rescanning the sequence once per residue type.
    res_type = dict.fromkeys(res_list, 0)
    total = 0
    for res in input_seq_list:
        if res != '-':
            total += 1
            if res in res_type:
                res_type[res] += 1

    new_dict = {}
    for code in res_list:
        if total:
            percentage = (float(res_type[code]) / total) * 100
        else:
            # Nothing but '-' (or nothing at all): report zero rather
            # than dividing by zero.
            percentage = 0.0
        new_dict[res_name[code]] = str(percentage)
    return new_dict
def res_group(input_dict):
    """
    Sum per-residue values into the five residue groups
        - non polar aliphatic
        - polar uncharged
        - basic positively charged
        - acid negatively charged
        - non polar aromatic

    :param input_dict: dict keyed by full residue name whose values can
        be converted with float(), such as the output of
        res_type_counter
    :return: dict mapping each group label to the float sum of its
        members' values
    :raises KeyError: if one of the 20 residue names is missing from
        input_dict
    """
    # Explicit (label, members) table: each residue is looked up under
    # its own group directly instead of being matched by comparing lists.
    groups = (
        ('Non polar aliphatic',
         ('Glycine','Alanine','Valine','Leucine','Isoleucine','Methionine')),
        ('Polar uncharged',
         ('Serine','Threonine','Cysteine','Proline','Asparagine','Glutamine')),
        ('Basic positively charged',
         ('Lysine','Arginine','Histidine')),
        ('Acid negatively charged',
         ('Aspartate','Glutamate')),
        ('Non polar aromatic',
         ('Phenylalanine','Tyrosine','Tryptophan')),
    )
    new_dict = {}
    for label, members in groups:
        # Plain left-to-right accumulation keeps the floating-point
        # result identical to adding the members one at a time.
        group_total = 0
        for res in members:
            group_total = group_total + float(input_dict[res])
        new_dict[label] = group_total
    return new_dict
def analyse_interface(input_file, seq, out_name, delimiter=";"):
    """
    Run all interface analysis for one file.

    Only the first line of input_file is used. It is split on
    delimiter and paired position by position with seq; a position
    whose value is "-" is recorded as "-", any other position takes
    the residue from seq. Pairing stops at the shorter of the two.

    :param input_file: path of the delimited text file to read
    :param seq: sequence of residue symbols paired with the first row
    :param out_name: accepted for interface compatibility; not used
        by this function
    :param delimiter: field separator of the input file
    :return: tuple of (per-position residue list, number of non-"-"
        entries, residue type percentages, residue group sums)
    :raises IndexError: if input_file is empty
    """
    # The context manager closes the handle, and readline() avoids
    # loading the whole file when only the first row is needed.
    with open(input_file, "r") as handle:
        first_line = handle.readline()
    if not first_line:
        # Same exception type an empty file has always produced here.
        raise IndexError("no data row found in " + str(input_file))
    values_row = first_line.replace("\n", "").split(delimiter)

    interface_res = [row_name if row_val != "-" else "-"
                     for row_val, row_name in zip(values_row, seq)]
    total_res = total_counter(interface_res, '-')
    interface_res_type = res_type_counter(interface_res)
    interface_group_type = res_group(interface_res_type)
    return interface_res, total_res, interface_res_type, interface_group_type
def apply_on_folder(input_alignments, input_dimers):
    """
    Run interface analysis for every receptor/partner pair.

    For each pair, two files are analysed from the module-level
    results_folder: "<receptor>_<partner>_toDR_cocomaps.csv" against
    the receptor alignment and
    "<receptor>_<partner>_toprotein_cocomaps.csv" against the partner
    alignment. A pair for which either analysis fails is reported on
    standard output and skipped.

    :param input_alignments: dict mapping a protein name to its
        aligned sequence
    :param input_dimers: dict mapping a receptor name to an iterable
        of partner names
    :return: tuple (DR_interfaces, protein_interfaces); each is a dict
        keyed by (receptor, partner) holding the analyse_interface
        result for the pairs that succeeded
    """
    DR_interfaces = {}
    protein_interfaces = {}
    for receptor, partners in input_dimers.items():
        for partner in partners:
            # results_folder is read from module scope at call time.
            prefix = results_folder + "/" + receptor + "_" + partner
            cocomaps_name_DR = prefix + "_toDR_cocomaps.csv"
            output_name_DR = prefix + "_toDR_amino.csv"
            cocomaps_name_partner = prefix + "_toprotein_cocomaps.csv"
            output_name_partner = prefix + "_toprotein_amino.csv"
            try:
                interface_DR = analyse_interface(cocomaps_name_DR, input_alignments[receptor], output_name_DR)
                interface_prot = analyse_interface(cocomaps_name_partner, input_alignments[partner], output_name_partner)
            except Exception as err:
                # Still best-effort per pair, but KeyboardInterrupt and
                # SystemExit now propagate and the cause is shown.
                print("Failed to perform action on:", receptor, partner, repr(err))
            else:
                # Keep a pair only when both of its analyses succeeded.
                DR_interfaces[(receptor, partner)] = interface_DR
                protein_interfaces[(receptor, partner)] = interface_prot
    return DR_interfaces, protein_interfaces
"""
Variable initialization
"""
# NOTE: these statements run on import as well as on direct execution;
# there is no __main__ guard.
# Base folder, taken from gpcr_variables.
home = DEFAULT_FOLDER
# Folder holding the processed results; apply_on_folder reads this
# module-level name when building its file paths.
results_folder = home + "/" + PROCESSED_RESULTS_FOLDER
# Loaded through raw_parser.retrieve_clean; presumably a mapping of
# protein name to aligned sequence -- confirm against raw_parser.
aligned_proteins = raw_parser.retrieve_clean(ALIGN_VECTORS)
# Loaded the same way; used by apply_on_folder as a mapping of
# receptor name to an iterable of partner names.
dimers = raw_parser.retrieve_clean(DIMERS)
# Run the analysis over every receptor/partner pair.
apply_on_folder(aligned_proteins, dimers)