-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdb2fasta_file.py
64 lines (62 loc) · 2.58 KB
/
pdb2fasta_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
# Three-letter residue name -> one-letter code for the 20 standard amino acids.
# Built once at import time instead of on every call.
_THREE_TO_ONE = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
    'GLU': 'E', 'GLN': 'Q', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
    'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
}


def translate_3aa1(three_letter):
    """Return the one-letter code for a three-letter amino-acid name.

    Args:
        three_letter: Upper-case residue name such as ``'ALA'``. If the
            string is longer than three characters, only characters 1-3
            (zero-based slice ``[1:4]``) are looked up, i.e. the first
            character is dropped.

    Returns:
        The one-letter amino-acid code as a string.

    Raises:
        KeyError: If the (possibly sliced) name is not one of the 20
            residue names in the lookup table.
    """
    if len(three_letter) > 3:
        # Drop the leading character of over-long names and keep the next three.
        three_letter = three_letter[1:4]
    if three_letter not in _THREE_TO_ONE:
        # Same exception type as a plain dict lookup, but say what went wrong.
        raise KeyError('unknown three-letter residue name: %r' % (three_letter,))
    return _THREE_TO_ONE[three_letter]
#This code is based on the code from https://github.com/dongshuyan/pdb2fasta/blob/master/pdb2fasta_file.py
def _iter_chain_residues(pdb_lines, chain_id):
    """Yield (residue_id, residue_name) once per consecutive residue of one chain.

    Only records starting with ``ATOM`` whose chain-ID column (index 21)
    equals ``chain_id`` are considered. ``residue_id`` is the stripped
    residue number plus insertion code; ``residue_name`` is the raw
    three-character name field.
    """
    previous_key = None
    for record in pdb_lines:
        if record[0:4] != 'ATOM':
            continue
        if record[21:22] != chain_id:
            continue
        # Columns 23-27 (slice 22:27) hold the residue number and insertion
        # code. Stopping at 27 keeps the x coordinate, which starts at
        # column 31, out of the key so wide coordinates cannot split a residue.
        residue_key = record[22:27]
        if residue_key != previous_key:
            yield residue_key.strip(), record[17:20]
        previous_key = residue_key


def pdbTofasta_seqpos(pdbfile, chain, wild_res, mut_pos, partnerA, partnerB, ppi_fasta_dir):
    """Write a two-chain FASTA file for a PDB complex and locate a mutation site.

    The FASTA file is named ``<name>_<partnerA>_<partnerB>.fasta`` where
    ``<name>`` is the PDB file's base name up to its first dot. It is written
    only if it does not exist yet; otherwise a notice is printed. The file
    holds the ``ATOM``-record sequence of ``partnerA`` followed by that of
    ``partnerB``. Independently of that, the residues of ``chain`` are
    counted to map the structure position to a 1-based sequence position.

    Args:
        pdbfile: Path to the PDB file.
        chain: Chain ID of the mutated chain. It is compared against the
            single chain-ID column, so it must be one character.
        wild_res: One-letter code of the wild-type residue expected at
            ``mut_pos``.
        mut_pos: Residue number (plus insertion code, if any) as a string,
            exactly as it appears in the PDB file once stripped.
        partnerA: Chain ID written as the first FASTA record.
        partnerB: Chain ID written as the second FASTA record.
        ppi_fasta_dir: Output location. It is concatenated directly with the
            file name, so it must end with a path separator.

    Returns:
        The 1-based index, among the residues of ``chain``, of the residue
        whose ID equals ``mut_pos`` and whose one-letter code equals
        ``wild_res``. If several residues match, the last one wins. Returns
        ``-99999`` when nothing matches.

    Raises:
        KeyError: From ``translate_3aa1`` when a residue that has to be
            translated is not a supported amino acid. No FASTA file is
            created in that case.
    """
    # Take the base name first so dots in directory names or a leading "./"
    # cannot truncate the structure name.
    name = os.path.basename(pdbfile).split('.', 1)[0]
    filename = name + '_' + partnerA + '_' + partnerB + '.fasta'
    fasta_path = ppi_fasta_dir + filename

    # Read the structure once; the handle is closed before any further work.
    with open(pdbfile) as pdb_handle:
        pdb_lines = pdb_handle.readlines()

    if os.path.exists(fasta_path):
        print(filename+" already existed.")
    else:
        # Build both sequences before touching the output file so that a
        # translation error cannot leave a truncated FASTA behind, which
        # later calls would mistake for a finished one.
        seq_a = ''.join(
            translate_3aa1(res_name)
            for _, res_name in _iter_chain_residues(pdb_lines, partnerA)
        )
        seq_b = ''.join(
            translate_3aa1(res_name)
            for _, res_name in _iter_chain_residues(pdb_lines, partnerB)
        )
        with open(fasta_path, "w") as fasta:
            fasta.write('>' + name + '_1|' + partnerA + '\n')
            fasta.write(seq_a)
            fasta.write('\n')
            fasta.write('>' + name + '_2|' + partnerB + '\n')
            # No trailing newline after the second sequence (kept as before).
            fasta.write(seq_b)

    mut_seqpos = -99999
    residues = _iter_chain_residues(pdb_lines, chain)
    for position, (residue_id, res_name) in enumerate(residues, 1):
        # Translate only when the position matches, so unrelated non-standard
        # residues in this chain do not raise.
        if mut_pos == residue_id and wild_res == translate_3aa1(res_name):
            mut_seqpos = position
    return mut_seqpos