In [12]:
import os
import itertools

In [21]:
def sequence_to_resfile(sequence, output_file='resfile', start_residue=1, chain='A',verbose=False):
    """
    Convert an amino acid sequence to a Rosetta resfile and save it.

    The file starts with a ``NATRO`` line and a ``start`` line, followed by one
    ``<residue number> <chain> PIKAA <amino acid>`` line per sequence position.
    Missing parent directories of ``output_file`` are created before writing.

    Args:
        sequence (str): Amino acid sequence in one-letter code (e.g., 'MKLLPVRG').
            Letters are upper-cased before being written.
        output_file (str): Path of the output resfile (default: 'resfile')
        start_residue (int): Number given to the first residue (default: 1)
        chain (str): Chain ID (default: 'A')
        verbose (bool): If True, print the output path and the file content
            (default: False)
    """
    # Header for resfile
    resfile_content = ["NATRO  # Keep all other residues fixed", "start"]

    # One PIKAA line per residue, numbered consecutively from start_residue
    resfile_content.extend(
        f"{i} {chain} PIKAA {aa}"
        for i, aa in enumerate(sequence.upper(), start=start_residue)
    )
    resfile_text = '\n'.join(resfile_content)

    # open() does not create directories; make sure e.g. 'resfiles/' exists
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write to file
    with open(output_file, 'w') as f:
        f.write(resfile_text)
    if verbose:
        print(f"Created resfile: {output_file}")
        print("Content:")
        print(resfile_text)

In [2]:
import random

# List of standard amino acids in one-letter code
amino_acids = "ACDEFGHIKLMNPQRSTVWY"

def generate_random_sequence(length, rng=None):
    """Generate a random amino acid sequence of specified length.

    Each position is drawn independently and uniformly from ``amino_acids``.

    Args:
        length (int): Number of residues to generate; 0 (or a negative value)
            yields an empty string.
        rng (random.Random, optional): Random generator to draw from. Pass a
            seeded ``random.Random`` instance for reproducible sequences. When
            omitted, the module-level ``random`` generator is used, as before.

    Returns:
        str: The generated sequence in one-letter code.
    """
    source = random if rng is None else rng
    return ''.join(source.choice(amino_acids) for _ in range(length))

In [4]:
# GA95: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGTAEKYIKLIANAKTVEGVWTLKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GA95.txt', start_residue=1, chain='A')

Created resfile: resfiles/GA95.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA I
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA L
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [5]:
# GB95: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKEAVDAGTAEKYFKLIANAKTVEGVWTYKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB95.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB95.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA A
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [6]:
# GA77: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGIAEKYIKLIANAKTVEGVWTLKDEILKATVTE"
sequence_to_resfile(sequence, output_file='resfiles/GA77.txt', start_residue=1, chain='A')

Created resfile: resfiles/GA77.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA I
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA I
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA L
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA L
51 A PIKAA K
52 A PIKAA A
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [7]:
# GB77: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNGKQLKEEAITEAVDAATAEKYFKLYANAKTVEGVWTYKDETKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB77.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB77.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA G
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA L
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA T
19 A PIKAA E
20 A PIKAA A
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA A
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA Y
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA T
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [8]:
# GA88: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGIAEKYIKLIANAKTVEGVWTLKDEILTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GA88.txt', start_residue=1, chain='A')

Created resfile: resfiles/GA88.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA I
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA I
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA L
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA L
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [9]:
# GA91: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGTAEKYIKLIANAKTVEGVWTLKDEILTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GA91.txt', start_residue=1, chain='A')

Created resfile: resfiles/GA91.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA I
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA L
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA L
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [10]:
# GA98: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGTAEKYFKLIANAKTVEGVWTLKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GA98.txt', start_residue=1, chain='A')

Created resfile: resfiles/GA98.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA L
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [11]:
# GB98: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAIKELVDAGTAEKYFKLIANAKTVEGVWTYKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB98.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB98.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [12]:
# GB91: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
# NOTE(review): this string is character-for-character identical to the
# sequence used in the GB95 cell, so GB91.txt and GB95.txt get the same
# residue lines. Confirm the intended GB91 sequence.
sequence = "TTYKLILNLKQAKEEAIKEAVDAGTAEKYFKLIANAKTVEGVWTYKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB91.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB91.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA A
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [13]:
# GB88: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "TTYKLILNLKQAKEEAITEAVDAGTAEKYFKLYANAKTVEGVWTYKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB88.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB88.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA T
19 A PIKAA E
20 A PIKAA A
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA Y
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [14]:
# NOTE(review): this cell repeats the GB98 cell earlier in the notebook (same
# sequence string, same output path), so it only rewrites resfiles/GB98.txt
# with identical content. Likely redundant; confirm before removing.
sequence = "TTYKLILNLKQAKEEAIKELVDAGTAEKYFKLIANAKTVEGVWTYKDEIKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GB98.txt', start_residue=1, chain='A')

Created resfile: resfiles/GB98.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA T
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA L
10 A PIKAA K
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA E
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA G
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA Y
30 A PIKAA F
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA A
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA K
47 A PIKAA D
48 A PIKAA E
49 A PIKAA I
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [5]:
# GBWT: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "MTYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE"
sequence_to_resfile(sequence, output_file='resfiles/GBWT.txt', start_residue=1, chain='A')

Created resfile: resfiles/GBWT.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA M
2 A PIKAA T
3 A PIKAA Y
4 A PIKAA K
5 A PIKAA L
6 A PIKAA I
7 A PIKAA L
8 A PIKAA N
9 A PIKAA G
10 A PIKAA K
11 A PIKAA T
12 A PIKAA L
13 A PIKAA K
14 A PIKAA G
15 A PIKAA E
16 A PIKAA T
17 A PIKAA T
18 A PIKAA T
19 A PIKAA E
20 A PIKAA A
21 A PIKAA V
22 A PIKAA D
23 A PIKAA A
24 A PIKAA A
25 A PIKAA T
26 A PIKAA A
27 A PIKAA E
28 A PIKAA K
29 A PIKAA V
30 A PIKAA F
31 A PIKAA K
32 A PIKAA Q
33 A PIKAA Y
34 A PIKAA A
35 A PIKAA N
36 A PIKAA D
37 A PIKAA N
38 A PIKAA G
39 A PIKAA V
40 A PIKAA D
41 A PIKAA G
42 A PIKAA E
43 A PIKAA W
44 A PIKAA T
45 A PIKAA Y
46 A PIKAA D
47 A PIKAA D
48 A PIKAA A
49 A PIKAA T
50 A PIKAA K
51 A PIKAA T
52 A PIKAA F
53 A PIKAA T
54 A PIKAA V
55 A PIKAA T
56 A PIKAA E


In [4]:
# GAWT: write a resfile with one PIKAA line per residue of this sequence
# (chain A, numbering from 1).
sequence = "MEAVDANSLAQAKEAAIKELKQYGIGDYYIKLINNAKTVEGVESLKNEILKALPTE"
sequence_to_resfile(sequence, output_file='resfiles/GAWT.txt', start_residue=1, chain='A')

Created resfile: resfiles/GAWT.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA M
2 A PIKAA E
3 A PIKAA A
4 A PIKAA V
5 A PIKAA D
6 A PIKAA A
7 A PIKAA N
8 A PIKAA S
9 A PIKAA L
10 A PIKAA A
11 A PIKAA Q
12 A PIKAA A
13 A PIKAA K
14 A PIKAA E
15 A PIKAA A
16 A PIKAA A
17 A PIKAA I
18 A PIKAA K
19 A PIKAA E
20 A PIKAA L
21 A PIKAA K
22 A PIKAA Q
23 A PIKAA Y
24 A PIKAA G
25 A PIKAA I
26 A PIKAA G
27 A PIKAA D
28 A PIKAA Y
29 A PIKAA Y
30 A PIKAA I
31 A PIKAA K
32 A PIKAA L
33 A PIKAA I
34 A PIKAA N
35 A PIKAA N
36 A PIKAA A
37 A PIKAA K
38 A PIKAA T
39 A PIKAA V
40 A PIKAA E
41 A PIKAA G
42 A PIKAA V
43 A PIKAA E
44 A PIKAA S
45 A PIKAA L
46 A PIKAA K
47 A PIKAA N
48 A PIKAA E
49 A PIKAA I
50 A PIKAA L
51 A PIKAA K
52 A PIKAA A
53 A PIKAA L
54 A PIKAA P
55 A PIKAA T
56 A PIKAA E


In [18]:
# Length used for the random sequences below. It is read from `sequence`,
# i.e. from whichever sequence cell was executed most recently in this kernel.
sequence_length = len(sequence)
# Number of random sequences (and resfiles) to generate.
n = 30

In [19]:
# Write `n` resfiles, each for a freshly drawn random sequence of
# `sequence_length` residues, to resfiles/random_<i>.txt.
for i in range(n):
    random_sequence = generate_random_sequence(sequence_length)
    output_file = f'resfiles/random_{i}.txt'

    # Chain A, numbering from 1, like the named variants above
    sequence_to_resfile(random_sequence, output_file, start_residue=1, chain='A')

    print(f"Generated file {output_file} for sequence: {random_sequence}")


Created resfile: resfiles/random_0.txt
Content:
NATRO  # Keep all other residues fixed
start
1 A PIKAA D
2 A PIKAA S
3 A PIKAA A
4 A PIKAA G
5 A PIKAA D
6 A PIKAA K
7 A PIKAA Y
8 A PIKAA L
9 A PIKAA W
10 A PIKAA W
11 A PIKAA K
12 A PIKAA F
13 A PIKAA M
14 A PIKAA H
15 A PIKAA P
16 A PIKAA R
17 A PIKAA V
18 A PIKAA S
19 A PIKAA I
20 A PIKAA C
21 A PIKAA H
22 A PIKAA C
23 A PIKAA F
24 A PIKAA Q
25 A PIKAA I
26 A PIKAA H
27 A PIKAA V
28 A PIKAA L
29 A PIKAA I
30 A PIKAA W
31 A PIKAA I
32 A PIKAA D
33 A PIKAA Y
34 A PIKAA D
35 A PIKAA S
36 A PIKAA E
37 A PIKAA N
38 A PIKAA E
39 A PIKAA K
40 A PIKAA Q
41 A PIKAA V
42 A PIKAA Q
43 A PIKAA V
44 A PIKAA W
45 A PIKAA I
46 A PIKAA Q
47 A PIKAA R
48 A PIKAA M
49 A PIKAA H
50 A PIKAA Q
51 A PIKAA P
52 A PIKAA A
53 A PIKAA A
54 A PIKAA C
55 A PIKAA Q
56 A PIKAA L
Generated file resfiles/random_0.txt for sequence: DSAGDKYLWWKFMHPRVSICHCFQIHVLIWIDYDSENEKQVQVWIQRMHQPAACQL
Created resfile: resfiles/random_1.txt
Content:
NATRO  # Keep all other residues

In [15]:
# GB77 sequence (same string as in the GB77 resfile cell)
gb77_sequence = "TTYKLILNGKQLKEEAITEAVDAATAEKYFKLYANAKTVEGVWTYKDETKTFTVTE"
# GA77 sequence (same string as in the GA77 resfile cell)
ga77_sequence = "TTYKLILNLKQAKEEAIKELVDAGIAEKYIKLIANAKTVEGVWTLKDEILKATVTE"

In [4]:
def find_differences(seq1, seq2):
    """Find positions and amino acids that differ between two sequences.

    Args:
        seq1 (str): First sequence.
        seq2 (str): Second sequence; must have the same length as ``seq1``.

    Returns:
        list[tuple[int, str, str]]: One ``(index, seq1 residue, seq2 residue)``
        tuple per mismatching position, in ascending index order. Indices are
        0-based, i.e. one less than the residue numbers used in the resfiles
        when numbering starts at 1.

    Raises:
        ValueError: If the two sequences differ in length, since a
            position-by-position comparison is not defined in that case.
    """
    if len(seq1) != len(seq2):
        raise ValueError(
            f"Sequences must have the same length (got {len(seq1)} and {len(seq2)})"
        )
    return [(i, a, b) for i, (a, b) in enumerate(zip(seq1, seq2)) if a != b]

In [8]:
# 0-based positions where GB77 and GA77 differ, as
# (index, GB77 residue, GA77 residue) tuples.
find_differences(gb77_sequence, ga77_sequence)

[(8, 'G', 'L'),
 (11, 'L', 'A'),
 (17, 'T', 'K'),
 (19, 'A', 'L'),
 (23, 'A', 'G'),
 (24, 'T', 'I'),
 (29, 'F', 'I'),
 (32, 'Y', 'I'),
 (44, 'Y', 'L'),
 (48, 'T', 'I'),
 (49, 'K', 'L'),
 (50, 'T', 'K'),
 (51, 'F', 'A')]

In [22]:
def generate_intermediate_sequences(base_seq, differences, combination):
    """Build the variant of ``base_seq`` selected by ``combination``.

    Args:
        base_seq (str): Starting sequence.
        differences (list): ``(position, old residue, new residue)`` tuples;
            ``position`` is a 0-based index into ``base_seq``.
        combination (iterable): One flag per entry of ``differences``; a truthy
            flag at index ``k`` applies the substitution ``differences[k]``.

    Returns:
        str: ``base_seq`` with the new residue written at every selected position.
    """
    residues = list(base_seq)
    # Indices of the substitutions that are switched on
    chosen = (idx for idx, flag in enumerate(combination) if flag)
    for idx in chosen:
        position, _old_aa, replacement = differences[idx]
        residues[position] = replacement
    return ''.join(residues)

In [23]:
def binary_to_decimal(binary_list):
    """Convert a sequence of bits to its integer value.

    The element at index ``i`` is weighted by ``2**i``, so the first element is
    the least significant bit. An empty input gives 0.
    """
    total = 0
    for position, bit in enumerate(binary_list):
        total += bit << position
    return total

In [24]:
# Substitutions that turn GA77 into GB77: (index, GA77 residue, GB77 residue)
differences = find_differences(ga77_sequence, gb77_sequence)

# Directory that receives one resfile per combination of substitutions
output_dir = "all_paths"
os.makedirs(output_dir, exist_ok=True)

# Start a fresh tab-separated map file (ID -> sequence) with only its header;
# opening with 'w' discards any previous content.
map_file = os.path.join(output_dir, "sequence_map.txt")
with open(map_file, 'w') as f:
    f.write("ID\tSequence\n")

In [25]:
from tqdm import tqdm

In [26]:
# Enumerate every subset of the GA77 -> GB77 substitutions and write one resfile
# per subset. The file name / ID encodes the subset: bit i of the ID is set when
# differences[i] is applied, so ID 0 is GA77 itself and the largest ID applies
# every substitution.
n_variants = 2 ** len(differences)

# Open the map file once, in append mode, so the header already written to it is
# kept and the file is not reopened for every variant.
with open(map_file, 'a') as f:
    # `total` lets tqdm show a percentage/ETA; a bare iterator has no length.
    for binary in tqdm(itertools.product([0, 1], repeat=len(differences)), total=n_variants):
        # Generate the sequence for this combination
        seq = generate_intermediate_sequences(ga77_sequence, differences, binary)

        # Calculate the decimal ID for naming
        decimal_id = binary_to_decimal(binary)

        # Create resfile
        resfile_path = os.path.join(output_dir, f"{decimal_id}")
        sequence_to_resfile(seq, resfile_path, start_residue=1, chain='A')

        # Add to map file
        f.write(f"{decimal_id}\t{seq}\n")

print(f"Generated {n_variants} resfiles and map file in '{output_dir}' directory")


8192it [00:00, 10597.65it/s]

Generated 8192 resfiles and map file in 'all_paths' directory



