#  T-cell vaccine design
Design vaccines to elicit a T-cell response by optimising coverage of potential T-cell epitopes (PTEs).

In [None]:
from collections import Counter
from itertools import product

from Bio import SeqIO
import igviz as ig
import networkx as nx

In [None]:
# User settings - edit these before running the notebook
# k: length of each k-mer (substring) extracted from the sequences
k = 9
# fasta_path: location of the input FASTA file
fasta_path = '../data/nucleoprotein/3_nuc_pro_uniq.fa'

In [None]:
def seqs_to_kmers(seqs, k=9, *, count_once_per_seq=False):
    """
    Returns a dictionary of all possible k-mers and their counts for a given list of sequences and value of k
    :param seqs: List of amino acid sequences; sequences shorter than k contribute no k-mers
    :param k: Integer for substring length (must be at least 1)
    :param count_once_per_seq: If True, a k-mer repeated within one sequence is counted only once for
        that sequence, so each count is the number of sequences containing the k-mer. Defaults to
        False, which counts every occurrence (including repeats within the same sequence)
    :returns: Dictionary containing all possible k-mers and their counts, in order of first appearance
    :raises ValueError: If k is less than 1
    """
    if k < 1:
        # k = 0 would count empty strings and a negative k would slice meaningless fragments
        raise ValueError(f"k must be a positive integer, got {k!r}")
    counts = Counter()
    for seq in seqs:
        # Every window of length k; the range is empty when the sequence is shorter than k
        windows = (seq[i:i + k] for i in range(len(seq) - k + 1))
        if count_once_per_seq:
            # De-duplicate while keeping first-appearance order. Materialise as a list of keys:
            # passing a mapping to Counter.update would add its values instead of counting keys
            windows = list(dict.fromkeys(windows))
        counts.update(windows)
    # Return a plain dict so callers receive the same type as before
    return dict(counts)

def construct_graph(kmers):
    """
    Return a Directed Graph with unique k-mers as nodes, where overlapping k-mers are connected by edges
    An edge n1 -> n2 is added whenever n1 without its first AA equals n2 without its last AA
    :param kmers: Dictionary containing k-mers and their counts
    :returns: Directed Graph containing k-mers, each node storing its count as the `Frequency` attribute
    """
    # Create graph
    G = nx.DiGraph()
    # Add nodes - for each unique k-mer
    for kmer, count in kmers.items():
        G.add_node(kmer, Frequency=count)
    # Index k-mers by their prefix (everything except the last AA) so the successors of a node
    # are found with one lookup instead of comparing every pair of nodes
    by_prefix = {}
    for kmer in kmers:
        by_prefix.setdefault(kmer[:-1], []).append(kmer)
    # Add edges - where k-mers share an overlap of subsequent k-1 AAs
    for n1 in kmers:
        for n2 in by_prefix.get(n1[1:], ()):
            # add_edge is a no-op for an edge that already exists, so no has_edge check is needed
            G.add_edge(n1, n2)
    return G

## Load the FASTA sequences

In [None]:
# Read every FASTA record into {record id: sequence string}; a record whose id repeats an
# earlier one overwrites it. The `with` block closes the file once all records are read.
with open(fasta_path) as fasta_file:
    fasta_seqs = SeqIO.parse(fasta_file, 'fasta')
    seqs = {fasta.id: str(fasta.seq) for fasta in fasta_seqs}

## Split into k-mers
Compute all possible k-mers of length `k` for the given sequences

In [None]:
# Count the k-mers of length `k` over all loaded sequences (the values of `seqs`)
kmers = seqs_to_kmers(list(seqs.values()), k)

In [None]:
# Print one tab-separated "k-mer<TAB>count" line per k-mer
for kmer, count in kmers.items():
    print(f"{kmer}\t{count}")

## Construct the k-mer graph

In [None]:
# Build the directed k-mer graph: one node per unique k-mer, edges between overlapping k-mers
G = construct_graph(kmers)

## Plot the k-mer graph

In [None]:
# Plot the graph with igviz; both the colouring and the node text use the `Frequency` node attribute
fig = ig.plot(
    G,
    layout='spring',  # 'spiral'
    color_method='Frequency',
    node_text=['Frequency'],
)
fig.show()

In [None]:
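# Alternative static plot using matplotlib (disabled)
# NOTE: the node attribute is stored as 'Frequency' (capital F) by construct_graph
# import matplotlib.pyplot as plt
# freq = list(nx.get_node_attributes(G,'Frequency').values())
# nx.draw(G, node_color=freq, node_size=10)
# plt.show()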