In [1]:
import requests

def read_uniprot_fasta_url(url: str) -> str:
    """Download a FASTA document and return its sequence as one string.

    The URL is fetched with a 30-second timeout and an HTTP error status
    raises via ``raise_for_status``. Every line beginning with ``>`` is
    treated as a header and dropped; the remaining lines are stripped of
    surrounding whitespace and concatenated in order.
    """
    reply = requests.get(url, timeout=30)
    reply.raise_for_status()

    pieces = []
    for raw_line in reply.text.splitlines():
        if raw_line.startswith(">"):
            # FASTA header line, not part of the sequence.
            continue
        pieces.append(raw_line.strip())
    return "".join(pieces)

# Per-residue weights (Da) keyed by one-letter amino acid code.
# NOTE(review): the values look like average masses of the *free* amino
# acids (e.g. G = 75.07), so a plain sum over a peptide does not subtract
# the water lost per peptide bond -- confirm this is the intended score.
AA_MW = {
    "A": 89.09,
    "R": 174.20,
    "N": 132.12,
    "D": 133.10,
    "C": 121.16,
    "Q": 146.15,
    "E": 147.13,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "L": 131.18,
    "K": 146.19,
    "M": 149.21,
    "F": 165.19,
    "P": 115.13,
    "S": 105.09,
    "T": 119.12,
    "W": 204.23,
    "Y": 181.19,
    "V": 117.15,
}

def score_residue(residue: str) -> float:
    """Return the weight of *residue* from ``AA_MW``, or 0.0 if it is absent."""
    try:
        return AA_MW[residue]
    except KeyError:
        # Codes missing from the table contribute nothing to a score.
        return 0.0


def generate_kmers(sequence: str, max_k: int):
    """Enumerate every contiguous substring of length 1..max_k with its score.

    Returns a dict mapping each length to a list of ``(substring, score)``
    tuples in left-to-right order of occurrence, where the score is the sum
    of ``score_residue`` over the substring's characters. Repeated
    substrings appear once per occurrence. A length greater than
    ``len(sequence)`` maps to an empty list.
    """
    seq_len = len(sequence)
    table = {}

    for width in range(1, max_k + 1):
        # range() is empty when width exceeds the sequence length.
        windows = (
            sequence[start:start + width]
            for start in range(seq_len - width + 1)
        )
        table[width] = [
            (window, sum(map(score_residue, window))) for window in windows
        ]

    return table

def main():
    """Fetch UniProt entry P04637 and print its heaviest k-mers.

    Downloads the FASTA sequence, scores every k-mer of length 1..5 with
    ``generate_kmers``, then prints the single heaviest 5-mer followed by
    all top-scoring k-mers for each length (one line per occurrence).

    If the sequence is shorter than a given length there are no k-mers of
    that length; this is reported instead of letting ``max()`` raise
    ``ValueError`` on an empty list.
    """
    import math

    url = "https://rest.uniprot.org/uniprotkb/P04637.fasta"
    k = 5

    sequence = read_uniprot_fasta_url(url)
    print("Sequence length:", len(sequence))

    all_kmers = generate_kmers(sequence, k)

    # best k-mer of length k
    if all_kmers[k]:
        best_subseq, best_score = max(all_kmers[k], key=lambda x: x[1])
        print("\nHeaviest k-mer:", best_subseq)
        print("Total molecular weight:", round(best_score, 2), "Da")
    else:
        print(f"\nNo {k}-mers: sequence is shorter than {k} residues")

    # best for each length
    print("\nBest k-mers by length:")
    for length in range(1, k + 1):
        kmers = all_kmers[length]
        if not kmers:
            print(f"\n{length}-mers: none (sequence too short)")
            continue

        max_score = max(score for _, score in kmers)
        # Scores are float sums; the same residues added in a different
        # order can differ by one ULP, so compare with a tolerance rather
        # than ``==`` to avoid dropping genuine ties.
        best = [
            (s, sc)
            for s, sc in kmers
            if math.isclose(sc, max_score, rel_tol=1e-9)
        ]

        print(f"\n{length}-mers (max weight = {round(max_score,2)} Da):")
        for s, sc in best:
            print(s, round(sc, 2))


# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Sequence length: 393

Heaviest k-mer: RERFE
Total molecular weight: 807.85 Da

Best k-mers by length:

1-mers (max weight = 204.23 Da):
W 204.23
W 204.23
W 204.23
W 204.23

2-mers (max weight = 369.42 Da):
WF 369.42

3-mers (max weight = 515.57 Da):
QWF 515.57

4-mers (max weight = 662.7 Da):
EQWF 662.7

5-mers (max weight = 807.85 Da):
RERFE 807.85
