In [1]:
from google.colab import drive

# 1. Mount Google Drive at /content/drive so that files stored on Drive can be
#    opened through ordinary filesystem paths under that directory.
drive.mount('/content/drive')

def compute_lcs(s1: str, s2: str):
    """
    Return the Longest Common Subsequence (LCS) length of s1 and s2 together
    with one concrete LCS string.

    A (len(s1) + 1) x (len(s2) + 1) table is filled row by row; entry [r][c]
    holds the LCS length of the prefixes s1[:r] and s2[:c]. One witness
    subsequence is then read off by walking back from the bottom-right corner.

    Returns: (length_u, lcs_string)
    """
    rows, cols = len(s1), len(s2)
    table = [[0] * (cols + 1) for _ in range(rows + 1)]

    # Forward pass: fill the table from the shorter prefixes to the longer ones.
    for r, ch1 in enumerate(s1, start=1):
        above, current = table[r - 1], table[r]
        for c, ch2 in enumerate(s2, start=1):
            if ch1 == ch2:
                current[c] = above[c - 1] + 1
            else:
                current[c] = max(above[c], current[c - 1])

    # Backward pass: collect matched characters (in reverse order).
    picked = []
    r, c = rows, cols
    while r and c:
        if s1[r - 1] == s2[c - 1]:
            picked.append(s1[r - 1])
            r, c = r - 1, c - 1
        elif table[r][c - 1] > table[r - 1][c]:
            # Only move left when that is strictly better; ties move up.
            c -= 1
        else:
            r -= 1

    picked.reverse()
    return table[rows][cols], "".join(picked)


def edit_distance_via_lcs(s1: str, s2: str):
    """
    Derive the weighted edit distance D between s1 and s2 from their LCS.

    Cost model: insertion = deletion = 1, substitution = 2, match = 0.
    Under these costs D = len(s1) + len(s2) - 2*u, where u is the LCS length;
    equivalently, D counts the characters of each string left out of the LCS.

    Returns: (D, u, lcs_string)
    """
    lcs_len, lcs_str = compute_lcs(s1, s2)
    # Characters of s1 not in the LCS plus characters of s2 not in the LCS.
    distance = (len(s1) - lcs_len) + (len(s2) - lcs_len)
    return distance, lcs_len, lcs_str


def read_fasta(path: str) -> str:
    """Read a FASTA file and return its sequence lines joined into one string.

    Lines that begin with ">" (record headers) are skipped; every other line
    is stripped of surrounding whitespace and appended, so all records in the
    file end up concatenated in file order.
    """
    pieces = []
    with open(path) as handle:
        for raw_line in handle:
            if raw_line.startswith(">"):
                continue
            pieces.append(raw_line.strip())
    return "".join(pieces)


if __name__ == "__main__":
    # Both FASTA inputs are read from the mounted Drive folder.
    PATH_SARS2 = "/content/drive/MyDrive/sequence.fasta"
    PATH_MERS = "/content/drive/MyDrive/middle_east.fasta"

    # Parse the two sequences.
    sars2, mers = read_fasta(PATH_SARS2), read_fasta(PATH_MERS)

    # Run the LCS-based comparison, then emit the report one line at a time.
    D, u, lcs_seq = edit_distance_via_lcs(sars2, mers)
    for report_line in (
        f"SARS‑CoV‑2 Spike length: {len(sars2)} aa",
        f"MERS‑CoV    Spike length: {len(mers)} aa",
        f"LCS length u = {u}",
        f"Weighted edit distance D = {len(sars2)} + {len(mers)} - 2*{u} = {D}",
        f"One LCS (first 50 aa): {lcs_seq[:50]}",
        f"One LCS (last 50 aa):  {lcs_seq[-50:]}",
    ):
        print(report_line)


Mounted at /content/drive
SARS‑CoV‑2 Spike length: 1273 aa
MERS‑CoV    Spike length: 1483 aa
LCS length u = 592
Weighted edit distance D = 1273 + 1483 - 2*592 = 1572
One LCS (first 50 aa): MVFLLLPVSSCVTTRPAGYPRSTQLFPHVSGTGTKFVNDVFANRIGTTST
One LCS (last 50 aa):  KALNESIDLELGYYKWPWYIWLGFIAGLAVILCCTCCLKCCCDEDEPVVH
