In [None]:
from math import factorial
from google.colab import files

# Function to open and parse a FASTA file
def open_fasta(uploaded_file):
    """
    Opens and reads a FASTA file to extract the RNA sequence.

    Parameters:
        uploaded_file (str): The name of the uploaded FASTA file.

    Returns:
        str: The RNA sequence extracted from the FASTA file.
    """
    sequence = ''
    try:
        with open(uploaded_file, 'r') as fasta:
            for line in fasta:
                if line[0] != '>':  # Skip the header line starting with '>'
                    sequence += line.strip()  # Remove newline characters and add sequence to string
    except Exception as e:
        print(f"Error reading the file: {e}")
        return ""
    return sequence

# Function to calculate maximum matchings for RNA sequence
def MM(seq):
    """
    Calculates the maximum number of matchings for RNA sequence based on AU and GC pairs.

    Parameters:
        seq (str): The RNA sequence for which to calculate maximum matchings.

    Returns:
        int: The total number of maximum matchings for the RNA sequence.
    """
    # Validate sequence characters
    valid_bases = {'A', 'U', 'C', 'G'}
    if any(base not in valid_bases for base in seq):
        raise ValueError("The RNA sequence contains invalid characters. Only A, U, C, and G are allowed.")

    # Count occurrences of A, U, C, G
    A = seq.count('A')
    U = seq.count('U')
    C = seq.count('C')
    G = seq.count('G')

    # Calculate maximum matchings for AU pairs (using factorial logic for combinations)
    if A > U:
        MMAU = factorial(A) // factorial(A - U)
    else:
        MMAU = factorial(U) // factorial(U - A)

    # Calculate maximum matchings for GC pairs
    if C > G:
        MMGC = factorial(C) // factorial(C - G)
    else:
        MMGC = factorial(G) // factorial(G - C)

    return MMAU * MMGC

# Function to prompt user for input method (manual or file upload)
def get_input():
    """
    Prompts the user to either input the RNA sequence manually or upload a FASTA file.

    Returns:
        str: The RNA sequence entered by the user.
    """
    choice = input("CHOOSE \n (1) Do you want to input a sequence manually or \n (2) upload a FASTA file? \nEnter 1 or 2: ")

    # Handle user choice
    if choice == '1':
        seq = input("Please enter the RNA sequence: ").strip().upper()
        return seq
    elif choice == '2':
        print("Please upload a FASTA file.")
        uploaded = files.upload()  # Open the file upload dialog in Colab
        uploaded_file = list(uploaded.keys())[0]  # Get the uploaded file name
        sequence = open_fasta(uploaded_file)
        return sequence
    else:
        print("Invalid choice. Please enter 1 for manual input or 2 for file upload.")
        return get_input()

# Main execution
if __name__ == "__main__":
    try:
        sequence = get_input()

        # Ensure the sequence is not empty or invalid
        if not sequence:
            print("No valid sequence provided. Please try again.")
        else:
            # Clean the sequence to ensure no spaces/newlines and convert to uppercase
            cleaned_sequence = sequence.replace('\n', '').replace(' ', '').upper()

            # Calculate and print the maximum matchings for the input RNA sequence
            result = MM(cleaned_sequence)
            print(f"Maximum matchings for the uploaded sequence: {result}")

    except ValueError as ve:
        print(f"Error: {ve}")  # Catch invalid character errors in the sequence
    except Exception as e:
        print(f"An error occurred: {e}")  # Catch any other errors



CHOOSE 
 (1) Do you want to input a sequence manually or 
 (2) upload a FASTA file? 
Enter 1 or 2: 2
Please upload a FASTA file.


Saving test.fasta to test (4).fasta
Maximum matchings for the uploaded sequence: 6


In [3]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [4]:
from Bio import SeqIO
from google.colab import files
import math

# Function to calculate the number of maximum matchings for an RNA sequence
def mmch(rna):
    """
    This function calculates the total number of maximum matchings of basepair edges
    in the bonding graph of an RNA string. The basepair edges are formed between
    A-U and C-G. The number of matchings is determined by the combinations possible
    for A-U and C-G pairs, considering the available counts of bases.

    Args:
    rna (str): RNA sequence (string) composed of the characters 'A', 'U', 'C', and 'G'.

    Returns:
    int: The total number of maximum matchings of base pairs.
    """
    # Initialize a dictionary to count occurrences of each base (A, U, C, G)
    counts = {'A': 0, 'U': 0, 'C': 0, 'G': 0}

    # Count the occurrences of each base in the RNA sequence
    for n in rna:
        counts[n] += 1

    # Calculate maximum A-U pairings (pair A with U)
    max_au = min(counts['A'], counts['U'])

    # Calculate maximum C-G pairings (pair C with G)
    max_cg = min(counts['C'], counts['G'])

    # The total number of matchings is the product of the combinations (binomial coefficient)
    # for A-U and C-G pairings.
    # Using binomial coefficient: n choose k = n! / (k! * (n-k)!)

    # Calculate combinations for A-U and C-G
    au_combinations = math.comb(counts['A'], max_au) * math.comb(counts['U'], max_au)
    cg_combinations = math.comb(counts['C'], max_cg) * math.comb(counts['G'], max_cg)

    # Return the total number of matchings
    return au_combinations * cg_combinations

def get_input():
    """
    Prompts the user to either input the RNA sequence manually or upload a FASTA file.

    Returns:
        str: The RNA sequence entered by the user.
    """
    choice = input("CHOOSE \n (1) Do you want to input a sequence manually or \n (2) upload a FASTA file? \nEnter 1 or 2: ")

    # Handle user choice
    if choice == '1':
        seq = input("Please enter the RNA sequence: ").strip().upper()
        return seq
    elif choice == '2':
        print("Please upload a FASTA file.")
        uploaded = files.upload()  # Open the file upload dialog in Colab
        uploaded_file = list(uploaded.keys())[0]  # Get the uploaded file name
        sequence = open_fasta(uploaded_file)
        return sequence
    else:
        print("Invalid choice. Please enter 1 for manual input or 2 for file upload.")
        return get_input()


# Main execution
if __name__ == "__main__":
    try:
        sequence = get_input()

        # Ensure the sequence is not empty or invalid
        if not sequence:
            print("No valid sequence provided. Please try again.")
        else:
            # Clean the sequence to ensure no spaces/newlines and convert to uppercase
            cleaned_sequence = sequence.replace('\n', '').replace(' ', '').upper()

            # Calculate and print the maximum matchings for the input RNA sequence
            result = mmch(cleaned_sequence)
            print(f"Maximum matchings for the entered sequence: {result}")

    except ValueError as ve:
        print(f"Error: {ve}")  # Catch invalid character errors in the sequence
    except Exception as e:
        print(f"An error occurred: {e}")  # Catch any other errors




CHOOSE 
 (1) Do you want to input a sequence manually or 
 (2) upload a FASTA file? 
Enter 1 or 2: 2
Please upload a FASTA file.


Saving test.fasta to test.fasta
An error occurred: name 'open_fasta' is not defined
