<a href="https://colab.research.google.com/github/Aleezahshaikh/Bioinformatics_problems/blob/main/Maximum%20Matchings%20and%20RNA%20Secondary%20Structures/Maximum_Matchings_and_RNA_Secondary_Structures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [3]:
from math import factorial
from Bio import SeqIO
from google.colab import files
import math

def mmch(rna):
    """
    Calculates the total number of maximum matchings of basepair edges in the bonding graph of an RNA string.

    Only A-U and C-G edges exist, so the two pair types are counted
    independently and multiplied. For one pair type with counts x and y, a
    maximum matching uses every nucleotide of the rarer type and assigns each
    one a distinct partner from the more common type. The number of such
    injective assignments is max(x, y)! / (max(x, y) - min(x, y))!, i.e.
    math.perm(max(x, y), min(x, y)). When x == y this reduces to x!.

    Args:
        rna (str): RNA sequence composed of 'A', 'U', 'C', and 'G' characters.
            Matching is case-sensitive; any other character is ignored.

    Returns:
        int: The total number of maximum matchings of base pairs
        (1 for a sequence with no possible pair, including the empty string).
    """
    counts = {'A': 0, 'U': 0, 'C': 0, 'G': 0}
    for n in rna:
        if n in counts:
            counts[n] += 1

    def _pairings(x, y):
        # Ordered selections of min(x, y) partners out of max(x, y) candidates.
        # Using combinations here would only count WHICH partners are used,
        # not how they are paired, and undercounts whenever min(x, y) > 1.
        return math.perm(max(x, y), min(x, y))

    au_matchings = _pairings(counts['A'], counts['U'])
    cg_matchings = _pairings(counts['C'], counts['G'])
    return au_matchings * cg_matchings

def open_fasta(uploaded_file):
    """
    Reads a FASTA file and returns its sequence data as a single string.

    Lines beginning with '>' are treated as headers and skipped. Every other
    line is stripped of surrounding whitespace and appended in file order, so
    a file containing several records yields their sequences concatenated.

    Args:
        uploaded_file (str): Path of the FASTA file to read.

    Returns:
        str: The concatenated sequence text ('' if the file has no sequence lines).
    """
    with open(uploaded_file, 'r') as handle:
        fragments = [row.strip() for row in handle if not row.startswith('>')]
    return ''.join(fragments)

def get_input(max_length=100):
    """
    Prompts the user to input an RNA sequence manually or upload a FASTA file.

    The prompt is repeated until a usable sequence is obtained: an invalid
    menu choice, an upload that produced no file, or a sequence longer than
    ``max_length`` each print a message and start over. Retrying is done with
    a loop rather than recursion so repeated bad input cannot grow the stack.

    Args:
        max_length (int): Longest sequence accepted, in characters. Defaults to 100.

    Returns:
        str: RNA sequence entered by the user. Manually typed sequences are
        stripped and upper-cased; sequences read from a file are returned as
        produced by ``open_fasta``.
    """
    while True:
        choice = input("CHOOSE \n (1) Do you want to input a sequence manually or \n (2) Upload a FASTA file \nEnter 1 or 2: ").strip()

        if choice == '1':
            seq = input("Please enter the RNA sequence: ").strip().upper()
        elif choice == '2':
            print("Please upload a FASTA file.")
            # files.upload() is used here as a mapping keyed by file name.
            uploaded = files.upload()
            if not uploaded:
                # A cancelled upload yields nothing to index; ask again
                # instead of failing with an IndexError.
                print("Error: No file was uploaded. Please try again.")
                continue
            uploaded_file = next(iter(uploaded))
            seq = open_fasta(uploaded_file)
        else:
            print("Invalid choice. Please enter 1 for manual input or 2 for file upload.")
            continue

        # Single length check shared by both input paths.
        if len(seq) > max_length:
            print(f"Error: The sequence length exceeds {max_length} characters. Please try again with a shorter sequence.")
            continue
        return seq

if __name__ == "__main__":
    # Entry point: obtain a sequence, normalise it, and report the matching count.
    try:
        sequence = get_input()
        if sequence:
            # Remove embedded newlines and spaces in one pass, then upper-case
            # so the nucleotide counting sees only canonical symbols.
            cleaned_sequence = sequence.translate(str.maketrans('', '', '\n ')).upper()
            result = mmch(cleaned_sequence)
            print(f"Maximum matchings for the entered sequence: {result}")
    except Exception as exc:
        # Report any failure (bad file, interrupted upload, ...) instead of a traceback.
        print(f"An error occurred: {exc}")


#TEST CASE 1
# Expected output for the following sample input: 6
#>Rosalind_92
#AUGCUUC

#TEST CASE 2
# Expected output for the following sample input: 4
#>Rosalind_92
#AAUUCCGG

CHOOSE 
 (1) Do you want to input a sequence manually or 
 (2) Upload a FASTA file 
Enter 1 or 2: 1
Please enter the RNA sequence: AAUUCCGG
Maximum matchings for the entered sequence: 4
