In [1]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [None]:
from Bio import SeqIO
from google.colab import files

def find_longest_common_substring(dna_strings):
    """
    Finds the longest substring shared by every string in a list of DNA strings.

    Candidates are taken from the first string, longest first and left to
    right, so when several common substrings share the maximum length the one
    that starts earliest in the first string is returned.

    Parameters:
        dna_strings (list): List of DNA sequences (strings).

    Returns:
        str: The longest common substring; "none" if the sequences share no
        substring; "" (after printing a notice) if the list itself is empty.
    """
    if not dna_strings:
        print("No DNA sequences found.")
        return ""

    base_string = dna_strings[0]
    # Slice once instead of rebuilding the list for every candidate.
    other_strings = dna_strings[1:]
    # A common substring can never be longer than the shortest sequence, so
    # longer candidates from the first string are not worth testing.
    max_length = min(len(seq) for seq in dna_strings)

    # Iterate over possible substring lengths from longest to shortest
    for length in range(max_length, 0, -1):
        for start in range(len(base_string) - length + 1):
            substring = base_string[start:start + length]
            if all(substring in seq for seq in other_strings):
                return substring
    return "none"

def get_dna_sequences(max_sequences=100, max_length=1000):
    """
    Prompts the user to input DNA sequences manually or upload a FASTA file.
    Validates the input based on specified limits for number and length of sequences.

    Sequences from both sources are upper-cased so that later substring
    comparisons are not affected by letter case.

    Parameters:
        max_sequences (int): Largest number of sequences accepted (default 100).
        max_length (int): Largest length accepted for a single sequence
            (default 1000).

    Returns:
        list: A list of DNA sequences, or an empty list if input is invalid.
    """
    # Re-prompt in a loop (rather than recursing) so repeated invalid choices
    # cannot grow the call stack.
    while True:
        choice = input(
            "CHOOSE\n(1) Input sequences manually\n(2) Upload a FASTA file\nEnter 1 or 2: "
        ).strip()
        if choice in ('1', '2'):
            break
        print("Invalid choice. Please enter 1 for manual input or 2 for file upload.")

    if choice == '1':
        raw_count = input(f"Enter the number of DNA sequences (up to {max_sequences}): ")
        try:
            num_sequences = int(raw_count)
        except ValueError:
            # A non-numeric count is invalid input, not a reason to crash.
            print("Error: Number of sequences must be a whole number.")
            return []
        if num_sequences > max_sequences:
            print(f"Error: Maximum number of sequences is {max_sequences}.")
            return []
        if num_sequences < 1:
            print("Error: Number of sequences must be at least 1.")
            return []

        sequences = []
        for i in range(num_sequences):
            seq = input(f"Enter DNA sequence {i + 1} (up to {max_length} bp): ").strip().upper()
            if len(seq) > max_length:
                print(f"Error: Sequence length exceeds {max_length} base pairs.")
                return []
            sequences.append(seq)
        return sequences

    # choice == '2': read the sequences from an uploaded FASTA file.
    print("Please upload a FASTA file.")
    uploaded = files.upload()
    if not uploaded:
        # Nothing came back from the upload step (e.g. the dialog was
        # cancelled), so there is no file name to read.
        print("Error: No file was uploaded.")
        return []

    # Only the first uploaded file is read; any additional files are ignored.
    uploaded_file = next(iter(uploaded))
    # Upper-case to match the manual-entry path (FASTA files may be mixed-case).
    sequences = [
        str(record.seq).upper() for record in SeqIO.parse(uploaded_file, "fasta")
    ]

    if len(sequences) > max_sequences:
        print(f"Error: FASTA file contains more than {max_sequences} sequences.")
        return []
    if any(len(seq) > max_length for seq in sequences):
        print(f"Error: One or more sequences in the file exceed {max_length} base pairs.")
        return []

    return sequences

# Main execution: collect the sequences, then report their longest shared substring.
sequences = get_dna_sequences()

if sequences:
    result = find_longest_common_substring(sequences)
    print("Longest common substring:", result)
else:
    # An empty list means the input was rejected or nothing was provided.
    print("No sequences found. Please check the input.")



CHOOSE
(1) Input sequences manually
(2) Upload a FASTA file
Enter 1 or 2: 1
Enter the number of DNA sequences (up to 100): 3
Enter DNA sequence 1 (up to 1000 bp): PLEASANTLY
Enter DNA sequence 2 (up to 1000 bp): MEANLY
Enter DNA sequence 3 (up to 1000 bp): LEA
Longest common substring: EA
