# GUI BIO

# In [1]:
import tkinter as tk
import tkinter as tk
from tkinter import filedialog
import customtkinter as ctk
from Bio import SeqIO
import warnings

# Suppress all warnings: the "ignore" action is installed without any
# category or module restriction, so it applies to every warning raised
# after this point.
warnings.filterwarnings("ignore")

# Select customtkinter's "Dark" appearance mode before any widget is built.
ctk.set_appearance_mode("Dark")

class BioinformaticsApp:
    def __init__(self, master):
        self.master = master
        self.master.title("Bioinformatics GUI")

        # Calculate center position for the window
        screen_width = self.master.winfo_screenwidth()
        screen_height = self.master.winfo_screenheight()
        window_width = 857  # Adjusted dimensions
        window_height = 857  # Adjusted dimensions
        x = (screen_width - window_width) // 2
        y = (screen_height - window_height) // 2
        self.master.geometry(f"{window_width}x{window_height}+{x}+{y}")

        # Make the window non-resizable
        self.master.resizable(False, False)

        self.dna_sequence = ""  # Variable to store DNA sequence

        self.create_widgets()

    def create_widgets(self):
        """Build every widget of the main window and wire up the callbacks.

        The window is laid out top to bottom: the FASTA upload section, the
        algorithm radio buttons, the pattern entry, the read-only result box
        and the "Run algorithm" button. Two closures do the work:
        ``open_file`` loads a file into ``self.dna_sequence`` and ``run_algo``
        runs the selected algorithm and renders its result.
        """
        # Frame for file upload
        upload_frame = ctk.CTkFrame(self.master)
        upload_frame.pack(pady=10)

        file_name = tk.StringVar()
        file_name.set("No file selected")

        def open_file():
            """Let the user pick a FASTA file and load its sequence lines."""
            file_path = filedialog.askopenfilename(
                filetypes=[("FASTA files", ("*.fasta", "*.fa", "*.fna"))]
            )
            if not file_path:
                return  # dialog was cancelled

            try:
                with open(file_path, "r") as f:
                    # Lines starting with '>' are record headers; every other
                    # line is sequence data. All records are concatenated.
                    parts = [line.strip() for line in f if not line.startswith(">")]
            except (OSError, UnicodeDecodeError) as err:
                # Never keep a stale or half-read sequence, and make the
                # label reflect that nothing is loaded.
                self.dna_sequence = ""
                file_name.set("No file selected")
                print(f"Could not read {file_path}: {err}")
                return

            self.dna_sequence = "".join(parts)
            # The label is only updated once the file was actually read.
            file_name.set(file_path.rsplit("/", 1)[-1])

            # Print the loaded sequence
            print("Loaded DNA Sequence:", self.dna_sequence)

        upload_button = ctk.CTkButton(upload_frame, text="Upload DNA file", command=open_file)
        upload_button.pack(pady=5)

        file_label = ctk.CTkLabel(upload_frame, textvariable=file_name)
        file_label.pack(pady=5)

        # Frame for algorithm checklist
        algo_frame = ctk.CTkFrame(self.master)
        algo_frame.pack(pady=10)

        algo_label = ctk.CTkLabel(algo_frame, text="Select the algorithm to apply:")
        algo_label.pack()

        algos = ["DNA reverse complementary", "DNA translation to protein", "Exact matching", "Boyer moore", "K-mer indexing", "K-M-P algorithm", "Suffix array", "Approximate matching"]
        # Everything except the first two entries searches for a pattern.
        needs_pattern = set(algos[2:])

        # The first algorithm is pre-selected.
        selected_algo = tk.StringVar(value=algos[0])
        for algo in algos:
            radio_button = tk.Radiobutton(algo_frame, text=algo, variable=selected_algo, value=algo, bg="#303030",
                                          fg="white", selectcolor="#303030", activebackground="#303030")
            radio_button.pack(anchor=tk.W)

        # Pattern label and entry box
        input_frame = ctk.CTkFrame(self.master)
        input_frame.pack(pady=10)
        name_label = ctk.CTkLabel(input_frame, text="Pattern ")
        name_label.grid(row=0, column=0, padx=5, pady=5, sticky="e")
        name_entry = ctk.CTkEntry(input_frame)
        name_entry.grid(row=0, column=1, padx=5, pady=5)

        # Frame for results
        result_frame = ctk.CTkFrame(self.master)
        result_frame.pack(pady=10)

        # The text box stays DISABLED (read-only) except while run_algo writes.
        result_text = tk.Text(result_frame, width=80, height=20, state=tk.DISABLED)
        result_text.pack()

        # Configure text widget tags
        result_text.tag_configure("header", font=("Arial", 15, "bold"), underline=True, foreground="red")
        result_text.tag_configure("DNA reverse complementary", font=("Arial", 12,))
        result_text.tag_configure("Full translation", font=("Arial", 12, "italic"))
        result_text.tag_configure("Protein translation", font=("Arial", 12, "italic"))

        def show_heading(text):
            """Append *text* using the red, underlined "header" style."""
            result_text.insert(tk.END, text, "header")

        def show_value(item, tag=None):
            """Append one result value, optionally styled with *tag*."""
            # Sequences are joined explicitly so the output does not depend on
            # Tcl's list conversion, and an empty result stays visible.
            if isinstance(item, (list, tuple)):
                text = ", ".join(str(entry) for entry in item) if item else "none"
            else:
                text = str(item)
            if tag:
                result_text.insert(tk.END, text, tag)
            else:
                result_text.insert(tk.END, text)

        def render(algo, result):
            """Write the result of *algo* into the result box."""
            show_heading(f"Results for {algo}:\n")
            if algo == algos[0]:
                show_heading("\n DNA reverse complementary: \n\n")
                show_value(result, "DNA reverse complementary")
                show_value("\n")
            elif algo == algos[1]:
                show_heading("\n Full translation:\n")
                show_value(result[0], "Full translation")
                show_heading("\n Protein translation:\n")
                show_value(result[1], "Protein translation")
                show_value("\n")
            elif algo == algos[2]:
                show_heading("\n Pattern that we need:\n")
                show_value(result[0])
                show_value("\n")
                show_heading("\n Pattern found at positions:\n")
                show_value(result[1])
                show_value("\n")
                show_value(result[2])
                show_value("\n")
                show_heading("\n the pattern that founded:\n")
                show_value(result[3])
                show_value("\n")
            elif algo == algos[3]:
                show_heading("\n Position of matched pattern :\n")
                show_value(result[0])
                show_heading("\n Skip Alignment in Bad Character rule :\n")
                show_value(result[1])
                show_heading("\n Skip Alignment in Good Suffix rule  :\n")
                show_value(result[2])
            elif algo == algos[4]:
                show_heading("\n Position of matched pattern :\n")
                show_value(result[0])
                show_value("\n")
                show_heading("\n Skipped :\n")
                show_value(result[1])
            elif algo == algos[5]:
                show_heading("\n Occurrences of pattern:\n")
                show_value(result)
                show_value("\n")
            elif algo == algos[6]:
                show_heading("\n Position Match:\n")
                show_value(result[0])
                show_value("\n")
                show_heading("\n Skipped:\n")
                show_value(result[1])
                show_value("\n")
            elif algo == algos[7]:
                show_heading("\n the approximately matching position that founded:\n")
                show_value(result[0])
                show_value("\n")
                show_value(result[1])
                show_value("\n")

        def run_algo():
            """Run the selected algorithm on the loaded sequence and show it."""
            result_text.config(state=tk.NORMAL)
            try:
                result_text.delete("1.0", tk.END)

                if not self.dna_sequence:
                    result_text.insert(tk.END, "Please select a DNA file first.\n")
                    return

                pattern = name_entry.get().strip()
                algo = selected_algo.get()
                if algo in needs_pattern and not pattern:
                    result_text.insert(tk.END, "Please enter a pattern first.\n")
                    return

                try:
                    result = self.run_algorithm(algo, self.dna_sequence, pattern)
                except Exception as err:
                    # UI boundary: report the failure in the result box
                    # instead of leaving it blank.
                    result_text.insert(tk.END, f"Could not run {algo}: {err}\n")
                    return

                render(algo, result)
            finally:
                # Always return to read-only, whatever happened above.
                result_text.config(state=tk.DISABLED)

        run_button = ctk.CTkButton(self.master, text="Run algorithm", command=run_algo)
        run_button.pack(pady=10)

    def run_algorithm(self, algo, sequence,pattern):
        if algo == "DNA reverse complementary":
            return self.reverse_complement(sequence)
        elif algo == "DNA translation to protein":
            return self.translate(sequence)
        elif algo == "Exact matching":
            return self.naive_search(sequence,pattern)
        elif algo == "Boyer moore":
            return self.boyer_moore(sequence,pattern)
        elif algo == "K-mer indexing":
            return self.kmer_indexing(sequence,pattern)
        elif algo == "K-M-P algorithm":
            return self.kmp(sequence,pattern)
        elif algo == "Suffix array":
            return self.suffix_array(sequence,pattern)
        elif algo == "Approximate matching":
            return self.approximate_matching(sequence,pattern)

        
# implementation of all algorithms
    def reverse_complement(self, sequence):
        """Return the reverse complement of a DNA sequence.

        ``A``/``T`` and ``C``/``G`` are swapped and the result is reversed.
        Lower-case bases (common in soft-masked FASTA files) are complemented
        too and keep their case; any other character is copied unchanged.

        Args:
            sequence: The DNA sequence as a string.

        Returns:
            The reverse-complemented sequence.
        """
        complement = str.maketrans("ACGTacgt", "TGCAtgca")
        return sequence[::-1].translate(complement)

    def translate(self, sequence):
        """Translate a DNA sequence codon by codon, starting at index 0.

        The sequence is upper-cased first, so lower-case input translates
        normally. A codon that is not in the table (for example one
        containing ``N``) becomes ``X`` instead of raising ``KeyError``.
        Trailing bases that do not fill a codon are ignored.

        Args:
            sequence: The DNA sequence as a string.

        Returns:
            A tuple ``(full, protein)``. ``full`` holds one letter per codon,
            with ``*`` for stop codons. ``protein`` holds only the letters
            between a start codon (``M``, included) and the next stop codon
            (excluded); several such stretches are concatenated.
        """
        dic = {"TTT" : "F", "CTT" : "L", "ATT" : "I", "GTT" : "V",
               "TTC" : "F", "CTC" : "L", "ATC" : "I", "GTC" : "V",
               "TTA" : "L", "CTA" : "L", "ATA" : "I", "GTA" : "V",
               "TTG" : "L", "CTG" : "L", "ATG" : "M", "GTG" : "V",
               "TCT" : "S", "CCT" : "P", "ACT" : "T", "GCT" : "A",
               "TCC" : "S", "CCC" : "P", "ACC" : "T", "GCC" : "A",
               "TCA" : "S", "CCA" : "P", "ACA" : "T", "GCA" : "A",
               "TCG" : "S", "CCG" : "P", "ACG" : "T", "GCG" : "A",
               "TAT" : "Y", "CAT" : "H", "AAT" : "N", "GAT" : "D",
               "TAC" : "Y", "CAC" : "H", "AAC" : "N", "GAC" : "D",
               "TAA" : "*", "CAA" : "Q", "AAA" : "K", "GAA" : "E",
               "TAG" : "*", "CAG" : "Q", "AAG" : "K", "GAG" : "E",
               "TGT" : "C", "CGT" : "R", "AGT" : "S", "GGT" : "G",
               "TGC" : "C", "CGC" : "R", "AGC" : "S", "GGC" : "G",
               "TGA" : "*", "CGA" : "R", "AGA" : "R", "GGA" : "G",
               "TGG" : "W", "CGG" : "R", "AGG" : "R", "GGG" : "G"
               }
        sequence = sequence.upper()
        full = []
        protein = []
        in_protein = False
        for i in range(0, len(sequence) - 2, 3):
            # One lookup per codon; "X" marks a codon the table cannot resolve.
            amino = dic.get(sequence[i:i + 3], "X")
            if amino == "M":
                in_protein = True
            elif amino == "*":
                in_protein = False

            if in_protein:
                protein.append(amino)
            full.append(amino)
        # Return both the full translation and the protein sequence as a tuple
        return "".join(full), "".join(protein)

    def naive_search(self, sequence, pattern):
        """Find the first exact occurrence of *pattern* by scanning left to right.

        Args:
            sequence: The text to search in.
            pattern: The text to look for.

        Returns:
            A tuple of four display strings: the pattern searched for, the
            start position, the end position (exclusive) and the matched text.
            When nothing matches, the last three carry "not found" messages.
        """
        searched = f'Pattern that we need to search {pattern}'
        width = len(pattern)

        # Try every alignment in order and stop at the first one that fits.
        last_alignment = len(sequence) - width
        first_hit = next(
            (offset for offset in range(last_alignment + 1) if sequence.startswith(pattern, offset)),
            None,
        )

        if first_hit is None:
            return (
                searched,
                'Start Position not founded',
                'End Position not founded',
                'Sequence that you need not in the dna sequence',
            )

        stop = first_hit + width
        return (
            searched,
            f'Start Position {first_hit}',
            f'End Position {stop}',
            f'Sequence that we founded it {sequence[first_hit:stop]}',
        )

    def approximate_matching(self, sequence, pattern, max_edits=None):
        """Find the substring of *sequence* closest to *pattern* by edit distance.

        An edit is a substitution, an extra base in the sequence or a missing
        base in the sequence. The alignment with the fewest edits is reported;
        among equally good ones, the one that ends first wins. An exact
        occurrence therefore yields the same strings as an exact search.

        Only the previous row of the dynamic-programming table is kept, and
        the start of each alignment is carried along with its cost, so no
        full table and no traceback are needed.

        Args:
            sequence: The text to search in.
            pattern: The text to look for.
            max_edits: Largest number of edits to accept. ``None`` (default)
                accepts any alignment needing fewer edits than the pattern
                has characters.

        Returns:
            A tuple of two display strings: start position and end position
            (exclusive). When the best alignment is not exact, the second
            string also states its edit distance. When nothing acceptable is
            found, or the pattern is empty, both strings say so.
        """
        not_found = ('Start Position not founded', 'End Position not founded')
        width = len(pattern)
        if width == 0:
            return not_found
        limit = width - 1 if max_edits is None else max_edits

        # Row 0: with no sequence consumed, pattern[:j] costs j edits.
        prev_cost = list(range(width + 1))
        prev_start = [0] * (width + 1)
        best = None  # (edits, start, end) of the best alignment so far

        for i in range(1, len(sequence) + 1):
            base = sequence[i - 1]
            # Column 0 costs nothing: an alignment may begin at any position.
            cost = [0] * (width + 1)
            start = [i] * (width + 1)
            for j in range(1, width + 1):
                if base == pattern[j - 1]:
                    cost[j] = prev_cost[j - 1]
                    start[j] = prev_start[j - 1]
                else:
                    # Cheapest of: substitution, extra sequence base,
                    # missing sequence base.
                    cheapest, origin = prev_cost[j - 1], prev_start[j - 1]
                    if prev_cost[j] < cheapest:
                        cheapest, origin = prev_cost[j], prev_start[j]
                    if cost[j - 1] < cheapest:
                        cheapest, origin = cost[j - 1], start[j - 1]
                    cost[j] = cheapest + 1
                    start[j] = origin
            if best is None or cost[width] < best[0]:
                best = (cost[width], start[width], i)
                if cost[width] == 0:
                    break  # an exact hit cannot be improved on
            prev_cost, prev_start = cost, start

        if best is None or best[0] > limit:
            return not_found

        edits, begin, end = best
        if edits == 0:
            return f'Start position : {begin}', f'End Position : {end}'
        return f'Start position : {begin}', f'End Position : {end} (edit distance {edits})'
    

    def boyer_moore(self, sequence, pattern):
        """Find the first occurrence of *pattern* with the Boyer-Moore algorithm.

        Each alignment is compared right to left. On a mismatch the pattern
        is shifted by the larger of the bad-character shift and the (strong)
        good-suffix shift. The alignments jumped over are credited to the
        rule that proposed the larger shift (bad character on a tie).

        Args:
            sequence: The text to search in.
            pattern: The text to look for.

        Returns:
            On a match: ``(position, skipped_by_bad_character,
            skipped_by_good_suffix)``.
            Otherwise: ``(-1, skipped_by_bad_character,
            skipped_by_good_suffix, offset)``, where ``offset`` is the
            alignment at which the search stopped.
        """
        width = len(pattern)
        length = len(sequence)

        # Bad-character rule: index of the last occurrence of each character.
        last_seen = {char: index for index, char in enumerate(pattern)}

        # Good-suffix rule (strong form). shift[j + 1] is the shift to apply
        # after a mismatch at pattern index j; border[] holds the border
        # positions used to derive it.
        shift = [0] * (width + 1)
        border = [0] * (width + 1)
        i, j = width, width + 1
        border[i] = j
        while i > 0:
            while j <= width and pattern[i - 1] != pattern[j - 1]:
                if shift[j] == 0:
                    shift[j] = j - i
                j = border[j]
            i -= 1
            j -= 1
            border[i] = j
        # Positions still without a shift fall back to the widest border.
        j = border[0]
        for i in range(width + 1):
            if shift[i] == 0:
                shift[i] = j
            if i == j:
                j = border[j]

        skipped_bad = 0
        skipped_good = 0
        offset = 0
        while offset <= length - width:
            j = width - 1
            while j >= 0 and pattern[j] == sequence[offset + j]:
                j -= 1
            if j < 0:
                return offset, skipped_bad, skipped_good

            # May be zero or negative when the character occurs right of j;
            # the good-suffix shift is always at least 1, so progress is
            # guaranteed.
            bad_shift = j - last_seen.get(sequence[offset + j], -1)
            good_shift = shift[j + 1]
            if bad_shift >= good_shift:
                skipped_bad += bad_shift - 1
                offset += bad_shift
            else:
                skipped_good += good_shift - 1
                offset += good_shift
        return -1, skipped_bad, skipped_good, offset
    
    def suffix_array(self,sequence,pattern):
        suffix_array = _build_suffix_array(sequence)
        matches = []
        left = 0
        right = len(sequence)
        alignments = 0
        while left < right:
            middle = (left + right) // 2
            suffix = suffix_array[middle]
            alignments += 1
            if pattern > sequence[suffix:suffix + len(pattern)]:
                left = middle + 1
            else:
                right = middle
        start = left
        right = len(sequence)
        while left < right:
            middle = (left + right) // 2
            suffix = suffix_array[middle]
            alignments += 1
            if pattern < sequence[suffix:suffix + len(pattern)]:
                right = middle
            else:
                left = middle + 1
        end = right
        for i in range(start, end):
            suffix = suffix_array[i]
            matches.append(suffix)
        skipped = (len(sequence) - len(pattern) + 1) - alignments
        matches.sort()
        return matches,skipped

    def _build_suffix_array(self,sequence):
        suffixes = [(sequence[i:], i) for i in range(len(sequence))]
        suffixes.sort()
        return [suffix[1] for suffix in suffixes]

    def kmer_indexing(self, sequence, pattern):
        
        kmer_dict = self._build_kmer_index(sequence,min(len(pattern),50))
        matches = []
        kmer = pattern[:min(len(pattern),50)]
        alignments = 0
        if kmer in kmer_dict:
            for index in kmer_dict[kmer]:
                alignments += 1
                if sequence[index:index+len(pattern)] == pattern:
                    matches.append(index)
        skipped = len(sequence) - len(pattern) + 1 - alignments
        return matches, skipped
    
    def _build_kmer_index(self, sequence, k):
        kmer_dict = {}
        for i in range(len(sequence) - k + 1):
            kmer = sequence[i:i+k]
            if kmer not in kmer_dict:
                kmer_dict[kmer] = []
            kmer_dict[kmer].append(i)
        return kmer_dict
    def kmp(self, sequence,pattern):
        failure_table = self.create_failure_table(pattern)
        i = 0
        j = 0
        occurrences = []
        while i < len(sequence):
            if pattern[j] == sequence[i]:
                i += 1
                j += 1
                if j == len(pattern):
                    occurrences.append(i - len(pattern))
                    j = failure_table[j - 1]
            else:
                if j > 0:
                    j = failure_table[j - 1]
                else:
                    i += 1
        return f"Occurrences of '{pattern}': {occurrences}"
    def create_failure_table(self, pattern):
        """Build the KMP failure table for *pattern*.

        Entry ``p`` is the length of the longest proper prefix of
        ``pattern[:p + 1]`` that is also a suffix of it.
        """
        table = [0] * len(pattern)
        matched = 0  # length of the border carried over from the last entry
        for pos in range(1, len(pattern)):
            # Shrink the border until it can be extended or is empty.
            while matched and pattern[pos] != pattern[matched]:
                matched = table[matched - 1]
            if pattern[pos] == pattern[matched]:
                matched += 1
            table[pos] = matched
        return table

# Create the root window, attach the application to it and start the event
# loop.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so these
# statements also run whenever this file is imported.
root = ctk.CTk()
app = BioinformaticsApp(root)
root.mainloop()
