## Install all libraries

In [None]:
!pip install bio
!pip install tqdm
!pip install gprofiler-official
!pip install gradio

Collecting bio
  Downloading bio-1.7.1-py3-none-any.whl.metadata (5.7 kB)
Collecting biopython>=1.80 (from bio)
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting gprofiler-official (from bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting mygene (from bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting biothings-client>=0.2.6 (from mygene->bio)
  Downloading biothings_client-0.3.1-py2.py3-none-any.whl.metadata (9.8 kB)
Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gprofiler_official-1.0.0-py3-none-any.whl

In [None]:
import re
import pprint
from Bio.Seq import Seq
import pandas as pd
from google.colab import drive
from tqdm import tqdm
import gradio as gr

## Create the main structure

### Cds class

In [None]:
class Cds:
    """Look up CDS (coding sequence) features of a gene in an annotation mapping.

    The mapping is read as ``data[chromosome][gene_id]['mRNA']['CDS']``: a list
    of feature dicts, each providing ``id``, ``start``, ``end`` and
    ``orientation``.

    NOTE(review): ``Dictionary.create_dictionary`` in this notebook stores CDS
    lists one level deeper (``...['mRNA'][variant_id]['CDS']``); confirm which
    schema this class is meant to receive.
    """

    def __init__(self, data):
        """Keep a reference to the annotation mapping (it is not copied)."""
        self.data = data

    def get_cds(self, gene_id, cds_id=None):
        """Return the CDS features of ``gene_id`` keyed by CDS id.

        Args:
            gene_id: Gene key to look for under every chromosome.
            cds_id: Optional CDS id; when given (and truthy) only that feature
                is returned.

        Returns:
            dict mapping CDS id to ``chromosome_id``, ``gene_id``, integer
            ``start``/``end`` and ``orientation``. The orientation reported for
            every entry is the one of the first CDS in the list. Empty when
            nothing matches.
        """
        found = {}
        for chromosome_id, gene_map in self.data.items():
            if gene_id not in gene_map:
                continue
            features = gene_map[gene_id].get('mRNA', {}).get('CDS')
            if not features:
                continue
            # All entries share the orientation of the first listed feature.
            strand = features[0]['orientation']
            for feature in features:
                if cds_id and feature['id'] != cds_id:
                    continue
                found[feature['id']] = dict(
                    chromosome_id=chromosome_id,
                    gene_id=gene_id,
                    start=int(feature['start']),
                    end=int(feature['end']),
                    orientation=strand,
                )
        return found

### Exon class

In [None]:
class Exon:
    """Look up exon features of a gene in an annotation mapping.

    The mapping is read as ``data[chromosome][gene_id]['mRNA']['exon']``: a
    list of feature dicts, each providing ``id``, ``start``, ``end`` and
    ``orientation``.

    NOTE(review): ``Dictionary.create_dictionary`` in this notebook stores exon
    lists one level deeper (``...['mRNA'][variant_id]['exon']``); confirm which
    schema this class is meant to receive.
    """

    def __init__(self, data):
        """Keep a reference to the annotation mapping (it is not copied)."""
        self.data = data

    def get_exon(self, gene_id, exon_id=None):
        """Return the exon features of ``gene_id`` keyed by exon id.

        Args:
            gene_id: Gene key to look for under every chromosome.
            exon_id: Optional exon id; when given (and truthy) only that
                feature is returned.

        Returns:
            dict mapping exon id to ``chromosome_id``, ``gene_id``, integer
            ``start``/``end`` and ``orientation``. The orientation reported for
            every entry is the one of the first exon in the list. Empty when
            nothing matches.
        """
        found = {}
        for chromosome_id, gene_map in self.data.items():
            if gene_id not in gene_map:
                continue
            features = gene_map[gene_id].get('mRNA', {}).get('exon')
            if not features:
                continue
            # All entries share the orientation of the first listed feature.
            strand = features[0]['orientation']
            for feature in features:
                if exon_id and feature['id'] != exon_id:
                    continue
                found[feature['id']] = dict(
                    chromosome_id=chromosome_id,
                    gene_id=gene_id,
                    start=int(feature['start']),
                    end=int(feature['end']),
                    orientation=strand,
                )
        return found

### UTR class

In [None]:
class UTR:
    """Derive 5' and 3' UTR coordinate ranges for transcript variants.

    ``data`` is read as ``data[chromosome][gene][feature_type][variant_id]``,
    where each variant record holds ``CDS`` and ``exon`` lists (dicts with
    ``start``/``end``) and an ``orientation`` value.
    """

    # Gene-level keys that never hold a group of variants.
    _NON_VARIANT_KEYS = ("CDS", "exon", "type", "start", "end", "orientation")

    def __init__(self, data):
        """Keep a reference to the annotation mapping (it is not copied)."""
        self.data = data

    def _find_variant(self, variant):
        """Locate ``variant`` and return ``(chromosome, gene, feature_type)``.

        Only dict-valued gene entries are searched. Scalar gene fields (for
        example ``GeneID``, which may be ``None`` or a string) are skipped so
        they can neither raise ``TypeError`` nor produce substring matches.
        Returns ``(None, None, None)`` when the variant is not present.
        """
        for chromosome, genes in self.data.items():
            for gene, gene_data in genes.items():
                for intermediate_type, variants in gene_data.items():
                    if intermediate_type in self._NON_VARIANT_KEYS:
                        continue
                    if isinstance(variants, dict) and variant in variants:
                        return chromosome, gene, intermediate_type
        return None, None, None

    def _variant_features(self, variant):
        """Return ``(chromosome, gene, record)`` for a usable variant, else ``None``.

        A variant is usable when it can be located and has at least one CDS
        and one exon entry; variants without them have no UTR to report.
        """
        if not variant:
            return None
        chromosome, gene, intermediate_type = self._find_variant(variant)
        if chromosome is None or gene is None or intermediate_type is None:
            return None
        record = self.data[chromosome][gene][intermediate_type][variant]
        if not record.get('CDS') or not record.get('exon'):
            return None
        return chromosome, gene, record

    def get_five_utr(self, chromosome=None, gene=None, variant=None):
        """Return the 5' UTR range of ``variant``.

        Args:
            chromosome: Accepted for backward compatibility; the location is
                always resolved from ``variant``.
            gene: Accepted for backward compatibility; see ``chromosome``.
            variant: Variant (transcript) id to look up.

        Returns:
            dict with ``chromosome_id``, ``gene_id``, ``start`` (start of the
            first exon), ``end`` (one before the start of the first CDS) and
            ``orientation``; ``None`` when ``variant`` is missing, unknown, or
            has no CDS/exon entries.

        NOTE(review): the first list entries are used regardless of
        orientation; confirm this is the intended range for 3'5' variants.
        """
        located = self._variant_features(variant)
        if located is None:
            return None
        chromosome, gene, record = located
        return {
            "chromosome_id": chromosome,
            "gene_id": gene,
            "start": int(record['exon'][0]['start']),
            "end": int(record['CDS'][0]['start']) - 1,
            "orientation": record["orientation"]
        }

    def get_three_utr(self, chromosome=None, gene=None, variant=None):
        """Return the 3' UTR range of ``variant``.

        Args:
            chromosome: Accepted for backward compatibility; the location is
                always resolved from ``variant``.
            gene: Accepted for backward compatibility; see ``chromosome``.
            variant: Variant (transcript) id to look up.

        Returns:
            dict with ``chromosome_id``, ``gene_id``, ``start`` (one after the
            end of the last CDS), ``end`` (end of the last exon) and
            ``orientation``; ``None`` when ``variant`` is missing, unknown, or
            has no CDS/exon entries.

        NOTE(review): the last list entries are used regardless of
        orientation; confirm this is the intended range for 3'5' variants.
        """
        located = self._variant_features(variant)
        if located is None:
            return None
        chromosome, gene, record = located
        return {
            "chromosome_id": chromosome,
            "gene_id": gene,
            "start": int(record['CDS'][-1]['end']) + 1,
            "end": int(record['exon'][-1]['end']),
            "orientation": record["orientation"]
        }

### Fasta Handler class

In [None]:
class FASTAHandlerUpdated:
    """Load a FASTA file into memory and serve 1-based sub-sequences from it."""

    # Complement of every IUPAC DNA code, so ambiguity codes found in genome
    # assemblies (R, Y, K, M, ...) no longer abort reverse-complementing.
    _COMPLEMENT = {
        'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C',
        'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
        'S': 'S', 'W': 'W',
        'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
        'N': 'N',
    }

    def __init__(self, fasta_file_path):
        """Read the whole file at ``fasta_file_path`` and index it by record id."""
        with open(fasta_file_path, 'r') as f:
            fasta_content = f.read()
        self.fasta_data = self.parse_fasta(fasta_content)

    @staticmethod
    def reverse_complement(sequence):
        """Return the upper-cased reverse complement of a DNA sequence.

        Raises:
            KeyError: if the sequence contains a character that is not an
                IUPAC DNA code.
        """
        complement = FASTAHandlerUpdated._COMPLEMENT
        return ''.join(complement[base] for base in reversed(sequence.upper()))

    @staticmethod
    def parse_fasta(fasta_content):
        """Parse FASTA text into ``{record_id: sequence}``.

        The record id is the first whitespace-delimited token of the header
        line. All whitespace (including ``\\r`` from CRLF files) is removed
        from the sequence. A record with a header but no sequence lines maps
        to an empty string; a record with a blank header is skipped.
        """
        fasta_dict = {}
        for entry in fasta_content.strip().split(">"):
            if not entry:
                continue
            # partition() tolerates a header-only record (no newline at all).
            header, _, sequence = entry.partition("\n")
            header_fields = header.split()
            if not header_fields:
                continue
            fasta_dict[header_fields[0]] = "".join(sequence.split())
        return fasta_dict

    def get_sequence(self, chromosome_id, start, end, orientation="5'3'"):
        """Return the sequence covering 1-based inclusive ``start``..``end``.

        Args:
            chromosome_id: Record id as stored by ``parse_fasta``.
            start: 1-based first position; values below 1 are treated as 1 so
                the slice cannot wrap around to the end of the sequence.
            end: 1-based last position (inclusive).
            orientation: ``"3'5'"`` returns the reverse complement; any other
                value returns the sequence as stored.

        Raises:
            KeyError: if ``chromosome_id`` is not a known record.
        """
        sequence = self.fasta_data[chromosome_id][max(start, 1) - 1:end]
        if orientation == "3'5'":
            sequence = self.reverse_complement(sequence)
        return sequence

    def get_chromosome_sequence(self, chromosome_id):
        """Return the full stored sequence, or ``""`` for an unknown record id."""
        return self.fasta_data.get(chromosome_id, "")

### Dictionary class

In [None]:
class Dictionary:
    """Parse a GFF annotation file into a nested chromosome/gene/feature mapping."""

    # Gene-record keys that are never treated as groups of child features.
    _NON_GROUP_KEYS = ("CDS", "exon", "type", "start", "end", "orientation")

    def __init__(self, gff_file, output_file_name):
        """Store the input path; ``output_file_name`` is kept but not used here."""
        self.gff_file = gff_file
        self.output_file_name = output_file_name

    def create_dictionary(self):
        """Read the GFF file and return the nested annotation mapping.

        Resulting layout::

            data[chromosome][gene_id] = {
                "type", "start", "end", "orientation", "GeneID",
                <feature_type>: {
                    <feature_id>: {"type", "start", "end", "orientation",
                                   "CDS": [...], "exon": [...]},
                },
            }

        * ``gene``/``pseudogene`` rows open a new gene record.
        * Any other non-CDS/exon row is stored under the most recent gene of
          the same chromosome, grouped by its feature type.
        * ``CDS``/``exon`` rows are appended to the feature of the current
          gene whose id equals their ``Parent`` attribute.
        * Rows that appear on a chromosome before its first gene are skipped.
        * Comment lines, blank lines and rows with fewer than nine
          tab-separated columns are skipped.

        Coordinates are kept as the strings found in the file. Orientation is
        ``"5'3'"`` for strand ``+`` and ``"3'5'"`` for anything else.
        """
        attribute_pattern = re.compile(r"(\w+)=(.*?)(?:;|$)")
        geneid_pattern = re.compile(r"GeneID:(\d+)")
        current_chromosome = None
        current_gene = None
        data = {}

        with open(f'{self.gff_file}', "r") as file:
            for line in tqdm(file, desc='Reading GFF file'):
                line = line.strip()
                if line.startswith('#') or line == '':
                    continue
                columns = line.split('\t')
                # Column 9 (attributes) is read below, so nine columns are required.
                if len(columns) < 9:
                    continue
                chromosome = columns[0]
                feature_type = columns[2]
                start = columns[3]
                end = columns[4]
                orientation = "5'3'" if columns[6] == '+' else "3'5'"
                content = columns[8]

                attributes = dict(attribute_pattern.findall(content))
                geneid_match = geneid_pattern.search(content)
                geneid = geneid_match.group(1) if geneid_match else None

                id_value = attributes.get('ID', '')
                parent = attributes.get('Parent', '')

                if chromosome != current_chromosome:
                    current_chromosome = chromosome
                    # A gene never spans sequences: forget the previous one so
                    # leading rows of the new sequence are not attached to it.
                    current_gene = None
                    if current_chromosome not in data:
                        data[current_chromosome] = {}

                if feature_type in ("gene", "pseudogene"):
                    current_gene = id_value
                    data[current_chromosome][current_gene] = {
                        "type": feature_type, "start": start, "end": end,
                        "orientation": orientation, "GeneID": geneid}
                    continue

                if not current_gene:
                    continue
                gene_record = data[current_chromosome][current_gene]

                if feature_type not in ("CDS", "exon"):
                    group = gene_record.setdefault(feature_type, {})
                    group[id_value] = {
                        "type": feature_type, "start": start, "end": end,
                        "orientation": orientation, "CDS": [], "exon": []}
                    continue

                # CDS / exon: attach to every feature group of the current gene
                # that contains the parent id (direct key lookup, no scan).
                for group_name, group in gene_record.items():
                    if group_name in self._NON_GROUP_KEYS or not isinstance(group, dict):
                        continue
                    if parent in group:
                        group[parent][feature_type].append(
                            {"id": id_value, "type": feature_type, "start": start,
                             "end": end, "orientation": orientation}
                        )

        return data


## Create core functions

In [None]:
def add_sequence_to_data(chromosome_id, start, end, orientation, handler=None):
    """Fetch a sub-sequence through a FASTA handler.

    Args:
        chromosome_id: Sequence/record id to read from.
        start: First position, passed through to ``get_sequence``.
        end: Last position, passed through to ``get_sequence``.
        orientation: Orientation flag, passed through to ``get_sequence``.
        handler: Object exposing ``get_sequence(chromosome_id, start, end,
            orientation)``. When omitted, the notebook-level ``fasta_handler``
            variable is used, as before.

    Returns:
        Whatever ``handler.get_sequence`` returns.
    """
    if handler is None:
        # Backward-compatible default: the handler created in the loading cell.
        handler = fasta_handler
    return handler.get_sequence(chromosome_id, start, end, orientation)

def translate_dna_to_protein(dna_sequence):
    """Translate a nucleotide sequence into one-letter amino acids.

    The sequence is upper-cased and ``U`` is replaced by ``T`` before it is
    read codon by codon from position 0 using the standard genetic code.

    * Stop codons (TAA, TAG, TGA) are emitted as ``'X'``.
    * Codons that are not in the table (for example ones containing ``N``)
      contribute nothing to the output.
    * One or two trailing bases that do not form a full codon are ignored.

    Args:
        dna_sequence: DNA or RNA sequence as a string.

    Returns:
        The translated protein string (possibly empty).
    """
    # Standard genetic code in the conventional T, C, A, G ordering of the
    # first, second and third base; generating the 64 codons avoids omissions
    # such as a missing TGG (Trp) or an RNA-style ACU entry.
    bases = "TCAG"
    amino_acids = "FFLLSSSSYYXXCCXWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
    codons = [first + second + third
              for first in bases for second in bases for third in bases]
    codon_table = dict(zip(codons, amino_acids))

    sequence = dna_sequence.upper().replace('U', 'T')
    full_codon_length = len(sequence) - len(sequence) % 3

    return ''.join(
        codon_table.get(sequence[i:i + 3], '')
        for i in range(0, full_codon_length, 3)
    )



def _utr_ranges(variant_data):
    """Return ``((five_start, five_end), (three_start, three_end))`` or ``None``.

    Ranges are 1-based inclusive genomic coordinates spanning from the outer
    exon boundary to the outer CDS boundary. On the ``3'5'`` strand the 5' UTR
    lies at the high-coordinate end of the transcript. ``None`` is returned
    when the variant has no CDS, no exon, or an unrecognised orientation.
    """
    cds_parts = variant_data.get('CDS')
    exon_parts = variant_data.get('exon')
    if not cds_parts or not exon_parts:
        return None

    # min/max make the result independent of the order features were listed in.
    exon_low = min(int(exon['start']) for exon in exon_parts)
    exon_high = max(int(exon['end']) for exon in exon_parts)
    cds_low = min(int(cds['start']) for cds in cds_parts)
    cds_high = max(int(cds['end']) for cds in cds_parts)

    low_side = (exon_low, cds_low - 1)
    high_side = (cds_high + 1, exon_high)

    orientation = variant_data.get('orientation')
    if orientation == "5'3'":
        return low_side, high_side
    if orientation == "3'5'":
        return high_side, low_side
    return None


def create_dict(data, dictionary, fasta_handler):
    """Attach nucleotide and protein sequences to the annotation mapping in place.

    For every variant (dict-valued entry group of a gene) this adds:

    * ``cds_sequence`` / ``exon_sequence``: list with one sequence per CDS /
      exon feature, in the listed order;
    * ``five_utr`` / ``three_utr``: sequence between the outer exon boundary
      and the outer CDS boundary on the matching side, when the variant has
      both CDS and exon features.

    For every gene this adds ``id`` (copy of ``GeneID`` when present),
    ``cds_sequence`` and ``exon_sequence`` (concatenation over all variants of
    the gene) and, when any CDS exists, ``amino_acids`` translated from the
    concatenated CDS sequence.

    Args:
        data: Mapping ``data[chromosome][gene]``, mutated in place.
        dictionary: Unused; kept so existing calls keep working.
        fasta_handler: Object exposing ``get_sequence(chromosome_id, start,
            end, orientation)``; used for every sequence lookup.

    Returns:
        The same ``data`` object.
    """
    for chromosome, genes in data.items():
        for gene_data in genes.values():
            gene_cds_sequences = []
            gene_exon_sequences = []

            if 'GeneID' in gene_data:
                gene_data['id'] = gene_data['GeneID']

            for variants in gene_data.values():
                if not isinstance(variants, dict):
                    continue
                for variant_data in variants.values():
                    for feature_key, sequence_key, collected in (
                        ('CDS', 'cds_sequence', gene_cds_sequences),
                        ('exon', 'exon_sequence', gene_exon_sequences),
                    ):
                        features = variant_data.get(feature_key)
                        if not features:
                            continue
                        sequences = [
                            fasta_handler.get_sequence(
                                chromosome, int(feature['start']),
                                int(feature['end']), feature['orientation'])
                            for feature in features
                        ]
                        variant_data[sequence_key] = sequences
                        collected.extend(sequences)

                    ranges = _utr_ranges(variant_data)
                    if ranges is not None:
                        (five_start, five_end), (three_start, three_end) = ranges
                        strand = variant_data['orientation']
                        variant_data['five_utr'] = fasta_handler.get_sequence(
                            chromosome, five_start, five_end, strand)
                        variant_data['three_utr'] = fasta_handler.get_sequence(
                            chromosome, three_start, three_end, strand)

            gene_data['cds_sequence'] = ''.join(gene_cds_sequences)
            gene_data['exon_sequence'] = ''.join(gene_exon_sequences)

            # NOTE(review): the protein is translated from the CDS of all
            # variants concatenated together; confirm this is intended for
            # genes with more than one coding variant.
            if gene_cds_sequences:
                gene_data['amino_acids'] = translate_dna_to_protein(gene_data['cds_sequence'])

    return data


def dictionary_to_dataframe(data):
    """Flatten the nested annotation mapping into one DataFrame row per variant.

    Every dict-valued entry of a gene record is treated as a group of
    variants; the group's key is written to ``RNA_ID`` and each variant id to
    ``Variant_ID``. ``Gene_Name`` receives the gene record's ``id`` value and
    ``Amino_Acids`` the gene-level ``amino_acids`` value. ``CDS_Sequence`` and
    ``Exon_Sequence`` hold the per-feature lists as stored; the ``*_Clean``
    columns hold the joined sequences with spaces and commas removed. The
    last three columns are filled with empty strings.

    Args:
        data: Mapping ``data[chromosome][gene_id]`` of gene records.

    Returns:
        pandas.DataFrame with the 18 columns listed below.
    """
    column_names = [
        "Gene_ID", "RNA_ID", "Variant_ID", "Chromosome",
        "Start_Position", "End_Position", "Orientation",
        "CDS_Sequence", "CDS_Sequence_Clean", "Exon_Sequence", "Exon_Sequence_Clean", "5'UTR_Sequence", "3'UTR_Sequence",
        "Gene_Name", "Amino_Acids", "Comments", "Database_Link", "Publication_Reference"
    ]

    def _joined(parts):
        # Collapse a list of sequences into one string without separators.
        return ''.join(parts).replace(' ', '').replace(',', '')

    records = []
    for chromosome, genes in data.items():
        for gene_id, gene_data in genes.items():
            numeric_gene_id = gene_data.get('id', '')
            variant_groups = (
                (group_name, group)
                for group_name, group in gene_data.items()
                if isinstance(group, dict)
            )
            for group_name, group in variant_groups:
                for variant_id, info in group.items():
                    cds_parts = info.get("cds_sequence", [])
                    exon_parts = info.get("exon_sequence", [])
                    records.append([
                        gene_id,
                        group_name,
                        variant_id,
                        chromosome,
                        info.get("start", ""),
                        info.get("end", ""),
                        info.get("orientation", ""),
                        cds_parts,
                        _joined(cds_parts),
                        exon_parts,
                        _joined(exon_parts),
                        info.get("five_utr", ""),
                        info.get("three_utr", ""),
                        numeric_gene_id,
                        gene_data.get("amino_acids", ""),
                        "",
                        "",
                        "",
                    ])

    return pd.DataFrame(records, columns=column_names)


## Load raw data and create the dataframe

In [None]:
# Mount Google Drive so the annotation and genome files are reachable (Colab only).
drive.mount('/content/drive')
# Input files: the GFF annotation and the matching genomic FASTA.
gff_file = '/content/drive/My Drive/Ufscar/Bio_francis/Base Dados/GCF.gff'
fasta_file_path = "/content/drive/My Drive/Ufscar/Bio_francis/Base Dados/GCF_003254395.2_Amel_HAv3.1_genomic.fna"
# Passed to Dictionary, which stores it but does not write to it in create_dictionary.
# NOTE(review): the name ends in "humano" while the FASTA file name contains "Amel" - confirm.
output_file_name = "NOVO_TESTE_gff_converted_to_dictionary_humano.txt"
dictionary = Dictionary(gff_file, output_file_name)
# Parse the GFF into the nested chromosome -> gene -> feature mapping.
data = dictionary.create_dictionary()
# Reads the whole FASTA file into memory; later cells use this handler as a global.
fasta_handler = FASTAHandlerUpdated(fasta_file_path)

Mounted at /content/drive


Reading GFF file: 537418it [00:17, 30439.23it/s]


In [None]:
# Annotate the nested mapping with sequences, then flatten it to a DataFrame.
# NOTE: `data` is rebound from a dict to a DataFrame here, so this cell cannot be
# re-run without first re-running the loading cell that rebuilds the dict.
data = create_dict(data, dictionary, fasta_handler)
data = dictionary_to_dataframe(data)

## Fix the dataframe for the webapp

In [None]:
# Sequence columns to normalise to upper case.
columns_to_upper = [
    'CDS_Sequence',
    'CDS_Sequence_Clean',
    'Exon_Sequence',
    'Exon_Sequence_Clean',
    "5'UTR_Sequence",
    "3'UTR_Sequence"
]


def _upper_sequence(value):
    """Upper-case a sequence string, or each string inside a list of sequences.

    ``CDS_Sequence`` and ``Exon_Sequence`` hold lists of per-feature sequences;
    the ``.str`` accessor would turn those cells into NaN, so lists are handled
    element by element. Any other value is returned unchanged.
    """
    if isinstance(value, str):
        return value.upper()
    if isinstance(value, list):
        return [_upper_sequence(part) for part in value]
    return value


for column in columns_to_upper:
    if column in data.columns:
        data[column] = data[column].map(_upper_sequence)

# Drop the "gene-" / "rna-" id prefixes so ids match what users type in the webapp.
data['Gene_ID'] = data['Gene_ID'].str.replace("gene-", "", regex=False)
data['Variant_ID'] = data['Variant_ID'].str.replace("rna-", "", regex=False)

Unnamed: 0,Gene_ID,RNA_ID,Variant_ID,Chromosome,Start_Position,End_Position,Orientation,CDS_Sequence,CDS_Sequence_Clean,Exon_Sequence,Exon_Sequence_Clean,5'UTR_Sequence,3'UTR_Sequence,Gene_Name,Amino_Acids,Comments,Database_Link,Publication_Reference
0,LOC551580,transcript,XR_001705491.2,NC_037638.1,9274,12174,3'5',,,,TCCATCAATATTTATTATTAACGTTTGTATCATTAATTTATCAATA...,,,551580,MSIENSEISVTIEFGGGAELLFNKKKRHEVNLPGVNETIQKLLFIT...,,,
1,LOC551580,transcript,XR_001705490.2,NC_037638.1,9274,12174,3'5',,,,TCCATCAATATTTATTATTAACGTTTGTATCATTAATTTATCAATA...,,,551580,MSIENSEISVTIEFGGGAELLFNKKKRHEVNLPGVNETIQKLLFIT...,,,
2,LOC551580,mRNA,XM_623972.6,NC_037638.1,9273,12174,3'5',,ATGTCTATCGAAAATAGCGAAATTTCCGTGACCATTGAATTTGGAG...,,TCCATCAATATTTATTATTAACGTTTGTATCATTAATTTATCAATA...,,,551580,MSIENSEISVTIEFGGGAELLFNKKKRHEVNLPGVNETIQKLLFIT...,,,
3,LOC551555,mRNA,XM_006557405.3,NC_037638.1,10792,17179,5'3',,ATGTCGGAAGAAAATGAACATACTTCTGAAGAAACTGAGGCAGAAA...,,TGTCGCTACGAGAAACTACACGTGCCAGGGGAATAATTTGTGTAAA...,TGTCGCTACGAGAAACTACACGTGCCAGGGGAATAATTTGTGTAAA...,GGAAACTGATACGATTCTAAATAAGTGCTTTTTCCTTGTACAAATT...,551555,MSEENEHSEEEAEMNTENRNKEQSKSETCVLNNETEQPMLEDEEMP...,,,
4,LOC551555,mRNA,XM_006557404.3,NC_037638.1,12282,17180,5'3',,ATGTCGGAAGAAAATGAACATACTTCTGAAGAAACTGAGGCAGAAA...,,TCATTAGTCTCGCTATTATGAATTTAATTACGATGTGAAGATAGTA...,TCATTAGTCTCGCTATTATGAATTTAATTACGATGTGAAGATAGTA...,GGAAACTGATACGATTCTAAATAAGTGCTTTTTCCTTGTACAAATT...,551555,MSEENEHSEEEAEMNTENRNKEQSKSETCVLNNETEQPMLEDEEMP...,,,


## Create the Webapp

Core functions for the webapp

In [None]:
def find_gene_info(data, queries):
    """Search every column of ``data`` for each query and format hits as FASTA text.

    A row matches when the query occurs as a literal substring (no regular
    expression interpretation) in the string form of any of its cells. Each
    matching row produces five FASTA records: 3' UTR, 5' UTR, CDS, transcript
    (exon sequence) and amino acids.

    Args:
        data: DataFrame with at least the columns ``Gene_ID``, ``Variant_ID``,
            ``5'UTR_Sequence``, ``3'UTR_Sequence``, ``CDS_Sequence_Clean``,
            ``Exon_Sequence_Clean``, ``Chromosome``, ``Gene_Name`` and
            ``Amino_Acids``.
        queries: Iterable of search strings.

    Returns:
        All results joined by newlines, or ``None`` when ``queries`` is empty.
        A query without hits contributes ``"No information found for: ..."``;
        a query that raises contributes ``"Error processing query ..."``.
    """
    all_results = []

    for query in queries:
        try:
            # regex=False: ids such as "XM_623972.6" contain regex
            # metacharacters and must be matched literally.
            hits = data.apply(
                lambda col: col.astype(str).str.contains(query, na=False, regex=False)
            )
            row_mask = hits.any(axis=1)
            if not row_mask.any():
                all_results.append(f"No information found for: {query}")
                continue

            for _, row in data[row_mask].iterrows():
                gene = row["Gene_ID"].replace("gene-", "")
                variant = row["Variant_ID"].replace("rna-", "")
                chromosome = row["Chromosome"]
                gene_name = row["Gene_Name"]
                prefix = f">[{gene}][{gene_name}][{variant}]"

                sections = [
                    ("three_utr_sequence", row["3'UTR_Sequence"]),
                    ("five_utr_sequence", row["5'UTR_Sequence"]),
                    ("cds", row["CDS_Sequence_Clean"]),
                    ("transcript", row["Exon_Sequence_Clean"]),
                    ("aminoacids", row["Amino_Acids"]),
                ]
                all_results.append("\n".join(
                    f"{prefix}[{label}][{chromosome}]\n{sequence}"
                    for label, sequence in sections
                ))

        except Exception as e:
            # Best effort per query: report the failure and keep going.
            print(f"An error occurred while processing the query '{query}': {e}")
            all_results.append(f"Error processing query '{query}': {e}")

    return "\n".join(all_results) if all_results else None



def save_result_to_txt(result):
    """Write ``result`` to ``result.txt`` in the working directory.

    Args:
        result: Text to write.

    Returns:
        The path of the written file, or ``None`` when the file could not be
        written. ``None`` (rather than an error message) lets callers such as
        ``search_genes`` test the return value before offering it as a
        download.
    """
    output_file_path = 'result.txt'
    try:
        with open(output_file_path, 'w', encoding='utf-8') as file:
            file.write(result)
    except OSError as e:
        print(f"Erro ao salvar o arquivo: {e}")
        return None
    return output_file_path

### Webapp structure

In [None]:

# Save the search result to a .txt file
def save_result_to_txt(result):
    """Write ``result`` to ``result.txt`` in the working directory.

    Returns the file path on success and ``None`` if anything goes wrong
    while opening or writing the file.
    """
    target_path = 'result.txt'  # Location of the generated file
    try:
        with open(target_path, 'w') as out:
            out.write(result)
    except Exception:
        return None
    else:
        return target_path  # Path handed back for the download widget

# Main entry point: runs the search and generates the .txt file
def search_genes(search_query):
    """Run a gene search for the text typed into the web form.

    The input is converted to ``str``, carriage returns are removed, and the
    text is split on commas and line breaks into non-empty, stripped terms.
    The terms are looked up with ``find_gene_info`` against the notebook-level
    ``data`` DataFrame and the result text is saved with
    ``save_result_to_txt``.

    Returns:
        ``(result_text, file_path)``; ``file_path`` is ``None`` when saving
        failed. When there is no result text the tuple is
        ``("Nenhum resultado encontrado.", None)``.
    """
    cleaned = str(search_query).replace("\r", "").strip()

    # Commas and line breaks both separate search terms.
    terms = []
    for piece in cleaned.replace(",", "\n").splitlines():
        piece = piece.strip()
        if piece:
            terms.append(piece)

    found = find_gene_info(data, terms)
    if not found:
        return "Nenhum resultado encontrado.", None

    saved_path = save_result_to_txt(found)
    # Fall back to text only when the file could not be written.
    return (found, saved_path) if saved_path else (found, None)

# Build the Gradio interface: one text input, and a text output plus a
# downloadable file produced by search_genes.
interface = gr.Interface(
    fn=search_genes,
    inputs=gr.Textbox(lines=2, placeholder="Digite os termos de pesquisa separados por vírgula ou vírgula e espaço"),
    outputs=[gr.Textbox(label="Resultado da Pesquisa"), gr.File(label="Baixar resultado como .txt")],
    title="Pesquisa de Genes",
    description="Digite termos de pesquisa e obtenha informações sobre genes. Você pode baixar o resultado como um arquivo .txt."
)

# Launch the interface. share=True requests a public URL (see the cell output),
# so anyone with the link can query the loaded data while the session runs.
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9061c25283a4204a9d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


