### **Estudio de los genes**

In [15]:
#Libreria para trabajar en la API de la NCBI

pip install biopython




In [72]:
#No correr este código. Este código se usó para consultar en la API de la NCBI si el gen existe, para resolver dudas puntuales.

from Bio import Entrez
import pandas as pd

# Especificar un e-mail para la web de NCBI
Entrez.email = "angel_g_dominguez@hotmail.com"

def informacion_gen(nombre_gen):
    handle = Entrez.esearch(db="gene", term=nombre_gen)
    record = Entrez.read(handle)
    gene_id = record["IdList"][0]  # Con esto se halla el ID del gen que aparece en primer lugar
    handle = Entrez.esummary(db="gene", id=gene_id)
    summary = Entrez.read(handle)
    return summary

nombre_gen = "TFAP2B"  
datos_gen = informacion_gen(nombre_gen)
print(datos_gen)

{'DocumentSummarySet': DictElement({'DocumentSummary': [DictElement({'Name': 'TP53', 'Description': 'tumor protein p53', 'Status': '0', 'CurrentID': '0', 'Chromosome': '17', 'GeneticSource': 'genomic', 'MapLocation': '17p13.1', 'OtherAliases': 'BCC7, BMFS5, LFS1, P53, TRP53', 'OtherDesignations': 'cellular tumor antigen p53|antigen NY-CO-13|mutant tumor protein 53|phosphoprotein p53|transformation-related protein 53|tumor protein 53|tumor supressor p53', 'NomenclatureSymbol': 'TP53', 'NomenclatureName': 'tumor protein p53', 'NomenclatureStatus': 'Official', 'Mim': ['191170'], 'GenomicInfo': [{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11', 'ChrStart': '7687489', 'ChrStop': '7668420', 'ExonCount': '13'}], 'GeneWeight': '938340', 'Summary': 'This gene encodes a tumor suppressor protein containing transcriptional activation, DNA binding, and oligomerization domains. The encoded protein responds to diverse cellular stresses to regulate expression of target genes, thereby inducing cell cycle a

In [94]:
# Se continua con la obtención de una tabla que nos de información específica de cada gen. Para ello, se carga el archivo con los genes de interés.

df = pd.read_excel("microarrays-genes.xlsx")


In [95]:
df

Unnamed: 0,type,Gen
0,1553394_a_at,TP53
1,1553434_at,CYP4Z1
2,1553602_at,ERBB2
3,1555745_a_at,TP53
4,1555778_a_at,VEGFA
...,...,...
130,240304_s_at,EP300
131,242579_at,APP
132,243241_at,TP53
133,37892_at,TP53


In [97]:
%%time

#Se itera y por cada gen se realiza una consulta en la API de la NCBI para obtener información de interés de cara a tomar decisiones
#relativas a genes implicados en procesos tumorales. El objetivo es generar una tabla con los datos de la NCBI para una posterior
#investigación bibliográfica

from Bio import Entrez
import pandas as pd

# SE especifica un e-mail para la web de NCBI
Entrez.email = "angel_g_dominguez@hotmail.com"

def informacion_gen(nombre_gen):
    handle = Entrez.esearch(db="gene", term=nombre_gen)
    record = Entrez.read(handle)
    if record["IdList"]:  # Se verifica si encontramos resultados en la API
        gene_id = record["IdList"][0]  # Devuelve el ID del gen encontrado
        handle = Entrez.esummary(db="gene", id=gene_id)
        summary = Entrez.read(handle)
        return summary
    else:
        print(f"No se encontró información para el gen: {nombre_gen}") #Si no encuentra un gen, devuelve el mensaje
        return None

# Leer el DataFrame con los nombres de los genes
df_genes = pd.read_excel("microarrays-genes.xlsx")  

# Lista para almacenar la información de los genes
informacion_genes = []

# Iterar sobre los nombres de los genes en el DataFrame
for nombre_gen in df_genes["Gen"]:
    datos_gen = informacion_gen(nombre_gen)
    if datos_gen:
        informacion_genes.append(datos_gen)

# Convertir la lista de información de genes a DataFrame
df_informacion_genes = pd.json_normalize([x["DocumentSummarySet"]["DocumentSummary"][0] for x in informacion_genes])


# Guardar la información en un archivo CSV
df_informacion_genes.to_csv("informacion_genes.csv", index=False)

df_informacion_genes

CPU times: total: 55.8 s
Wall time: 6min 7s


Unnamed: 0,Name,Description,Status,CurrentID,Chromosome,GeneticSource,MapLocation,OtherAliases,OtherDesignations,NomenclatureSymbol,...,Mim,GenomicInfo,GeneWeight,Summary,ChrSort,ChrStart,LocationHist,Organism.ScientificName,Organism.CommonName,Organism.TaxID
0,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
1,NR3C1,nuclear receptor subfamily 3 group C member 1,0,0,5,genomic,5q31.3,"GCCR, GCR, GCRST, GR, GRL",glucocorticoid receptor|nuclear receptor subfa...,NR3C1,...,[138040],"[{'ChrLoc': '5', 'ChrAccVer': 'NC_000005.10', ...",110260,"This gene encodes glucocorticoid receptor, whi...",05,143277930,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
2,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
3,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
4,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
131,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
132,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
133,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189


In [98]:
df_infogenes = pd.read_csv("informacion_genes.csv")

In [99]:
pd.json_normalize(df_informacion_genes["GenomicInfo"].explode()) #Normalizamos para limpiar la estructura de algunas columnas

Unnamed: 0,ChrLoc,ChrAccVer,ChrStart,ChrStop,ExonCount
0,13,NC_085944.1,12014682,12015583,2
1,5,NC_000005.10,143435511,143277930,15
2,17,NC_000017.11,7687489,7668420,13
3,13,NC_085944.1,12014682,12015583,2
4,17,NC_000017.11,7687489,7668420,13
...,...,...,...,...,...
130,17,NC_000017.11,7687489,7668420,13
131,17,NC_000017.11,7687489,7668420,13
132,13,NC_085944.1,12014682,12015583,2
133,13,NC_085944.1,12014682,12015583,2


In [103]:
# Tabla final donde se puede encontrar en Summary información relativa si la hay, al igual que podemos hallar
# Obtener resultados en otros organismos implica que el estudio requiere de profundizar en cada apartado, encontrar estudios en
#humana, descartar genes de menor interés, comprobar compatibilidad en estudios de humana, etc... 

df_genes = pd.json_normalize([x["DocumentSummarySet"]["DocumentSummary"][0] for x in informacion_genes])

df_genes

Unnamed: 0,Name,Description,Status,CurrentID,Chromosome,GeneticSource,MapLocation,OtherAliases,OtherDesignations,NomenclatureSymbol,...,Mim,GenomicInfo,GeneWeight,Summary,ChrSort,ChrStart,LocationHist,Organism.ScientificName,Organism.CommonName,Organism.TaxID
0,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
1,NR3C1,nuclear receptor subfamily 3 group C member 1,0,0,5,genomic,5q31.3,"GCCR, GCR, GCRST, GR, GRL",glucocorticoid receptor|nuclear receptor subfa...,NR3C1,...,[138040],"[{'ChrLoc': '5', 'ChrAccVer': 'NC_000005.10', ...",110260,"This gene encodes glucocorticoid receptor, whi...",05,143277930,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
2,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
3,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
4,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
131,TP53,tumor protein p53,0,0,17,genomic,17p13.1,"BCC7, BMFS5, LFS1, P53, TRP53",cellular tumor antigen p53|antigen NY-CO-13|mu...,TP53,...,[191170],"[{'ChrLoc': '17', 'ChrAccVer': 'NC_000017.11',...",938340,This gene encodes a tumor suppressor protein c...,17,7668420,"[{'AnnotationRelease': 'RS_2023_10', 'Assembly...",Homo sapiens,human,9606
132,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
133,LOC134670285,TP53-regulated inhibitor of apoptosis 1-like,0,0,13,genomic,,,TP53-regulated inhibitor of apoptosis 1-like,,...,[],"[{'ChrLoc': '13', 'ChrAccVer': 'NC_085944.1', ...",0,,~~last,999999999,"[{'AnnotationRelease': 'RS_2024_02', 'Assembly...",Cydia fagiglandana,,1458189
