## **MACHINE LEARNING PRODUCTS (MLOps)**


***En este cuaderno Jupyter se encuentra un producto mínimo viable de nuestro proyecto, "Sistema de Recomendación del Mundial 2026"; solo vamos a utilizar la data de Google Maps.***

In [1]:
# Librerias
import pymysql
import mysql.connector
from mysql.connector import Error
from sqlalchemy import create_engine
from dotenv import load_dotenv
import pandas as pd
import os

In [2]:
# The data lives on AWS; gather the settings needed to connect to it.
# Load the .env file
load_dotenv()

# Read the database credentials from environment variables.
# os.getenv returns None for any variable that is not set.
password_bd = os.getenv("PASS_BD")
user_bd = os.getenv("USER_BD")
host_name = os.getenv("HOST_NAME")
name_bd = os.getenv("NAME_BD")

# Conectamos la base de datos


def BD_connection(host_name, user_bd, user_pass, name_bd, port=3306):
    """Open a MySQL connection and return it, or None if connecting fails.

    Args:
        host_name: Host name or address of the MySQL server.
        user_bd: User name used to authenticate.
        user_pass: Password for ``user_bd``.
        name_bd: Name of the database to select.
        port: TCP port of the MySQL server. Defaults to 3306, the value
            that was previously hard-coded.

    Returns:
        The object returned by ``mysql.connector.connect``, or ``None`` when
        a ``mysql.connector.Error`` is raised. The error is printed rather
        than propagated, so callers must check for ``None``.
    """
    connection = None
    try:
        connection = mysql.connector.connect(
            host=host_name,
            user=user_bd,
            passwd=user_pass,
            database=name_bd,
            port=port,
        )
        print("MYSQL DATABASE connection succesful")
    except Error as err:
        print(f"Error: '{err}'")
    return connection

In [3]:
connection = BD_connection(host_name, user_bd, password_bd, name_bd)

MYSQL DATABASE connection succesful


In [4]:
# Verify the connection by running a trivial query
if connection:
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT DATABASE()")
        db = cursor.fetchone()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
    print(f"Conectado a la base de datos: {db[0]}")

Conectado a la base de datos: Yelp


In [5]:
def show_tables(connection):
    """Print every row returned by ``SHOW TABLES`` on ``connection``.

    Args:
        connection: Open database connection exposing ``cursor()``.
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SHOW TABLES")
        for table in cursor.fetchall():
            print(table)
    finally:
        # Always release the cursor, even when the query fails.
        cursor.close()

In [6]:
# Run the query
# NOTE(review): this opens a new connection and rebinds `connection` without
# closing the one created in an earlier cell — confirm the reconnect is intended.
connection = BD_connection(host_name, user_bd, password_bd, name_bd)
show_tables(connection)

MYSQL DATABASE connection succesful
('business',)
('sites',)


In [7]:
def describe_table(connection, table_name):
    """Print every row returned by ``DESCRIBE <table_name>``.

    Args:
        connection: Open database connection exposing ``cursor()``.
        table_name: Name of the table to describe.
    """
    # NOTE: table_name is interpolated directly into the SQL text, so only
    # pass trusted table names.
    query = f"DESCRIBE {table_name}"
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        for column in cursor.fetchall():
            print(column)
    finally:
        # Always release the cursor, even when the query fails.
        cursor.close()

In [8]:
# Print the column definitions of the 'business' table
describe_table(connection, 'business')

('business_id', b'text', 'YES', '', None, '')
('name', b'text', 'YES', '', None, '')
('address', b'text', 'YES', '', None, '')
('city', b'text', 'YES', '', None, '')
('state', b'text', 'YES', '', None, '')
('postal_code', b'text', 'YES', '', None, '')
('latitude', b'double', 'YES', '', None, '')
('longitude', b'double', 'YES', '', None, '')
('stars', b'double', 'YES', '', None, '')
('review_count', b'bigint', 'YES', '', None, '')
('categories', b'text', 'YES', '', None, '')
('hours', b'text', 'YES', '', None, '')
('RestaurantsDelivery', b'text', 'YES', '', None, '')
('BusinessAcceptsCreditCards', b'text', 'YES', '', None, '')
('BusinessParking', b'text', 'YES', '', None, '')
('BikeParking', b'text', 'YES', '', None, '')
('RestaurantsPriceRange2', b'text', 'YES', '', None, '')
('RestaurantsTakeOut', b'text', 'YES', '', None, '')
('ByAppointmentOnly', b'text', 'YES', '', None, '')
('GoodForKids', b'text', 'YES', '', None, '')
('DogsAllowed', b'text', 'YES', '', None, '')
('Ambience', b't

In [9]:
# Print the column definitions of the 'sites' table
describe_table(connection, 'sites')

('business_id', b'text', 'YES', '', None, '')
('name', b'text', 'YES', '', None, '')
('address', b'text', 'YES', '', None, '')
('city', b'text', 'YES', '', None, '')
('state', b'text', 'YES', '', None, '')
('postal_code', b'text', 'YES', '', None, '')
('latitude', b'double', 'YES', '', None, '')
('longitude', b'double', 'YES', '', None, '')
('stars', b'double', 'YES', '', None, '')
('review_count', b'bigint', 'YES', '', None, '')
('categories', b'text', 'YES', '', None, '')
('hours', b'text', 'YES', '', None, '')
('RestaurantsDelivery', b'text', 'YES', '', None, '')
('BusinessAcceptsCreditCards', b'text', 'YES', '', None, '')
('BusinessParking', b'text', 'YES', '', None, '')
('BikeParking', b'text', 'YES', '', None, '')
('RestaurantsPriceRange2', b'text', 'YES', '', None, '')
('RestaurantsTakeOut', b'text', 'YES', '', None, '')
('ByAppointmentOnly', b'text', 'YES', '', None, '')
('GoodForKids', b'text', 'YES', '', None, '')
('DogsAllowed', b'text', 'YES', '', None, '')
('Ambience', b't

In [10]:
def execute_query(connection, query):
    """Execute ``query`` on ``connection`` and commit it.

    Errors of type ``Error`` are printed instead of raised. Nothing is
    returned.

    NOTE: a later cell in this notebook redefines ``execute_query`` with a
    ``fetch_data`` option; once that cell runs, it replaces this version.

    Args:
        connection: Open database connection.
        query: SQL text to execute.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query hecha con éxito")
    except Error as err:
        print("Error", err)
    finally:
        # Always release the cursor, on success and on failure.
        cursor.close()

In [11]:
def execute_query(connection, query, fetch_data=False):
    """Run a SQL statement on ``connection`` and optionally return its rows.

    The transaction is ended exactly once: committed when the statement
    (and the fetch, if requested) succeeds, rolled back otherwise, so a
    failed statement is never committed.

    Args:
        connection: Open database connection.
        query: SQL text to execute.
        fetch_data: When True, fetch and return all result rows.

    Returns:
        The result of ``cursor.fetchall()`` when ``fetch_data`` is True and
        the statement succeeds; ``None`` otherwise. Errors of type ``Error``
        raised while executing or fetching are printed, not raised.
    """
    cursor = connection.cursor(buffered=True)  # buffered cursor
    rows = None
    succeeded = False
    try:
        cursor.execute(query)
        if fetch_data:
            rows = cursor.fetchall()
        succeeded = True
    except Error as err:
        print("Error:", err)
    finally:
        cursor.close()
        if connection.unread_result:
            # NOTE(review): kept from the original; confirm next_result()
            # exists on the connection class in use.
            connection.next_result()
        # Keep the work only if every step succeeded.
        if succeeded:
            connection.commit()
        else:
            connection.rollback()

    if succeeded and not fetch_data:
        print("Query hecha con éxito")
    return rows

In [12]:
# Sanity-check the `business` table through the open connection.
# `if connection:` only guards against BD_connection having returned None.
if connection:
    print("Conexión exitosa a la base de datos")

    # Count the rows in `business`
    query = "SELECT COUNT(*) FROM business"
    resultado = execute_query(connection, query, fetch_data=True)

    # The value of interest is the first column of the first returned row
    if resultado:
        print(f"Número de registros en 'business': {resultado[0][0]}")

    # Average star rating
    query = "SELECT AVG(stars) FROM business"
    resultado = execute_query(connection, query, fetch_data=True)

    if resultado:
        print(f"Promedio de calificaciones: {resultado[0][0]}")

Conexión exitosa a la base de datos
Número de registros en 'business': 85028
Promedio de calificaciones: 3.598102977842593


In [13]:
# Sanity-check the `sites` table through the open connection.
# `if connection:` only guards against BD_connection having returned None.
if connection:
    print("Conexión exitosa a la base de datos")

    # Count the rows in `sites`
    query = "SELECT COUNT(*) FROM sites"
    resultado = execute_query(connection, query, fetch_data=True)

    # The value of interest is the first column of the first returned row
    if resultado:
        print(f"Número de registros en 'sites': {resultado[0][0]}")

    # Average star rating
    query = "SELECT AVG(stars) FROM sites"
    resultado = execute_query(connection, query, fetch_data=True)

    if resultado:
        print(f"Promedio de calificaciones: {resultado[0][0]}")

Conexión exitosa a la base de datos
Número de registros en 'sites': 85028
Promedio de calificaciones: 3.598102977842593


In [14]:
# Now connect to the "Google" database on the same server
connection_2 = BD_connection(host_name, user_bd, password_bd, "Google")

MYSQL DATABASE connection succesful


In [15]:
# Run the query
# NOTE(review): `connection_2` was already opened in the previous cell; this
# rebinds it to a new connection without closing the old one — confirm intended.
connection_2 = BD_connection(host_name, user_bd, password_bd, "Google")
show_tables(connection_2)

MYSQL DATABASE connection succesful
('sites',)
('users_review',)


In [16]:
def create_sqlalchemy_engine(host_name, user_bd, password_bd, name_bd):
    """Create a SQLAlchemy engine for a MySQL database via mysqlconnector.

    The user name and password are percent-encoded before being placed in
    the URL, so characters such as ``@``, ``/`` or ``:`` in a credential do
    not break URL parsing.

    Args:
        host_name: Host name or address of the MySQL server.
        user_bd: User name used to authenticate.
        password_bd: Password for ``user_bd``.
        name_bd: Name of the database to connect to.

    Returns:
        The engine returned by ``create_engine`` for the assembled URL.
    """
    from urllib.parse import quote_plus

    # str() mirrors the implicit conversion the f-string performed before.
    user = quote_plus(str(user_bd))
    password = quote_plus(str(password_bd))
    url = f"mysql+mysqlconnector://{user}:{password}@{host_name}/{name_bd}"
    return create_engine(url)

In [17]:
# Create the engine for the database named by the NAME_BD environment variable
engine = create_sqlalchemy_engine(host_name, user_bd, password_bd, name_bd)

In [18]:
# Create the engine for the "Google" database
engine_2 = create_sqlalchemy_engine(host_name, user_bd, password_bd, "Google")

In [19]:
# SQL text that selects every row and column of the business, sites and
# users_review tables
query_business = "SELECT * FROM business"
query_sites = "SELECT * FROM sites"
query_google = "SELECT * FROM users_review"

In [20]:
# Load the data into DataFrames: business and sites through `engine`,
# users_review through `engine_2` (the "Google" database)
business = pd.read_sql(query_business, engine)
sites = pd.read_sql(query_sites, engine)
review = pd.read_sql(query_google, engine_2)

In [21]:
# Restart point: take independent copies of the loaded tables.
# pd.DataFrame(df) does not copy the frame it is given, so edits to the
# working frames could leak back into the originals; .copy() duplicates the
# data, at the cost of extra memory.
df_business = business.copy()
df_sites = sites.copy()
df_review = review.copy()

In [22]:
# Preview the first four rows of the business data
df_business.head(4)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,BusinessAcceptsCreditCards,BusinessParking,BikeParking,RestaurantsPriceRange2,RestaurantsTakeOut,ByAppointmentOnly,GoodForKids,DogsAllowed,Ambience,NoiseLevel
0,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,80,...,False,['street'],True,1.0,True,False,,,,
1,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,MO,18054,40.338183,-75.471659,4.5,13,...,True,['lot'],True,,True,,True,,,
2,n_0UpQx1hsNbnPUSlodU8w,Famous Footwear,"8522 Eager Road, Dierbergs Brentwood Point",Brentwood,PA,63144,38.627695,-90.340465,2.5,13,...,True,['lot'],True,2.0,,,,,,
3,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,PA,33707,27.76659,-82.732983,3.5,5,...,,,,,,,,,,


In [23]:
# Preview the first four rows of the sites data
df_sites.head(4)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,BusinessAcceptsCreditCards,BusinessParking,BikeParking,RestaurantsPriceRange2,RestaurantsTakeOut,ByAppointmentOnly,GoodForKids,DogsAllowed,Ambience,NoiseLevel
0,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,80,...,False,['street'],True,1.0,True,False,,,,
1,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,MO,18054,40.338183,-75.471659,4.5,13,...,True,['lot'],True,,True,,True,,,
2,n_0UpQx1hsNbnPUSlodU8w,Famous Footwear,"8522 Eager Road, Dierbergs Brentwood Point",Brentwood,PA,63144,38.627695,-90.340465,2.5,13,...,True,['lot'],True,2.0,,,,,,
3,qkRM_2X51Yqxk3btlwAQIg,Temple Beth-El,400 Pasadena Ave S,St. Petersburg,PA,33707,27.76659,-82.732983,3.5,5,...,,,,,,,,,,


In [24]:
# Preview the first four rows of the Google reviews data
df_review.head(4)

Unnamed: 0,id_user,name,time_comment,text_comment,rating,time_resp,text_resp,gmap_id,state
0,1.043885e+20,Evan Russo,08/11/2020 18:00,Guy at the front door was rude beyond belief. ...,1,01/01/1900 00:00:00,No response from the establishment,0x89e30b59869f14e5:0x76ca09855156779b,Massachusetts
1,1.061707e+20,Anthony Renzulli,27/08/2020 18:53,Great comics great toy store downstairs,5,01/01/1900 00:00:00,No response from the establishment,0x89e30b59869f14e5:0x76ca09855156779b,Massachusetts
2,1.042075e+20,Nils Heinonen,24/06/2020 16:51,Awesome store with a great staff and an incred...,5,01/01/1900 00:00:00,No response from the establishment,0x89e30b59869f14e5:0x76ca09855156779b,Massachusetts
3,1.156954e+20,Dude Bro,18/03/2020 14:12,Excellent choices broad variety and best price...,5,01/01/1900 00:00:00,No response from the establishment,0x89e30b59869f14e5:0x76ca09855156779b,Massachusetts


In [25]:
# Summarize the columns, dtypes and memory usage of the reviews DataFrame
df_review.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8200000 entries, 0 to 8199999
Data columns (total 9 columns):
 #   Column        Dtype  
---  ------        -----  
 0   id_user       float64
 1   name          object 
 2   time_comment  object 
 3   text_comment  object 
 4   rating        int64  
 5   time_resp     object 
 6   text_resp     object 
 7   gmap_id       object 
 8   state         object 
dtypes: float64(1), int64(1), object(7)
memory usage: 563.0+ MB


### **Sistema de Recomendación: documentación**

#### ***Aquí se van a documentar los métodos utilizados***:
