## Environment

In [None]:
import json
import logging
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text

In [2]:
# Load the raw Airbnb listings export into memory.
# The file is read as ISO-8859-1 (Latin-1); low_memory=False makes pandas
# process the file in one piece instead of inferring dtypes chunk by chunk.
df = pd.read_csv(
    "../data/Airbnb_Open_Data.csv",
    encoding='ISO-8859-1',
    low_memory=False,
)
print("Datos del CSV cargados correctamente.")

# Quick visual check of the first rows.
print(df.head())

Datos del CSV cargados correctamente.
        id                                              NAME      host id  \
0  1001254                Clean & quiet apt home by the park  80014485718   
1  1002102                             Skylit Midtown Castle  52335172823   
2  1002403               THE VILLAGE OF HARLEM....NEW YORK !  78829239556   
3  1002755                                               NaN  85098326012   
4  1003689  Entire Apt: Spacious Studio/Loft by central park  92037596077   

  host_identity_verified host name neighbourhood group neighbourhood  \
0            unconfirmed  Madaline            Brooklyn    Kensington   
1               verified     Jenna           Manhattan       Midtown   
2                    NaN     Elise           Manhattan        Harlem   
3            unconfirmed     Garry            Brooklyn  Clinton Hill   
4               verified    Lyndon           Manhattan   East Harlem   

        lat      long        country  ... service fee minimum nigh

Conexión a la base de datos mediante las credenciales definidas

In [25]:
def create_db_engine(credentials_path="../credentials.json"):
    """
    Build a SQLAlchemy engine for the PostgreSQL database described in a JSON file.

    The credentials file must contain the keys ``user``, ``password``, ``host``,
    ``port`` and ``database``. Once the engine is built, a ``SELECT 1`` round-trip
    verifies that the database is reachable. The outcome is reported through
    ``logging`` (configured here at INFO level).

    Args:
        credentials_path: Path to the JSON credentials file. Defaults to
            ``"../credentials.json"``.

    Returns:
        The verified engine, or ``None`` when the credentials could not be read,
        a required key is missing, or the connection test failed. Errors are
        logged, never raised.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    engine = None
    try:
        with open(credentials_path, 'r') as file:
            config = json.load(file)

        # Percent-encode user and password so characters such as '@', ':' or '/'
        # are not mistaken for URL delimiters when the connection string is parsed.
        user = quote_plus(str(config["user"]))
        password = quote_plus(str(config["password"]))
        host = config["host"]
        port = config["port"]
        database = config["database"]

        # Build the connection string
        connection_string = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}"
        engine = create_engine(connection_string)

        # create_engine() is lazy, so run a trivial query to actually test the connection
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
        logging.info("Conexión a la base de datos exitosa.")

        return engine
    except Exception as e:
        logging.error("Error al conectar a la base de datos: %s", e)
        # Release the connection pool of an engine that was built but failed its test.
        if engine is not None:
            engine.dispose()
        return None

# Usage example: build the engine once and log a confirmation when it is usable.
# The guard is true when this cell runs in the notebook (the captured output
# shows the confirmation being logged), so `engine` becomes a module-level name.
if __name__ == "__main__":
    engine = create_db_engine()
    if engine:
        logging.info("El engine se creó correctamente y está listo para usar.")


2025-03-21 09:38:16,347 - INFO - Conexión a la base de datos exitosa.
2025-03-21 09:38:16,347 - INFO - El engine se creó correctamente y está listo para usar.


In [3]:
# Bootstrap step: make sure the target database exists before anything connects to it.
credentials = "../credentials.json"

with open(credentials) as f:
    creds = json.load(f)

DB_USER = creds["user"]
DB_PASSWORD = creds["password"]
DB_HOST = creds["host"]
DB_PORT = creds["port"]
DB_NAME = creds["database"]

# Connect to the maintenance database "postgres", because the target database
# may not exist yet. User and password are percent-encoded so that characters
# such as '@', ':' or '/' cannot corrupt the URL.
creator_connection_string = (
    f'postgresql+psycopg2://{quote_plus(str(DB_USER))}:{quote_plus(str(DB_PASSWORD))}'
    f'@{DB_HOST}:{DB_PORT}/postgres'
)
# CREATE DATABASE cannot run inside a transaction block, so every connection of
# this engine is opened in AUTOCOMMIT mode.
creator_engine = create_engine(creator_connection_string, isolation_level="AUTOCOMMIT")

try:
    with creator_engine.connect() as connection:
        # The database name is passed as a bound parameter, never spliced into the SQL.
        result = connection.execute(
            text("SELECT 1 FROM pg_database WHERE datname = :name"),
            {"name": DB_NAME},
        )

        if not result.scalar():
            # Identifiers cannot be bound parameters. Quote the name (doubling any
            # embedded double quote) so it is created with exactly the spelling
            # used by the lookup above and by later connection URLs.
            quoted_db_name = '"' + str(DB_NAME).replace('"', '""') + '"'
            connection.execute(text(f"CREATE DATABASE {quoted_db_name}"))
            print(f"Base de datos '{DB_NAME}' creada exitosamente!")
        else:
            print(f"La base de datos '{DB_NAME}' ya existe.")
finally:
    # This engine is only needed for the bootstrap step; release its pool.
    creator_engine.dispose()

La base de datos 'airbnb' ya existe.


In [4]:
# Engine bound to the target database. User and password are percent-encoded so
# that characters such as '@', ':' or '/' cannot corrupt the connection URL.
connection_string = (
    f'postgresql+psycopg2://{quote_plus(str(DB_USER))}:{quote_plus(str(DB_PASSWORD))}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)
engine = create_engine(connection_string)

table_name = "airbnb_data"
# if_exists='replace' drops and recreates the table, so re-running this cell
# reloads the data instead of appending duplicates; the DataFrame index is not stored.
df.to_sql(table_name, engine, if_exists='replace', index=False)

print(f"Tabla '{table_name}' creada y datos cargados en la base de datos '{DB_NAME}'.")

Tabla 'airbnb_data' creada y datos cargados en la base de datos 'airbnb'.


In [5]:
# Sanity check: read back a handful of rows from the freshly loaded table.
query = f"SELECT * FROM {table_name} LIMIT 5;"

with engine.connect() as connection:
    df_5filas = pd.read_sql(sql=query, con=connection)

# The preview is fully materialised, so it can be printed after the connection closes.
print(f"Primeros 5 registros de la tabla '{table_name}':")
print(df_5filas)

Primeros 5 registros de la tabla 'airbnb_data':
        id                                              NAME      host id  \
0  1001254                Clean & quiet apt home by the park  80014485718   
1  1002102                             Skylit Midtown Castle  52335172823   
2  1002403               THE VILLAGE OF HARLEM....NEW YORK !  78829239556   
3  1002755                                              None  85098326012   
4  1003689  Entire Apt: Spacious Studio/Loft by central park  92037596077   

  host_identity_verified host name neighbourhood group neighbourhood  \
0            unconfirmed  Madaline            Brooklyn    Kensington   
1               verified     Jenna           Manhattan       Midtown   
2                   None     Elise           Manhattan        Harlem   
3            unconfirmed     Garry            Brooklyn  Clinton Hill   
4               verified    Lyndon           Manhattan   East Harlem   

        lat      long        country  ... service fee  m

In [6]:
# Row count of the DataFrame that was read from the CSV.
total_csv_rows = df.shape[0]
print(f"El archivo CSV tiene un total de {total_csv_rows} filas.")

# Row count of the table in the database, for side-by-side comparison.
count_statement = text(f"SELECT COUNT(*) FROM {table_name};")
with engine.connect() as connection:
    result = connection.execute(count_statement)
    total_rows = result.scalar()

print(f"La tabla '{table_name}' tiene un total de {total_rows} filas.")

El archivo CSV tiene un total de 102599 filas.
La tabla 'airbnb_data' tiene un total de 102599 filas.
