In [None]:
import os
from datetime import datetime, timedelta
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text


# Connection settings for the PostgreSQL server used by every helper below.
# Each value can be overridden with the standard libpq environment variable,
# so deployment details do not have to be edited into the notebook.
usuario = os.environ.get("PGUSER", "postgres")
port = os.environ.get("PGPORT", "5432")

database = os.environ.get("PGDATABASE", "kurtdb")

host = os.environ.get("PGHOST", "34.139.44.224")

# The password is a secret and must never be stored in the notebook, so it is
# read from the PGPASSWORD environment variable. Any password that was
# previously committed in this file should be rotated.
local_pass = os.environ.get("PGPASSWORD", "")
if not local_pass:
    print("Advertencia: defina la variable de entorno PGPASSWORD antes de conectar a PostgreSQL")

In [None]:
# @title
def read_folder(folder_path):
    """Load every supported data file in a folder into a single DataFrame.

    Files are read in sorted name order so the row order of the result is
    reproducible. Sub-directories are skipped. Extensions are matched
    case-insensitively.

    Args:
        folder_path: Directory containing .csv, .xlsx and/or .xlsm files.

    Returns:
        One DataFrame with the rows of all files, re-indexed from 0.

    Raises:
        ValueError: If the folder contains a file with an unsupported
            extension, or contains no data files at all.
    """
    dfs = []

    # os.listdir returns entries in arbitrary order; sort for determinism.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)

        # Directories (for example Jupyter's .ipynb_checkpoints) are not data files.
        if os.path.isdir(file_path):
            continue

        lowered = file_path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(file_path)
        elif lowered.endswith(('.xlsx', '.xlsm')):
            df = pd.read_excel(file_path)
        else:
            raise ValueError("Unsupported file format. Only .csv, .xlsx, and .xlsm files are supported.")

        dfs.append(df)

    # pd.concat([]) fails with an unrelated-looking message; report the real cause.
    if not dfs:
        raise ValueError(f"No .csv, .xlsx, or .xlsm files found in folder: {folder_path}")

    return pd.concat(dfs, ignore_index=True)

def probar_conexion_postgresql(host, clave, database):
    """Check whether a PostgreSQL connection can be opened and report the outcome.

    Connects as user 'postgres' on port '5432', prints a success message and
    closes the connection again. Any exception is caught and printed instead
    of being raised.

    Args:
        host: Server address.
        clave: Password for the 'postgres' user.
        database: Name of the database to connect to.

    Returns:
        None. The result is only printed.
    """
    parametros = {
        'host': host,
        'database': database,
        'user': 'postgres',
        'password': clave,
        'port': '5432',
    }

    try:
        enlace = psycopg2.connect(**parametros)
        print("Conexión exitosa a PostgreSQL")
        enlace.close()
    except Exception as error:
        print(f"Error al intentar conectar a PostgreSQL: {error}")

def obtener_tablas_disponibles(host, password, database):
    """List the tables of the 'public' schema.

    Connects as user 'postgres' on port '5432'.

    Args:
        host: Server address.
        password: Password for the 'postgres' user.
        database: Name of the database to inspect.

    Returns:
        A list of table names, or None when the lookup fails (the error is
        printed, not raised).
    """
    conexion = None
    cursor = None
    try:
        conexion = psycopg2.connect(database=database, user='postgres', password=password, host=host, port='5432')
        cursor = conexion.cursor()
        consulta_sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';"
        cursor.execute(consulta_sql)
        tablas_disponibles = [tabla[0] for tabla in cursor.fetchall()]
        return tablas_disponibles

    except Exception as e:
        print(f"Error al intentar obtener la lista de tablas: {e}")
        return None

    finally:
        # The cursor may never have been created (conexion.cursor() can fail),
        # so each resource is released independently.
        if cursor is not None:
            cursor.close()
        if conexion is not None:
            conexion.close()

def eliminar_tabla(host, password, database, nombre_tabla):
    """Drop a table if it exists and print the outcome.

    Connects as user 'postgres' on port '5432'. Errors are printed, not raised.

    Args:
        host: Server address.
        password: Password for the 'postgres' user.
        database: Name of the database that holds the table.
        nombre_tabla: Name of the table to drop.

    Returns:
        None.
    """
    conexion = None
    cursor = None
    try:
        conexion = psycopg2.connect(database=database, user='postgres', password=password, host=host, port='5432')
        cursor = conexion.cursor()
        # NOTE(review): nombre_tabla is interpolated directly into the
        # statement, so only trusted table names must be passed here.
        consulta_sql = f"DROP TABLE IF EXISTS {nombre_tabla};"
        cursor.execute(consulta_sql)
        conexion.commit()

        print(f"La tabla {nombre_tabla} ha sido eliminada con éxito.")

    except Exception as e:
        print(f"Error al intentar eliminar la tabla: {e}")

    finally:
        # The cursor may never have been created (conexion.cursor() can fail),
        # so each resource is released independently.
        if cursor is not None:
            cursor.close()
        if conexion is not None:
            conexion.close()

def sql_to_df(host, password, database, query):
    """Run a SQL query through SQLAlchemy and return the rows as a DataFrame.

    The user name and port come from the module-level `usuario` and `port`
    settings.

    Args:
        host: Server address.
        password: Password of the database user.
        database: Name of the database to query.
        query: SQL text to execute.

    Returns:
        A DataFrame with one column per result column, or None when the query
        fails (the error is printed, not raised).
    """
    engine = None
    try:
        # Percent-encode the password so characters such as '@', '/' or '%'
        # cannot corrupt the connection URL; SQLAlchemy decodes it again.
        clave_url = quote(str(password), safe="")
        engine = create_engine(f'postgresql://{usuario}:{clave_url}@{host}:{port}/{database}')

        with engine.connect() as conn:
            result = conn.execute(text(query))
            columns = list(result.keys())
            main_df = pd.DataFrame(result.fetchall(), columns=columns)

        return main_df

    except Exception as e:
        print(f"Error : {e}")
        return None

    finally:
        # Each call builds its own engine; dispose of it so its connection
        # pool does not stay open for the lifetime of the kernel.
        if engine is not None:
            engine.dispose()

def sql_query(host, password, database, query, values=None, fetch=True):
    """Execute a SQL statement with psycopg2.

    Connects as user 'postgres' on port '5432'. Errors are printed, not raised.

    Args:
        host: Server address.
        password: Password for the 'postgres' user.
        database: Name of the database to use.
        query: SQL text, optionally containing psycopg2 placeholders.
        values: Parameters bound to the placeholders; ignored when empty.
        fetch: When True the rows are fetched and returned and nothing is
            committed. When False the transaction is committed and nothing
            is returned.

    Returns:
        The list of rows when `fetch` is True; otherwise None. None is also
        returned when the statement fails.
    """
    conexion = None
    cursor = None
    try:
        conexion = psycopg2.connect(database=database, user='postgres', password=password, host=host, port='5432')
        cursor = conexion.cursor()

        if values:
            cursor.execute(query, values)
        else:
            cursor.execute(query)

        if fetch:
            return cursor.fetchall()

        conexion.commit()
        return None

    except Exception as e:
        print(f"Error al ejecutar la consulta: {e}")
        return None

    finally:
        # The cursor may never have been created (conexion.cursor() can fail),
        # so each resource is released independently.
        if cursor is not None:
            cursor.close()
        if conexion is not None:
            conexion.close()

def df_to_sql(host, password, database, df, nombre, metodo):
    """Write a DataFrame to a database table through SQLAlchemy.

    The user name and port come from the module-level `usuario` and `port`
    settings. The DataFrame index is not written. Errors are printed, not
    raised.

    Args:
        host: Server address.
        password: Password of the database user.
        database: Name of the target database.
        df: DataFrame to upload.
        nombre: Name of the target table.
        metodo: Value passed to `DataFrame.to_sql` as `if_exists`.

    Returns:
        None.
    """
    engine = None
    try:
        # Percent-encode the password so characters such as '@', '/' or '%'
        # cannot corrupt the connection URL; SQLAlchemy decodes it again.
        clave_url = quote(str(password), safe="")
        engine = create_engine(f'postgresql://{usuario}:{clave_url}@{host}:{port}/{database}')
        df.to_sql(nombre, con=engine, if_exists=metodo, index=False)
        print(f"Tabla {nombre} creada exitosamente en la base de datos {database} del servidor {host}")

    except Exception as e:
        print(f"Error al enviar la tabla : {e}")

    finally:
        # Each call builds its own engine; dispose of it so its connection
        # pool does not stay open for the lifetime of the kernel.
        if engine is not None:
            engine.dispose()

def dato_columnas(host, password, database, tabla_nombre):
    """Return the column names and data types of a table.

    The information is read from `information_schema.columns` via `sql_query`.

    Args:
        host: Server address.
        password: Password of the database user.
        database: Name of the database to inspect.
        tabla_nombre: Name of the table whose columns are wanted.

    Returns:
        A DataFrame with the columns 'columna' (column name) and 'tipo'
        (data type), one row per table column.
    """
    # The table name is sent as a bound parameter instead of being pasted into
    # the SQL text, so quotes in the name cannot alter the statement.
    query = """
    SELECT column_name, data_type
    FROM information_schema.columns
    WHERE table_name = %s;
    """
    tipos = sql_query(host, password, database, query, (tabla_nombre,))
    df_info_columnas = pd.DataFrame(tipos, columns=['columna', 'tipo'])
    return df_info_columnas

def _a_booleano(valor):
    """Map one raw cell to a bool: 'True'/'SI' text and real True are True, anything else False."""
    if isinstance(valor, str):
        return valor.strip() in ('True', 'SI')
    if isinstance(valor, (bool, np.bool_)):
        return bool(valor)
    # Missing values and any other type count as False.
    return False


def uniformizar_datos(host, password, database, df_entrada, tabla, arreglo=True):
    """Coerce the columns of a DataFrame to the data types of a database table.

    String cells are stripped and empty strings become missing values. Every
    column that also exists in the table is then converted according to the
    type reported by `dato_columnas`. Columns that are not part of the table
    are reported with a printed message and left unconverted.

    Args:
        host: Server address.
        password: Password of the database user.
        database: Name of the database that holds the table.
        df_entrada: Input DataFrame; it is not modified.
        tabla: Name of the table whose column types are used.
        arreglo: When True all input columns are returned. When False only
            the columns that exist in the table are returned, in table order.

    Returns:
        A new DataFrame with the converted columns.
    """
    df = df_entrada.copy()
    # DataFrame.map only exists in pandas >= 2.1; older versions call it applymap.
    por_celda = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
    df = por_celda(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.replace('', np.nan)

    df_datos = dato_columnas(host, password, database, tabla)
    col_tabla = df_datos['columna'].to_list()

    # First reported type wins when a column name appears more than once.
    tipo_por_columna = {}
    for columna, tipo in zip(df_datos['columna'], df_datos['tipo']):
        tipo_por_columna.setdefault(columna, tipo)

    tipo_fechas = ['date', 'timestamp without time zone']
    tipo_int = ['integer', 'bigint', 'smallint']
    tipo_str = ['character', 'character varying', 'text']

    for col in df.columns:
        if col not in tipo_por_columna:
            print('No esta dentro de la tabla : ' + str(col))
            continue

        tipo_dato = tipo_por_columna[col]
        if tipo_dato in tipo_fechas:
            df[col] = df[col].replace(0, np.nan)
            df[col] = pd.to_datetime(df[col], errors='coerce')
        elif tipo_dato == 'time without time zone':
            df[col] = df[col].replace(0, np.nan)
            df[col] = pd.to_datetime(df[col], format='%H:%M:%S').dt.time
        elif tipo_dato in tipo_int:
            # Values that cannot be parsed as numbers become 0.
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)
        elif tipo_dato == 'double precision':
            df[col] = pd.to_numeric(df[col], errors='coerce').astype(float)
        elif tipo_dato == 'boolean':
            # Cells may be NaN (empty strings were replaced above) or real
            # bools, so the conversion must not assume every cell is text.
            df[col] = df[col].apply(_a_booleano).astype(bool)
        elif tipo_dato in tipo_str:
            # Convert only real values: astype(str) alone would turn missing
            # values into the literal text 'nan'.
            df[col] = df[col].astype(str).where(df[col].notna())

    if arreglo:
        return df.copy()

    columnas_presentes = [col for col in col_tabla if col in df.columns]
    return df[columnas_presentes].copy()

In [None]:
# Connectivity check: prints whether a PostgreSQL connection could be opened
# with the configured host, password and database.
probar_conexion_postgresql(host, local_pass, database)

In [None]:
# obtener_tablas_disponibles returns None when the lookup fails; fall back to
# an empty list so the listing below does not raise a TypeError.
tablas_disp = obtener_tablas_disponibles(host, local_pass, database) or []

print("Tablas disponibles en la base de datos:")
for i, tabla in enumerate(tablas_disp, 1):
    print(i, tabla)

In [None]:
# Export one table to data/<tablename>.csv and display it.
tablename = 'incoin'
query = f"""
select * from {tablename}
"""
df = sql_to_df(host, local_pass, database, query)

# sql_to_df prints the error and returns None on failure; stop here with a
# clear message instead of failing later with an AttributeError.
if df is None:
    raise RuntimeError(f"No se pudo leer la tabla {tablename}; revise el error mostrado arriba.")

name_file = f"data/{tablename}.csv"
# to_csv does not create missing directories.
os.makedirs(os.path.dirname(name_file), exist_ok=True)
df.to_csv(name_file, encoding='utf-8-sig')
df