In [1]:
import numpy as np
import pandas as pd
import mysql.connector
from dotenv import dotenv_values

In [2]:
# Read the key/value pairs stored in the local .env file
env_vars = dotenv_values('.env')

# Pull the database credentials out of the loaded mapping
# (.get yields None for a key that is absent from the file)
user, password = env_vars.get('USER'), env_vars.get('PASSWORD')

In [3]:
# Report whether a password was loaded, without echoing the secret itself
print("Password retrieved successfully"
      if password else "Password not found in .env file.")

Password retrieved successfully


In [4]:
# Connection settings for the "gsalud" database on localhost
connection_settings = {
    "host": "localhost",
    "user": user,
    "passwd": password,
    "database": "gsalud",
}

# Open the connection, then create the cursor object used to run statements
db = mysql.connector.connect(**connection_settings)
cursor = db.cursor()

In [5]:
# Read the cleaned payments CSV into a dataframe and preview its first rows
pagos_cleaned = pd.read_csv(filepath_or_buffer='pagos_cleaned.csv',
                            encoding='utf-8')

pagos_cleaned.head()

Unnamed: 0,Index,Fecha,Paciente,Valor,Actividades,Actividad_1,Actividad_2,Actividad_3,Metodo_de_pago
0,0,2023-06-09,Manfry Ariza,60000,valoracion,valoracion,,,Datafono
1,1,2023-06-09,Nazly Avila,185000,"montaje,retiro",montaje,retiro,,Nequi
2,2,2023-06-09,Paula Hernandez,25000,radiografia,radiografia,,,Efectivo
3,3,2023-06-09,Kevin Santiago Soto,105000,"control,reparacion",control,reparacion,,Efectivo
4,4,2023-06-09,Laura Cupa,60000,control,control,,,Datafono


In [6]:
# Distinct patient names -> rows for the "patient" table
pacientes = pagos_cleaned['Paciente'].unique()

# "Actividades" holds comma-separated activity names: spread them over
# columns, stack the columns back into a single series and keep the distinct
# names -> rows for the "activity" table
activities_by_column = pagos_cleaned['Actividades'].str.split(',',
                                                              expand=True)
activities = activities_by_column.stack().unique()

# Distinct payment methods -> rows for the "payment_method" table
payment_types = pagos_cleaned['Metodo_de_pago'].unique()

### Database functions:

In [7]:
# Look up the ID stored for a given name
def get_id(cursor, table_name, column_name, value):
    """Return the ``column_name`` value of the row whose ``name`` is ``value``.

    Args:
        cursor: Object providing ``execute(query, params)`` and ``fetchone()``.
        table_name: Table to search; interpolated directly into the SQL text.
        column_name: Column to select; interpolated directly into the SQL text.
        value: Name to match, passed as a bound query parameter.

    Returns:
        The first field of the first row returned by the query.

    Raises:
        ValueError: If the query yields no row.
    """
    cursor.execute(f"SELECT {column_name} FROM {table_name} WHERE name = %s",
                   (value, ))
    row = cursor.fetchone()
    if not row:
        raise ValueError(f"{value} does not exist in the {table_name} table.")
    return row[0]

In [8]:
# Create a function to check if a value exists in the database
def check_existence(cursor, table_name, column_name, value):
    """Tell whether ``table_name`` has at least one row with ``column_name = value``.

    The query selects a constant and is capped with ``LIMIT 1`` so that at
    most one row is ever produced: the single ``fetchone()`` call then
    consumes the whole result set, even when several rows match, and no
    unread rows are left pending on the connection for the next statement.

    Args:
        cursor: Object providing ``execute(query, params)`` and ``fetchone()``.
        table_name: Table to search; interpolated directly into the SQL text,
            so it must come from trusted code, not user input.
        column_name: Column to compare; interpolated directly as well.
        value: Value to look for, passed as a bound query parameter.

    Returns:
        True if a matching row exists, False otherwise.
    """
    query = f"SELECT 1 FROM {table_name} WHERE {column_name} = %s LIMIT 1"
    cursor.execute(query, (value, ))
    # fetchone() returns None when the result set is empty
    return cursor.fetchone() is not None

In [9]:
# Insert one row into a table
def insert_data(cursor,
                table_name,
                id_name,
                id_value,
                column_names=None,
                values=None):
    """Execute a parameterized ``INSERT`` for a single row.

    Args:
        cursor: Object providing ``execute(query, params)``.
        table_name: Target table; interpolated directly into the SQL text.
        id_name: Name of the first column to fill; interpolated directly.
        id_value: Value bound to ``id_name``.
        column_names: Optional sequence of further column names.
        values: Optional values for ``column_names``, in the same order.

    Returns:
        None. When ``column_names`` or ``values`` is missing or empty, only
        ``id_name`` is inserted.
    """
    # Without both extra columns and their values, only the id column is written
    if not (column_names and values):
        cursor.execute(f"INSERT INTO {table_name} ({id_name}) VALUES (%s)",
                       (id_value, ))
        return

    columns_sql = f"{id_name}, {', '.join(column_names)}"
    # One placeholder for the id plus one per extra column
    placeholders_sql = ', '.join(['%s'] * (len(column_names) + 1))
    cursor.execute(
        f"INSERT INTO {table_name} ({columns_sql}) VALUES ({placeholders_sql})",
        (id_value, *values))

In [10]:
# Create function to fill data into the database
def fill_data(data, table_name, column_name, cursor):
    """Insert every item of ``data`` that the table does not already contain.

    Each item is looked up with ``check_existence`` and, when absent, inserted
    into ``column_name`` through ``insert_data`` and committed immediately on
    the module-level ``db`` connection, so items stored before a failure stay
    stored.

    Args:
        data: Iterable of values to store.
        table_name: Table to fill.
        column_name: Column used both for the existence check and the insert.
        cursor: Cursor used for all statements (expected to belong to ``db``).

    Returns:
        None. Errors are reported with ``print`` instead of being raised, and
        processing stops at the first failing item.
    """
    try:
        for item in data:
            if check_existence(cursor, table_name, column_name, item):
                continue
            insert_data(cursor, table_name, column_name, item)
            db.commit()

    except Exception as e:
        # Discard whatever the failed step left uncommitted so the connection
        # does not stay inside an open transaction (same handling as
        # normalize_appointment_data)
        db.rollback()
        print(f"An error occurred: {e}")

In [11]:
# Function to normalize and fill the appointment and activities_of_appointment tables
def normalize_appointment_data(data, cursor):
    """Load appointments and their activities from a payments dataframe.

    For every row of ``data`` whose ``Index`` is not yet stored as an
    ``appointment_id`` in ``appointment``, the patient and payment-method ids
    are resolved by name, one ``appointment`` row is inserted with that
    explicit id, and one ``activities_of_appointment`` row is inserted per
    comma-separated entry of ``Actividades``. Each appointment is committed on
    its own through the module-level ``db`` connection.

    Error handling:
        * A failure while writing an appointment or its activities rolls that
          appointment back, prints the error and continues with the next row.
        * A failure before the write step (existence check, unknown patient
          or payment method) is printed and stops the whole run.

    Args:
        data: DataFrame providing the columns ``Index``, ``Paciente``,
            ``Metodo_de_pago``, ``Fecha``, ``Valor`` and ``Actividades``.
        cursor: Cursor used for all statements (expected to belong to ``db``).

    Returns:
        None.
    """
    try:
        # Iterate over the rows of the dataframe
        for _, row in data.iterrows():
            appointment_id = row['Index']

            # Appointment already stored (e.g. by a previous run): skip the row
            if check_existence(cursor, "appointment", "appointment_id",
                               appointment_id):
                continue

            # Resolve the foreign keys by name; get_id raises ValueError when
            # a name is unknown, which ends the run via the outer handler
            patient_id = get_id(cursor, "patient", "patient_id",
                                row['Paciente'])
            payment_id = get_id(cursor, "payment_method", "payment_method_id",
                                row['Metodo_de_pago'])

            try:
                # NO_AUTO_VALUE_ON_ZERO makes MySQL store an explicit 0 in an
                # AUTO_INCREMENT column instead of generating a new value
                # (per the original author's note, appointment_id is such a
                # column). The session's current sql_mode is saved first so it
                # can be restored exactly, rather than being overwritten with
                # a hard-coded mode that would drop the server's own settings.
                cursor.execute("SET @saved_sql_mode = @@session.sql_mode;")
                cursor.execute(
                    "SET @@session.sql_mode = 'NO_AUTO_VALUE_ON_ZERO';")
                try:
                    # Insert the data into the "appointment" table
                    insert_data(
                        cursor, "appointment", "appointment_id",
                        appointment_id,
                        ["date", "patient_id", "value", "payment_method_id"],
                        [row['Fecha'], patient_id, row['Valor'], payment_id])
                finally:
                    # Restore the saved mode whether or not the insert worked
                    cursor.execute(
                        "SET @@session.sql_mode = @saved_sql_mode;")

                # Link every activity of the row to the appointment
                for activity in row['Actividades'].split(','):
                    activity_id = get_id(cursor, "activity", "activity_id",
                                         activity)

                    # Check if the activity is already associated with the appointment
                    exists_query = "SELECT * FROM activities_of_appointment WHERE appointment_id = %s AND activity_id = %s"
                    cursor.execute(exists_query, (appointment_id, activity_id))

                    if cursor.fetchone():
                        continue  # Same activity listed twice: keep a single link

                    # Insert the data into the "activities_of_appointment" table
                    insert_data(cursor, "activities_of_appointment",
                                "appointment_id", appointment_id,
                                ["activity_id"], [activity_id])

                db.commit()  # Commit if everything is successful
            except Exception as e:
                # Drop the partially written appointment and move on
                db.rollback()
                print(f"An error occurred: {e}")

    except Exception as e:
        db.rollback()
        print(f"An error occurred: {e}")

### Fill tables with data:

In [12]:
# Fill the "patient", "activity" and "payment_method" tables, in that order,
# each through its "name" column
for lookup_values, lookup_table in ((pacientes, "patient"),
                                    (activities, "activity"),
                                    (payment_types, "payment_method")):
    fill_data(lookup_values, lookup_table, "name", cursor)

In [13]:
# Load the appointments and their activity links from the payments dataframe
normalize_appointment_data(data=pagos_cleaned, cursor=cursor)

In [14]:
# Print the row count of every table as a final sanity check
cursor.execute("SHOW TABLES")
tables = cursor.fetchall()
for table_name in (entry[0] for entry in tables):
    cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
    row_count = cursor.fetchone()[0]
    print(f"{table_name}: {row_count}")

activities_of_appointment: 191
activity: 26
appointment: 137
patient: 112
payment_method: 4
