### Upload Data into Database:

Before running the code in this script, make sure that the tables are created in the database.

If the tables in the database contain old data, refer to the SQL code file named firstcut_data.

In [None]:
import os
import logging
import psycopg2
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from modules.connection_mod import connect, show_psycopg2_exception

# Presumably loads variables from a local .env file into the process
# environment so the os.getenv() lookups for the connection settings can find
# them -- TODO confirm where the .env file is expected to live.
load_dotenv()
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

In [None]:
# Connection settings, each read from the environment variable paired with it.
# A variable that is not set yields None for that key.
conn_params_dict = {
    param_name: os.getenv(env_var)
    for param_name, env_var in (
        ("host", "PG_HOST"),
        ("database", "DATABASE"),
        ("user", "PG_USER"),
        ("password", "PG_PASSWORD"),
    )
}

In [None]:
def execute_many(conn, table_dataframe_pairs):
    """
    Insert each dataframe into its paired table and commit once at the end.

    For every ``(table, dataframe)`` pair an
    ``INSERT INTO table (cols) VALUES (%s, ...)`` statement is built from the
    dataframe's column names and run with ``cursor.executemany``. Missing
    values (NaN/NaT/None) are sent as ``None`` so they are stored as SQL NULL,
    and cell values are converted to plain Python objects before being handed
    to the driver. Empty dataframes are skipped.

    ``conn.commit()`` is called once after all pairs have been processed. If
    anything raises, ``conn.rollback()`` is attempted and the error is passed
    to ``show_psycopg2_exception``; the exception is not re-raised. The cursor
    is always closed.

    Args:
        conn: open database connection exposing ``cursor()``, ``commit()``
            and ``rollback()``.
        table_dataframe_pairs (list of tuples): ``(table_name, dataframe)``
            pairs; the dataframe's column names must match the table's
            column names.

    Note:
        Table and column names are interpolated directly into the SQL text
        (only the values are passed as query parameters), so they must come
        from trusted code, never from user input.
    """
    cursor = conn.cursor()

    try:
        for table, dataframe in table_dataframe_pairs:
            if dataframe.empty:
                # Nothing to insert; also avoids building "INSERT ... () VALUES ()".
                logger.info('Skipping %s: dataframe is empty', table)
                continue

            # Cast to object first so that where() can really store None in
            # place of NaN/NaT, and so numpy scalars become Python objects.
            cleaned = dataframe.astype(object).where(dataframe.notna(), None)
            rows = list(cleaned.itertuples(index=False, name=None))

            cols = ','.join(map(str, dataframe.columns))

            # One %s placeholder per column of this dataframe
            placeholders = ','.join(['%s'] * len(dataframe.columns))

            sql = f"INSERT INTO {table} ({cols}) VALUES ({placeholders})"

            cursor.executemany(sql, rows)

        conn.commit()
        logger.info('Data inserted successfully using the execute_many() function')
        print("Data inserted successfully for tables")
    except Exception as err:
        # Undo the partial upload so the connection is usable again.
        try:
            conn.rollback()
        except Exception:
            logger.exception('Rollback failed after an insert error')
        show_psycopg2_exception(err)  # pass exception to function
    finally:
        cursor.close()

In [None]:
# Connect to the database using the settings in conn_params_dict.
conn = connect(conn_params_dict)
# NOTE(review): with autocommit enabled, each statement is presumably committed
# as soon as it runs, so the single commit() at the end of execute_many would
# not make a multi-table upload all-or-nothing -- confirm this is intended.
conn.autocommit = True

In [None]:
# Read in the processed USA export (files MLI_Dec_1 .. MLI_Dec_5), one dataframe
# per staging table. index_col=False is passed so that pandas does not use the
# first CSV column as the dataframe index.
patient = pd.read_csv('../COUNTRIES/USA/data/processed/MLI_Dec_1_2024-01-31_14-01.csv', index_col=False)
condition = pd.read_csv('../COUNTRIES/USA/data/processed/MLI_Dec_2_2024-01-31_14-01.csv', index_col=False)
microscopy = pd.read_csv('../COUNTRIES/USA/data/processed/MLI_Dec_3_2024-01-31_14-01.csv', index_col=False)
culture = pd.read_csv('../COUNTRIES/USA/data/processed/MLI_Dec_4_2024-01-31_14-01.csv', index_col=False)
specimen = pd.read_csv('../COUNTRIES/USA/data/processed/MLI_Dec_5_2024-01-31_14-01.csv', index_col=False)

In [None]:
# (table name, dataframe) tuples to upload, in upload order.
# Written as an ordered mapping from target table to dataframe, then flattened
# into the list of tuples that execute_many() expects.
table_dataframe_pairs = list(
    {
        'staging.patient_leo': patient,
        'staging.condition_temp_leo': condition,
        'staging.microscopy_leo': microscopy,
        'staging.culture_leo': culture,
        'staging.specimen_leo': specimen,
    }.items()
)

In [None]:
# Insert every dataframe in table_dataframe_pairs into its paired table.
execute_many(conn, table_dataframe_pairs)

In [None]:
# regimen upload failed. I think this is because it is expecting the drugs column to be an array.
# I will have to do something similar to what I did with the condition table.

# regimen_upload['drugs'] = regimen_upload['drugs'].str.replace(r'{|}', '', regex=True)
# regimen_upload.to_csv('regimen_upload_temp3.csv', index=False)

In [None]:
# regimen_upload_temp = pd.read_csv('Mexico/data/CB_data_file_2/regimen_upload_temp2.csv', index_col=False)

In [None]:
# Treatment upload had to be done using the Import/Export function in PgAdmin.
# For some reason it was failing because it wanted the status_code column to be present.

# Condition and regimen need to be uploaded into temp tables because it is easier to transform them in PgAdmin.
# The regimen_upload_temp upload had to be done using the Import/Export function in PgAdmin.
# execute_many(conn, [('condition_temp', condition_upload)])

#### Loading Patient table data

* This portion might not always be needed.

In [None]:
# Read in the monthly Georgia patient files (June to September).
# index_col=False is passed so that pandas does not use the first CSV column
# as the dataframe index.
# NOTE(review): these paths use '../countries/' while the USA files are read
# from '../COUNTRIES/' -- confirm the directory name on case-sensitive filesystems.
patient_july = pd.read_csv('../countries/Georgia/data/processed/Georgia_patient_july.csv', index_col=False)
patient_june = pd.read_csv('../countries/Georgia/data/processed/Georgia_patient_june.csv', index_col=False)
patient_august = pd.read_csv('../countries/Georgia/data/processed/Georgia_patient_august.csv', index_col=False)
patient_september = pd.read_csv('../countries/Georgia/data/processed/Georgia_patient_september.csv', index_col=False)

In [None]:
# Run execute_many for the monthly patient files (it takes a list of (table, dataframe) pairs):
# execute_many(conn, [('patient', patient_july)])
# execute_many(conn, [('patient', patient_june)])
# execute_many(conn, [('patient', patient_august)])
# execute_many(conn, [('patient', patient_september)])

In [None]:
# Close the database connection opened earlier in the notebook.
conn.close()