# In [3]:
import os
# Switch to the directory that contains this file so that relative paths
# resolve the same way no matter where the code is launched from.
try:
    # __file__ (the name, not the string "__file__") is defined when this
    # code runs as a script or an imported module.
    current_directory = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # Interactive sessions such as a notebook kernel do not define __file__;
    # in that case stay in the current working directory.
    current_directory = os.getcwd()
os.chdir(current_directory)

# In [6]:
import os
import argparse
import glob
import psycopg2
import subprocess

def krbauth(username: str, password: str) -> bool:
    """Perform Kerberos authentication by running ``kinit`` for *username*.

    The password is encoded to bytes and written to the standard input of
    the ``kinit`` process; its stdout and stderr are captured.

    Args:
        username: principal name passed to ``kinit``.
        password: password for that principal.

    Returns:
        True when ``kinit`` exits with status 0.

    Raises:
        subprocess.CalledProcessError: if ``kinit`` exits with a non-zero
            status. The captured output is printed before re-raising.
    """
    cmd = ["kinit", username]
    try:
        completed = subprocess.run(
            cmd, input=password.encode(), check=True, capture_output=True
        )
    except subprocess.CalledProcessError as e:
        print(f"The error output for krbauth: {e.output}")
        print(f"The error code: {e.returncode}")
        print(f"The stderr: {e.stderr}")
        print(f"The stdout: {e.stdout}")
        # Re-raise the exception we caught. Nothing was assigned by the
        # failed subprocess.run() call, so building a new exception from its
        # result would itself fail with an unbound-variable error.
        raise
    # With check=True a non-zero exit status raises above, so this is True
    # on every normal return; the comparison keeps the contract explicit.
    return completed.returncode == 0


def read_queries_from_file(filename):
    """Extract the SQL queries stored in a sectioned text file.

    The file is read as sections separated by blank lines. The first line of
    each section is treated as a header and discarded; the remaining lines
    are joined with single spaces to form one query. Sections with fewer
    than two lines are ignored. A separator line that contains only
    whitespace counts as blank.

    Args:
        filename: str with file path

    Returns:
        query_question_pair: list of SQL query strings, in file order
    """
    with open(filename, 'r') as file:
        content = file.read()

    query_question_pair = []
    section_lines = []
    # The extra '' acts as a sentinel so the last section is flushed even
    # when the file does not end with a blank line.
    for line in content.split('\n') + ['']:
        if line.strip():
            section_lines.append(line)
            continue
        # Blank line: close the current section (header + at least one line).
        if len(section_lines) >= 2:
            query_question_pair.append(' '.join(section_lines[1:]).strip())
        section_lines = []
    return query_question_pair


def process_files_in_folder(folder_path):
    """Read the queries from every ``.txt`` file directly inside a folder.

    Files are visited in ascending order of the integer found between the
    first character of the file name and its first underscore (for example
    ``q12_foo.txt`` sorts as 12). Files without such a number come last.
    Ties are broken by file name, so the order never depends on the order
    in which ``os.listdir`` happens to return entries.

    Args:
        folder_path: str with the path of the folder to scan

    Returns:
        result_dict: dictionary whose keys are the file names without
        extension and whose values are the lists returned by
        ``read_queries_from_file``, inserted in the visiting order above
    """
    def sort_key(name):
        prefix_number = name.split('_')[0][1:]
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        if prefix_number.isdecimal():
            return (int(prefix_number), name)
        return (float('inf'), name)

    # Filter first so names that are skipped anyway never reach the sort key.
    txt_names = [name for name in os.listdir(folder_path) if name.endswith('.txt')]

    result_dict = {}
    for filename in sorted(txt_names, key=sort_key):
        file_path = os.path.join(folder_path, filename)
        result_dict[os.path.splitext(filename)[0]] = read_queries_from_file(file_path)
    return result_dict



    
# EXECUTE THE PIPELINE:  TAKE ALL FILES, READ THEM AND SUBMIT QUERIES TO DB, THEN STORE RESULTS
folder_path = '../T1/'

# Connection settings are identical for every query, so they are defined once
# instead of being re-assigned inside the inner loop.
hostname = ''  # define hostname
database = 'datalake'  # change to database name
username = ''  # define username
port_id = 5434

# One iteration per input file: the key is the file name without extension
# (used as the question ID) and the value is the list of queries read from it.
for question_id, list_of_queries in process_files_in_folder(folder_path).items():
    # One entry per query: the first fetched row, or an error string.
    results = []

    # Process each query in the list
    for query in list_of_queries:
        conn = None
        cur = None

        try:
            # Each query gets its own connection, closed in the finally below.
            conn = psycopg2.connect(host=hostname, dbname=database, user=username, port=port_id)
            cur = conn.cursor()
            cur.execute(query)
            # Only the first row of the result set is recorded.
            results.append(cur.fetchone())

        except psycopg2.Error as e:
            # Keep only the first line of the database error message.
            error_message = str(e).split("\n")[0]
            results.append(f"Error: {error_message}")

        except Exception as error:
            # Record any other failure and continue with the next query.
            results.append(f"Error executing query: {error}")

        finally:
            if cur is not None:
                cur.close()
            if conn is not None:
                conn.close()

    # write down results for each question: one tab-separated line, appended
    with open('./answers_T1.txt', 'a') as output_file:
        # A separate name keeps `results` a list instead of rebinding it to a str.
        results_line = '\t'.join(map(str, results))
        output_file.write(f'{question_id}\t{results_line}\n')
            