# In [4]:
# Importing required packages
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import getpass
import mysql.connector as connection
from functools import reduce
import datetime
from datetime import datetime

pd.set_option('display.max_column', None)

# The main report generation function
def generate_location_setup_report():
   
    # Define a function to connect to the MySQL DB
    def f_connect_to_database(p_host, p_user, p_password, p_database):
        """Open a MySQL connection and create a cursor on it.

        Exactly one connection attempt is made; on failure the error is
        printed and the caller has to run the report again to retry.

        Args:
            p_host: Host name or address of the MySQL server.
            p_user: User name to authenticate with.
            p_password: Password for ``p_user``.
            p_database: Name of the database to select.

        Returns:
            A ``(connection, cursor)`` tuple on success, or ``(None, None)``
            when ``mysql.connector`` raises an error.
        """
        v_mydb = None
        try:
            # Setup database connection
            v_mydb = connection.connect(
                host=p_host,
                user=p_user,
                password=p_password,
                database=p_database,
                use_pure=True,
            )
            v_cursor = v_mydb.cursor()
            return v_mydb, v_cursor

        except connection.Error as err:
            print(f"Error: {err}")
            print("\n retry !")
            # The connection may have opened before cursor() failed; release
            # it so a failed setup does not leave a session behind.
            if v_mydb is not None:
                try:
                    v_mydb.close()
                except connection.Error:
                    # Best-effort cleanup: the original error was already reported.
                    pass
            return None, None

    
    # Function to close the database connection
    def f_close_database_connection(p_mydb, p_cursor):
        """Close the cursor and the connection, each independently.

        Either argument may be ``None`` (for example after a failed connect),
        in which case it is skipped. A failure while closing the cursor does
        not prevent the connection from being closed.

        Args:
            p_mydb: Connection object exposing ``is_connected()`` and
                ``close()``, or ``None``.
            p_cursor: Cursor object exposing ``close()``, or ``None``.

        Returns:
            ``None`` when everything closed cleanly, otherwise a message
            string describing the error(s) that occurred.
        """
        v_errors = []

        if p_cursor is not None:
            try:
                p_cursor.close()
            except Exception as e:
                v_errors.append(str(e))

        if p_mydb is not None:
            try:
                if p_mydb.is_connected():
                    p_mydb.close()
            except Exception as e:
                v_errors.append(str(e))

        if v_errors:
            return (
                "An error occurred while closing the database connection: "
                + "; ".join(v_errors)
            )
        return None

    
    # Function to execute a query
    def f_execute_query_with_params(p_mydb, p_cursor, p_query, p_params=None):
        """Run a query and return its full result set as a DataFrame.

        Args:
            p_mydb: Connection object; the query only runs when its
                ``is_connected()`` returns a true value.
            p_cursor: Cursor used to execute the query and fetch the rows.
            p_query: SQL text to execute.
            p_params: Optional bind parameters forwarded to
                ``p_cursor.execute``. When omitted, the query is executed
                exactly as given, with no parameter argument.

        Returns:
            A DataFrame whose columns are taken from the cursor description,
            or ``None`` when the connection is not open or any error occurs
            (the error is printed).
        """
        try:
            if not p_mydb.is_connected():
                return None

            if p_params is None:
                p_cursor.execute(p_query)
            else:
                p_cursor.execute(p_query, p_params)

            v_results = p_cursor.fetchall()
            # First field of each description entry is the column name.
            v_columns = [desc[0] for desc in p_cursor.description]
            return pd.DataFrame(v_results, columns=v_columns)
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

    
    # Function to execute each query and store the output in a dictionary
    def f_execute_queries_and_collect_results(v_mydb, v_cursor, v_queries):
        """Execute every named query and gather the successful results.

        Args:
            v_mydb: Connection passed through to the query helper.
            v_cursor: Cursor passed through to the query helper.
            v_queries: Mapping of query name to SQL text.

        Returns:
            A dict mapping each query name to its DataFrame. Queries for
            which the helper returned ``None`` or raised are left out.
        """
        collected = {}

        for label, sql_text in v_queries.items():
            try:
                frame = f_execute_query_with_params(v_mydb, v_cursor, sql_text)
            except Exception as e:
                print(f"An error occurred while executing {label}: {e}")
                continue

            # None means the helper could not produce a result for this query.
            if frame is not None:
                collected[label] = frame

        return collected

    
    def f_rename_dataframes(dataframes_dict):
        """Re-key the query results under descriptive names.

        ``'Query1'`` becomes ``'master_df'`` and ``'Query2'`` becomes
        ``'location_attribute_type_df'``. Keys are processed in that order
        and processing stops at the first missing key, after printing a
        message; entries already copied are still returned.

        Args:
            dataframes_dict: Mapping holding the ``'Query1'`` / ``'Query2'``
                results.

        Returns:
            A new dict with the renamed entries (possibly partial or empty).
        """
        key_pairs = (
            ('Query1', 'master_df'),
            ('Query2', 'location_attribute_type_df'),
        )
        result = {}

        try:
            for source_key, target_key in key_pairs:
                if source_key not in dataframes_dict:
                    raise KeyError(f"'{source_key}' not found in the dictionary.")
                result[target_key] = dataframes_dict[source_key]
        except Exception as e:
            print(f"An error occurred while renaming df: {e}")

        return result

    
    def f_process_location_attribute_type_df(location_attribute_type_df):
        """Walk the attribute-type rows without collecting anything.

        Each row's ``location_attribute_type_id`` and ``name`` are read and a
        ``df_<id>`` name is derived, but nothing is stored, so the returned
        dict is always empty. The only observable effect is the message
        printed when the input cannot be iterated or lacks those columns.

        Args:
            location_attribute_type_df: DataFrame expected to provide the
                ``location_attribute_type_id`` and ``name`` columns.

        Returns:
            An empty dict.
        """
        collected = {}

        try:
            for _position, record in location_attribute_type_df.iterrows():
                type_id, type_name = record['location_attribute_type_id'], record['name']
                frame_key = "df_" + format(type_id)
        except Exception as e:
            print("An error occurred during DataFrame processing:", e)

        return collected


    def f_process_location_attribute_data(location_attribute_type_df, v_mydb, v_cursor):
        """Fetch the non-voided values of every location attribute type.

        One query is run per row of ``location_attribute_type_df``. Each
        non-empty result is stored under the key ``df_<attribute type id>``;
        its value column is aliased to the attribute's name.

        Args:
            location_attribute_type_df: DataFrame with the columns
                ``location_attribute_type_id`` and ``name``.
            v_mydb: Connection passed through to the query helper.
            v_cursor: Cursor passed through to the query helper.

        Returns:
            A dict mapping ``df_<id>`` to the DataFrame of that attribute.
            Attribute types whose query failed or returned no rows are
            reported on stdout and left out.
        """
        dataframes = {}

        try:
            for _, row in location_attribute_type_df.iterrows():
                attribute_type_id = row['location_attribute_type_id']
                attribute_name = row['name']

                # The name is used as a column alias, so quote it as an
                # identifier (doubling embedded backticks); unquoted, a name
                # containing spaces or punctuation breaks the statement.
                quoted_alias = "`" + str(attribute_name).replace("`", "``") + "`"
                # int() ensures only a numeric literal reaches the SQL text.
                type_id_literal = int(attribute_type_id)

                location_attribute_query = f"""
                SELECT u.username as Sevika_name, la.location_id, la.value_reference as {quoted_alias}
                FROM location_attribute la
                join users u on (la.voided=0 and la.creator = u.user_id and u.retired = 0)
                WHERE la.voided = 0 AND la.attribute_type_id = {type_id_literal};"""

                attribute_data_df = f_execute_query_with_params(v_mydb, v_cursor, location_attribute_query)

                # The query helper yields None on failure; skip just this
                # attribute instead of abandoning the remaining ones.
                if attribute_data_df is None:
                    print(f"Query failed for Attribute Type ID: {attribute_type_id}, Name: {attribute_name}")
                    continue

                if attribute_data_df.empty:
                    print(f"No data found for Attribute Type ID: {attribute_type_id}, Name: {attribute_name}")
                    continue

                dataframes[f"df_{attribute_type_id}"] = attribute_data_df

        except Exception as e:
            print("An error occurred during DataFrame processing:", e)
        return dataframes

        
    # Function to check database connection
    def f_check_database_connection(v_mydb):
        """Report whether the given connection is currently open.

        Args:
            v_mydb: Connection object exposing ``is_connected()``.

        Returns:
            ``True`` when ``is_connected()`` is truthy. ``False`` when it is
            not (after printing "Not connected") or when the check itself
            raises (after printing the error).
        """
        try:
            if v_mydb.is_connected():
                return True

            print("Not connected")
            return False

        except Exception as e:
            print(f"An error occurred while checking database connection: {e}")
            return False

    def f_print_dataframes(dataframes):
        """Print every entry of a name -> DataFrame mapping (debug helper).

        For each entry the name, the frame itself and a blank separator are
        written to stdout. Any error is caught and printed instead of raised.

        Args:
            dataframes: Mapping of name to DataFrame; must provide ``items()``.
        """
        try:
            for label, frame in dataframes.items():
                print("Dataframe Name:", label)
                print(frame)
                print("\n")
        except Exception as e:
            print("An error occurred:", e)

    def f_merge_dataframes(dataframes):
        """Combine the per-attribute frames ``df_2`` .. ``df_11`` into one.

        ``df_2`` is left-joined with ``df_3`` through ``df_10`` in turn, and
        ``df_11`` is then left-joined with that combined frame (``df_11`` on
        the left). Every join uses ``Sevika_name`` and ``location_id``.

        Args:
            dataframes: Mapping that must contain all keys ``df_2`` to
                ``df_11``.

        Returns:
            The merged DataFrame, or ``None`` when a key is missing or a
            merge fails (the error is printed).
        """
        join_keys = ['Sevika_name', 'location_id']

        try:
            # Fold df_3 .. df_10 onto df_2, one left join per step.
            chained = reduce(
                lambda acc, type_id: pd.merge(
                    acc, dataframes[f'df_{type_id}'], on=join_keys, how='left'
                ),
                range(3, 11),
                dataframes['df_2'],
            )
            # df_11 is the left-hand frame of the final join.
            return pd.merge(dataframes['df_11'], chained, on=join_keys, how='left')
        except Exception as e:
            print("An error occurred:", e)
        return None

    def f_merge_and_update_master_df(master_df, new_df):
        """Left-join ``new_df`` onto ``master_df`` by ``location_id``.

        Args:
            master_df: Left-hand DataFrame of the join.
            new_df: Right-hand DataFrame of the join.

        Returns:
            The merged DataFrame. If the merge raises, the error is printed
            and ``master_df`` is returned unchanged.
        """
        try:
            return pd.merge(master_df, new_df, on='location_id', how='left')
        except Exception as e:
            print("An error occurred:", e)
            return master_df
    
    #To remove column which is not required
    def f_remove_column(df, column_name):
        """Return ``df`` without the given column.

        Args:
            df: DataFrame to drop the column from; it is not modified.
            column_name: Label of the column to remove.

        Returns:
            A new DataFrame lacking ``column_name``. When the column does not
            exist, a message is printed and ``df`` itself is returned.
        """
        try:
            trimmed = df.drop(columns=column_name)
        except KeyError:
            # Missing column: report it and hand back the input untouched.
            print(f"Column '{column_name}' not found in the DataFrame.")
            return df
        return trimmed

    
    def f_export_to_excel(df, file_name=None):
        """Write ``df`` to an Excel file without the index column.

        Args:
            df: Object providing ``to_excel`` (normally a DataFrame).
            file_name: Target file name. When ``None``, a name of the form
                ``output_<YYYYmmdd_HHMMSS>.xlsx`` is built from the current
                local time.

        Returns:
            The file name that was written, or ``None`` when the export
            failed (the error is printed).
        """
        try:
            target = file_name
            if target is None:
                stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                target = f"output_{stamp}.xlsx"

            df.to_excel(target, index=False)
        except Exception as e:
            print(f"An error occurred while exporting to Excel: {e}")
            return None

        return target


    # Driver: prompt for credentials, pull the location master data and the
    # per-attribute data, merge them and export the result to Excel.

    #input parameters
    v_host_input = input("Enter the host: ")
    v_user_input = input("Enter the username: ")
    v_password_input = getpass.getpass("Enter the password: ")
    v_database_input = input("Enter the database: ")

    
    # SQL queries
    v_queries = {'Query1':'''select l.location_id,
                                    l.name as Village,
                                    ls.name as Sanch,
                                    ld.name as District,
                                    l.state_province as State
    						from    location l
                            join    location_tag_map lmap on (lmap.location_id = l.location_id and lmap. location_tag_id = 8)
                            left join location ls on (ls.location_id = l.parent_location)
                            left join location ld on (ld.location_id = ls.parent_location);''',
                                         
                  'Query2':'''SELECT location_attribute_type_id, name
                                     FROM location_attribute_type
                                     WHERE retired = 0;'''}

    # Connect to the database
    v_mydb, v_cursor = f_connect_to_database(v_host_input, v_user_input, v_password_input, v_database_input)

    if v_mydb is None or v_cursor is None:
        # Nothing can be queried without a connection; stop instead of
        # letting every later step fail with its own error message.
        print("Database connection failed; report not generated.")
    else:
        master_df = None
        dataframes = {}

        try:
            # Executing SQL queries
            v_output_dataframes = f_execute_queries_and_collect_results(v_mydb, v_cursor, v_queries)

            # Rename the query results to master_df / location_attribute_type_df
            renamed_dataframes = f_rename_dataframes(v_output_dataframes)
            master_df = renamed_dataframes.get('master_df')
            location_attribute_type_df = renamed_dataframes.get('location_attribute_type_df')

            # Pass over the attribute types (prints a message if the frame is unusable)
            f_process_location_attribute_type_df(location_attribute_type_df)

            # Fetch one DataFrame per attribute type, keyed as df_<id>
            dataframes = f_process_location_attribute_data(location_attribute_type_df, v_mydb, v_cursor)
        finally:
            # Always release the connection, even if a step above raised.
            f_close_database_connection(v_mydb, v_cursor)

            # Check the connection state after closing; this prints
            # "Not connected" when the connection reports itself closed.
            f_check_database_connection(v_mydb)

        if master_df is None:
            # Without the master data there is nothing to merge onto, and
            # dropping a column from None would raise further down.
            print("Location master query returned no result; report not generated.")
        else:
            # Merge the individual frames on location_id and Sevika_name
            merged_df = f_merge_dataframes(dataframes)

            # Attach the merged attributes to the master location data
            final_output = f_merge_and_update_master_df(master_df, merged_df)

            #To remove column which is not required
            final_output = f_remove_column(final_output, 'location_id')

            # Export the final report
            f_export_to_excel(final_output, file_name=None)
