### Importing the relevant libraries

In [6]:
import pandas as pd
import numpy as np
import os
import warnings
import PyPDF2
from datetime import datetime
from io import StringIO


%run "./excel_functions.ipynb"

# Wall-clock time captured once, when this cell is executed. Functions that
# later print `current_time` show this captured value, not the time of the call.
current_time = datetime.now().strftime(format = "%H:%M:%S")

# NOTE(review): functions further down read a global `main_folder_path`; this
# assignment is commented out, so it has to be defined elsewhere before they run.
#main_folder_path = "./033_REPORTS_05032023/"
# Sum of the Babcock amounts: written by incoming_commission_details and read by
# incoming_commissions_summary to cross-check the summary report.
babcock_only_sum = 0

FUNCTION CALLED: read_report_file
==READING IN THE FILE ./033_REPORTS_05032023/30/2PM/033_INCOMING_COMMISSIONS_SUMMARY_30112023_PM.CSV
Format: CSV
STEP 1: Checking if the file has a header or not
STEP 2: File has no header
==END OF FUNCTION: READ REPORT FILE==

FUNCTION EXECUTION ENDED: read_report_file


In [3]:
# # Get the current date
# current_date = datetime.now()

# # Format the date
# formatted_date = current_date.strftime("%d %b %Y")

# # Get current hour
# current_hour = datetime.now().hour

In [11]:
def _get_folder_hour(main_folder_path):
    
    folder_hour = [path_component for path_component in main_folder_path.split("/") if path_component !=""]
    
    try:
        folder_hour = folder_hour[-1] #The folder hour is the last component of the path
    
    except IndexError as e:
        print(f"Folder hour value: {folder_hour}")
        print(f"Python error: {e}")
        return ""
        
    return folder_hour

### Creating a list of keywords for every report

In [4]:
# Lower-case keywords, one per report type handled by this notebook.
# NOTE(review): how these keywords are matched is not visible in this part of the
# notebook. Some entries (e.g. "outgoing_comm_summary_details",
# "outgoing_payments_detail") do not exactly match the names of the handler
# functions defined below -- confirm that this is intended.
list_of_reports = ["incoming_commissions_summary",
                  "incoming_commissions_details",
                  "incoming_cpay_details",
                  "incoming_reversal_details",
                  "incoming_transfers_details",
                  "incoming_payments_summary",
                  "outgoing_cardloads_details",
                  "outgoing_commissions_summary",
                  "outgoing_reversal_details",
                  "outgoing_cpay_details",
                  "outgoing_comm_summary_details",
                  "samebank_commissions_summary",
                  "outgoing_payments_detail",
                   "net_settlement_position"
                  ]

### A function for renaming files:

**This is used for removing errors regarding the spelling of "commission" in the filenames**

In [5]:
@function_name_decorator
def _rename_files(directory_path):
    """Correct the misspelling "commssion" in the filenames of a directory.

    The misspelling is matched case-insensitively and the missing "i" is
    inserted in the case of the letters that follow it, so "COMMSSION" becomes
    "COMMISSION" and "commssion" becomes "commission". Files whose name does
    not contain the misspelling are left untouched.

    Args:
        directory_path: Directory whose entries are inspected (not recursive).

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.rename``.
    """
    import re  # local import so this cell does not depend on another cell's imports

    def _restore_missing_i(match):
        # group(1) is "comm", group(2) is "ssion" (in whatever case was found).
        head, tail = match.group(1), match.group(2)
        return head + ("I" if tail.isupper() else "i") + tail

    for filename in os.listdir(directory_path):
        new_filename = re.sub(r"(comm)(ssion)", _restore_missing_i, filename,
                              flags=re.IGNORECASE)

        # Nothing to fix: avoid a no-op rename and a misleading log line.
        if new_filename == filename:
            continue

        old_path = os.path.join(directory_path, filename)
        new_path = os.path.join(directory_path, new_filename)
        os.rename(old_path, new_path)

        print(f"Renamed: {filename} to {new_filename}")

### A function for generating folder names 7AM, 11AM, 2PM & 5PM:

This function helps other parts of the code decide which folder to access for transformation

In [6]:
@function_name_decorator
def _convert_hour_to_am_pm(hour, for_folder = False):
    """Map an hour of the day to the report slot name 7AM, 11AM, 2PM or 5PM.

    The slots are half-open intervals, so every hour maps to exactly one slot:

        hour < 7          -> "7AM"
        7  <= hour < 11   -> "11AM"
        11 <= hour < 14   -> "2PM"
        hour >= 14        -> "5PM"

    (Previously the boundary hours 7 and 11 matched no branch and fell through
    to "5PM".)

    Args:
        hour: Hour as an int or anything ``int()`` accepts (e.g. "9").
        for_folder: When truthy, a trailing "/" is appended ("11AM/").

    Returns:
        The slot name, or the unmodified ``hour`` argument when it cannot be
        converted to an integer (a warning is issued in that case).
    """
    try:
        hour = int(hour)
    except (TypeError, ValueError) as e:
        # TypeError covers inputs such as None, ValueError covers e.g. "abc".
        print(f"The argument inputted is not a number.\nPython error: {e}")
        warnings.warn("Returning the original input without any transformations")
        return hour

    if hour < 7:
        slot = "7AM"
    elif hour < 11:
        slot = "11AM"
    elif hour < 14:
        slot = "2PM"
    else:
        slot = "5PM"

    return slot + "/" if for_folder else slot
# folder_path = main_folder_path + _convert_hour_to_am_pm(current_hour, for_folder= True)
# folder_path

### A function that generates a list of Report files:

This function looks in the required folder (7AM, 11AM, 2PM, or 5PM) and returns the list of files available in it.

In [7]:
@function_name_decorator
def list_of_files_generator(main_folder_path):
    """List the entries of a report folder, logging each one.

    Reads the module-level ``current_time`` for the log output.

    Args:
        main_folder_path: Folder to list; its last path component is used as
            the folder label in the log messages.

    Returns:
        The list returned by ``os.listdir``, or "" (an empty string, not an
        empty list) when the folder does not exist or contains nothing.
    """
    hour_label = _get_folder_hour(main_folder_path)

    print(f"\n==GENERATING THE LIST OF FILES FOR {hour_label}")
    print(f"The current time: {current_time}")
    print(f"STEP 1: Listing all the files available in the {hour_label} folder")

    try:
        found_files = os.listdir(main_folder_path)
    except FileNotFoundError:
        warnings.warn(f"Folder {main_folder_path} could not be found")
        print("Returning an empty string as the list of files")
        return ""

    for entry in found_files:
        print(entry)

    # Happy path first; the empty-folder case is handled below.
    if found_files:
        return found_files

    warnings.warn(f"There are NO files in {hour_label} folder")
    print("Returning an empty string as the list of files")
    return ""
    
# list_of_files = list_of_files_generator(main_folder_path)
        

### A function to get Net Settlement PDF file

In [8]:
@function_name_decorator
def _get_net_settlement_path(keyword, list_of_files):
    """Return the first PDF filename that contains ``keyword``.

    The comparison is done on the lower-cased filename, so ``keyword`` is
    expected in lower case. The log line reads the module-level name
    ``current_hour``, which is not defined in the visible part of this
    notebook and must exist before this function is called.

    Args:
        keyword: Substring to look for in the lower-cased filename.
        list_of_files: Iterable of filenames (names only, not full paths).

    Returns:
        The matching filename with its original casing, or "" when no PDF
        filename contains the keyword.
    """
    print("\n==GETTING THE PATH FOR NET SETTLEMENT PDF FILE")
    print(f"STEP 1: Looping through the files in the directory (List of files) in the {_convert_hour_to_am_pm(current_hour)} directory")

    pdf_matches = (
        name
        for name in list_of_files
        if keyword in name.lower() and name.lower().endswith(".pdf")
    )
    # Lazily take the first hit; fall back to "" when there is none.
    return next(pdf_matches, "")


### A function to generate Narration:

This function is used to generate the narration for some other functions so that they can fill up their output dataframe

In [9]:
@function_name_decorator
def _get_incoming_commissions_narration(keyword, main_folder_path):
    """Build the narration "<batch id>/<formatted_date>/IN COMM" from the settlement PDF.

    The batch id is the first whitespace-delimited token that follows the text
    "Batch Id: " in the PDF. Reads the module-level names ``list_of_files`` and
    ``formatted_date``, which must be defined before this function is called.

    Args:
        keyword: Lower-case substring identifying the settlement PDF filename.
        main_folder_path: Folder prefix that is concatenated with the filename,
            so it is expected to end with "/".

    Returns:
        The narration string. "" when no settlement PDF is found. When the PDF
        has no batch id, a warning is issued and the narration is returned with
        an empty batch id part (previously this case raised IndexError and the
        warning branch could never run).
    """
    print("\n==CREATING NARRATION FOR INCOMING COMMISSIONS SUMMARY==")

    print("STEP 1: Getting the file path for the settlement PDF file")
    required_filename = _get_net_settlement_path(keyword, list_of_files)

    if required_filename == "":
        warnings.warn("Could not find the Settlement PDF file")
        return ""

    pdf_text = read_pdf(main_folder_path + required_filename)

    # partition() never raises: `marker` is "" when "Batch Id: " is absent.
    _, marker, after_marker = pdf_text.partition("Batch Id: ")
    tokens = after_marker.split()
    batch_id = tokens[0] if marker and tokens else ""

    print("STEP 2: Recording the Narration")
    narration = batch_id + "/" + formatted_date + "/IN COMM"

    if batch_id == "":
        warnings.warn("No Batch ID for narration found")

    return narration
    
# _get_incoming_commissions_narration("net_settlement_position")

### A function for generating INCOMING_COMMISSION_DETAILS

In [10]:
@function_name_decorator
def incoming_commission_details(filepath):
    """Build the Babcock settlement rows from an INCOMING COMMISSION DETAILS report.

    Keeps the rows whose column "I" contains "BABCOCK", rearranges the columns
    with the spreadsheet-style helpers and returns the columns ACCOUNT, AMOUNT
    and NARRATION. Rows with a missing value in column "I" are treated as
    non-Babcock (previously they made the row selection raise).

    Side effect: stores the sum of AMOUNT in the module-level
    ``babcock_only_sum`` so that ``incoming_commissions_summary`` can
    cross-check it.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    global babcock_only_sum

    print("\n==STEPS FOR INCOMING COMMISSION DETAILS==")
    print("STEP 1: Reading in the file")
    df, has_no_header = read_report_file(filepath)

    # Sentinel first, so a missing file can never reach the column handling.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header == True:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Sorting values by column 'I'")
    incoming_commissions_df = df.sort_values("I")

    print("STEP 4: Selecting only values with 'BABCOCK' from column I")
    # na=False keeps the mask strictly boolean when column I has missing values.
    babcock_mask = incoming_commissions_df["I"].str.contains("BABCOCK", na=False)
    # .copy() so the column assignments below act on an independent frame.
    babcock_df = incoming_commissions_df.loc[babcock_mask, :].copy()

    print("STEP 5: Moving column 'H', to column 'A'")
    babcock_df = excel_move_column(babcock_df, "H", "A")

    print("STEP 6: Moving column 'F' to column 'B'")
    babcock_df = excel_move_column(babcock_df, "F", "B")

    print("STEP 7: Moving column 'F' to 'C'")
    babcock_df = excel_move_column(babcock_df, "F", "C")

    print("STEP 8: Inserting 2 column spaces between columns D and E")
    babcock_df = generate_inner_columns(babcock_df, 2, "D")

    print("STEP 9: Getting the last 7 characters from column C and inserting in column D")
    babcock_df.loc[:, "D"] = babcock_df["C"].str[-7:]

    print("STEP 10: Concatenating columns D and C")
    babcock_df["E"] = babcock_df["D"] + babcock_df["C"]

    print("STEP 11: Copy the values of column E to column C")
    babcock_df.loc[:, "C"] = babcock_df["E"].copy()

    print("STEP 12: Remove the 'PAYMENT' word from column C")
    # Literal replacement; regex=False makes that explicit on every pandas version.
    babcock_df["C"] = babcock_df["C"].str.replace("PAYMENT", "", regex=False)

    print("STEP 13: Selecting only columns 'A', 'B', and 'C'")
    babcock_incoming_commissions_settlement = babcock_df.loc[:, ["A", "B", "C"]]

    print("STEP 14: Renaming the columns to: ACCOUNT, AMOUNT, and NARRATION")
    babcock_incoming_commissions_settlement = babcock_incoming_commissions_settlement.rename(
        columns={"A": "ACCOUNT", "B": "AMOUNT", "C": "NARRATION"}
    )

    babcock_only_sum = babcock_incoming_commissions_settlement["AMOUNT"].astype(float).sum()

    return babcock_incoming_commissions_settlement

# babcock_incoming_commissions_details_df = incoming_commission_details("033_REPORTS_05032023/033_INCOMING_COMMISSIONS_DETAILS_05032023_AM.CSV")

### A function for generating INCOMING_COMMISSIONS_SUMMARY

In [11]:
@function_name_decorator
def incoming_commissions_summary(filepath):
    """Build settlement rows from an INCOMING COMMISSIONS SUMMARY report.

    Cross-checks the Babcock total of the summary (column "D" of the rows whose
    column "C" contains "babcock") against the module-level
    ``babcock_only_sum``, then returns every non-Babcock row as
    ACCOUNT (column "B"), AMOUNT (column "D") and NARRATION.

    Reads the module-level ``main_folder_path`` to locate the settlement PDF;
    it must be defined before this function is called.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    print("\n==STEPS FOR INCOMING COMMISSIONS SUMMARY==")
    print("STEP 1: Reading in the file")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Comparing Babcock settlement sums from INCOMING COMMISSIONS SUMMARY and result from data manipulation")
    # na=False: rows without a value in column C count as non-Babcock.
    babcock_mask = df["C"].str.lower().str.contains("babcock", na=False)
    babcock_summary_amount = df.loc[babcock_mask, "D"].sum()
    print(f"babcock summary amount: {babcock_summary_amount}")
    print(f"babcock only sum: {babcock_only_sum}")

    # Both values are floating-point sums, so compare to within half of 0.01
    # instead of demanding bit-for-bit equality.
    try:
        amounts_match = abs(float(babcock_summary_amount) - float(babcock_only_sum)) < 0.005
    except (TypeError, ValueError):
        amounts_match = babcock_summary_amount == babcock_only_sum

    if amounts_match:
        print("Successfully confirmed Babcock settlement value")
    else:
        warnings.warn("Babcock settlement value not confirmed!")

    print("STEP 4: Considering all records in the summary except Babcock...")
    incoming_commissions_summary_df = df.loc[~babcock_mask, :]

    print("STEP 5: Reading the Batch ID from the NET SETTLEMENT POSITION")
    narration = _get_incoming_commissions_narration("net_settlement_position", main_folder_path)

    print("STEP 7: Selecting only columns B and D")
    # .copy() so adding NARRATION below does not write into a slice of `df`.
    incoming_commissions_settlement = incoming_commissions_summary_df.loc[:, ["B", "D"]].copy()

    print("STEP 8: Rename the columns to ACCOUNT AND AMOUNT")
    incoming_commissions_settlement.columns = ["ACCOUNT", "AMOUNT"]

    print("STEP 9: Including the NARRATION column to the output dataframe")
    if narration == "":
        warnings.warn("NARRATION was not extracted from NET SETTLEMENT POSITION PDF FILE. This may be because the file is not in PATH")
    incoming_commissions_settlement.loc[:, "NARRATION"] = narration

    return incoming_commissions_settlement

# filepath = "033_REPORTS_05032023/033_INCOMING_COMMISSIONS_SUMMARY_05032023_AM.CSV"
# net_settlement_position_pdf_file_path = "./033_REPORTS_05032023/033_NET_SETTLEMENT_POSITION_05032023_AM.PDF"
# incoming_commissions_summary_df = incoming_commissions_summary(filepath)
# incoming_commissions_summary_df

### A function for generating INCOMING_TRANSFER_DETAILS

In [12]:
@function_name_decorator
def incoming_transfer_details(filepath):
    """Summarise an INCOMING TRANSFERS DETAILS report into one settlement row.

    AMOUNT is the sum of column "H" (missing values counted as 0) and ACCOUNT
    is the first distinct value of column "G". Reads the module-level
    ``formatted_date`` for the narration.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A one-row DataFrame with ACCOUNT, AMOUNT and NARRATION, or "" when
        ``read_report_file`` returns the "NA" sentinel (handled here as
        "file not found").

    Raises:
        IndexError: If column "G" has no values (empty report).
    """
    print("\n==STEPS FOR INCOMING TRANSFER DETAILS==")

    print("STEP 1: Reading in the file")
    incoming_transfer_df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        incoming_transfer_df = add_letter_columns_to_df(incoming_transfer_df)

    print("STEP 3: Summing up the values of column H")
    auto_sum = incoming_transfer_df["H"].fillna(0).astype(float).sum()

    print("STEP 4: Extracting the Account ID from column G")
    account_num = incoming_transfer_df["G"].unique()[0]

    print("STEP 5: Create a dictionary for the Account extracted and Amount calculated")
    auto_sum_acct_num_dict = {
        "ACCOUNT": [account_num],
        "AMOUNT": [auto_sum],
    }

    print("STEP 6: Convert the dictionary into a DataFrame")
    income_transfer_settlement_df = pd.DataFrame(auto_sum_acct_num_dict)

    print("STEP 7: Including the NARRATION in the DataFrame")
    income_transfer_settlement_df["NARRATION"] = f"Incoming Transfers {formatted_date} S1"

    return income_transfer_settlement_df

# filepath = "033_REPORTS_05032023/033_INCOMING_TRANSFERS_DETAILS_06032023_AM.CSV"
# incoming_transfer_details_df = incoming_transfer_details(filepath)
# incoming_transfer_details_df

### A function for generating OUTGOING_COMMISSIONS_SUMMARY

In [13]:
@function_name_decorator
def outgoing_commissions_summary(filepath):
    """Build settlement rows from an OUTGOING COMMISSIONS SUMMARY report.

    Returns one row per input row: ACCOUNT from column "B", AMOUNT from column
    "D" forced negative (outgoing), and a fixed NARRATION built from the
    module-level ``formatted_date``.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    print("\n==STEPS FOR OUTGOING COMMISSIONs SUMMARY==")

    print("STEP 1: Reading in the file")
    outgoing_summary_df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        outgoing_summary_df = add_letter_columns_to_df(outgoing_summary_df)

    print("STEP 3: Create a dictionary mapping column headers to their respective values")
    outgoing_summary_dict = outgoing_summary_df.to_dict("list")

    print("STEP 4: Selecting just column headers B and D and their values")
    cols_to_retrieve = ["B", "D"]
    selected_summary_dict = {key: outgoing_summary_dict[key] for key in cols_to_retrieve if key in outgoing_summary_dict}

    print("STEP 5: Including the NARRATION in the dictionary")
    selected_summary_dict["NARRATION"] = [f"OUTGOING COMM {formatted_date} S1"] * len(outgoing_summary_df)

    print("STEP 6: Creating a DataFrame from the dictionary")
    outgoing_commission_summary_df = pd.DataFrame(selected_summary_dict)

    print("STEP 7: Negating the values of column D because it is an 'Outgoing' value")
    # abs() first so the result is negative whatever the sign in the report.
    outgoing_commission_summary_df.loc[:, "D"] = outgoing_commission_summary_df["D"].apply(abs) * -1

    print("STEP 8: Renaming the columns of the final dataframe")
    outgoing_commission_summary_df = outgoing_commission_summary_df.rename(
        columns={"B": "ACCOUNT", "D": "AMOUNT"}
    )

    return outgoing_commission_summary_df


# filepath = "033_REPORTS_05032023/033_OUTGOING_COMMSSIONS_SUMMARY_05032023_AM.CSV"
# outgoing_commission_summary_df = outgoing_commissions_summary(filepath)
# outgoing_commission_summary_df

### A function for generating OUTGOING_PAYMENTS_SUMMARY

In [14]:
@function_name_decorator
def outgoing_payments_summary(filepath):
    """Build settlement rows from an OUTGOING PAYMENTS SUMMARY report.

    Forward-fills column "J", sums column "D" per distinct value of "J" and
    returns one row per group: ACCOUNT (from "J"), AMOUNT (the sum, forced
    negative) and NARRATION (built from the module-level ``formatted_date``).

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    print("\n==STEPS FOR OUTGOING PAYMENTS SUMMARY==")

    print("STEP 1: Reading the file for OUTGOING PAYMENTS SUMMARY...")
    print("STEP 1: Reading in the file")
    outgoing_payments_df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        outgoing_payments_df = add_letter_columns_to_df(outgoing_payments_df)

    print("STEP 3: Filling NA in column J and taking the sum of column D values for each category in column J")
    # Assign the filled column back instead of an in-place fill on a column
    # selection, which is not guaranteed to modify the parent frame.
    outgoing_payments_df["J"] = outgoing_payments_df["J"].ffill()
    # NOTE(review): groupby drops rows whose key is still NaN (rows above the
    # first value in column J); confirm those rows may be left out.
    outgoing_payments_settlement_df = outgoing_payments_df.groupby("J").agg({"D": "sum"}).reset_index()

    print("STEP 4: Because this is an outgoing file, the values of aggregated column D is negated")
    outgoing_payments_settlement_df.loc[:, "D"] = outgoing_payments_settlement_df["D"].apply(abs) * -1

    print("STEP 5: Renaming the columns")
    outgoing_payments_settlement_df = outgoing_payments_settlement_df.rename(
        columns={"J": "ACCOUNT", "D": "AMOUNT"}
    )

    print("STEP 6: Including the NARRATION in the DataFrame")
    outgoing_payments_settlement_df["NARRATION"] = f"Outgoing Payment {formatted_date} S1"

    print("STEP 7: In case there is any missing value in ACCOUNT column, fill it with the value specified")
    # fillna returns a new Series; the result has to be stored to take effect.
    outgoing_payments_settlement_df["ACCOUNT"] = outgoing_payments_settlement_df["ACCOUNT"].fillna(value = "NGN09992501102")

    return outgoing_payments_settlement_df


# filepath = "033_REPORTS_05032023/033_OUTGOING_PAYMENTS_SUMMARY_05032023_AM.CSV"
# outgoing_payments_summary_df = outgoing_payments_summary(filepath)
# outgoing_payments_summary_df

### A function for generating SAMEBANK_COMMISSIONS_SUMMARY

In [15]:
@function_name_decorator
def samebank_commissions_summary(filepath):
    """Build settlement rows from a SAMEBANK COMMISSIONS SUMMARY report.

    Every input row yields two output rows with the same NARRATION: one with
    ACCOUNT from column "B" and AMOUNT from column "F" as read, and one with
    ACCOUNT from column "E" and AMOUNT from column "F" forced negative.
    Reads the module-level ``formatted_date`` for the narration.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The concatenated settlement DataFrame (original row labels are kept,
        so each label appears twice), or "" when ``read_report_file`` returns
        the "NA" sentinel (handled here as "file not found").
    """
    print("\n==STEPS FOR SAMEBANK COMMISSIONS SUMMARY==")

    print("STEP 1: Reading the file for SAMEBANK COMMISSIONS SUMMARY...")
    print("STEP 1: Reading in the file")
    same_bank_commission_df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        same_bank_commission_df = add_letter_columns_to_df(same_bank_commission_df)

    print("STEP 3: Selecting columns B and F from the SAMEBANK COMMISSIONS DataFrame")
    # .copy() so the assignments below never write into the source frame.
    col_B_F = same_bank_commission_df.loc[:, ["B", "F"]].copy()

    print("STEP 4: Adding the NARRATION for the DataFrame containing columns B and F")
    col_B_F.loc[:, "NARRATION"] = f"23CSA53028163_A/{formatted_date}/SMBK COMM"

    print("STEP 5: Renaming the column headers")
    col_B_F = col_B_F.rename(columns={"B": "ACCOUNT", "F": "AMOUNT"})

    print("STEP 6: Repeating the above steps (3-5) for columns E and F")
    col_E_F = same_bank_commission_df.loc[:, ["E", "F"]].copy()
    col_E_F.loc[:, "NARRATION"] = f"23CSA53028163_A/{formatted_date}/SMBK COMM"

    print("STEP 7: Negating the values on column F because column E gives the values to be negated")
    col_E_F.loc[:, "F"] = col_E_F["F"].apply(abs) * -1

    print("STEP 8: Renaming the column headers")
    col_E_F = col_E_F.rename(columns={"E": "ACCOUNT", "F": "AMOUNT"})

    print("STEP 9: Combining both DataFrames")
    same_bank_commission_settlement = pd.concat([col_B_F, col_E_F])

    return same_bank_commission_settlement

# filepath = "033_REPORTS_05032023/033_SAMEBANK_COMMSSIONS_SUMMARY_05032023_AM.CSV"
# samebank_commissions_summary_df = samebank_commissions_summary(filepath)
# samebank_commissions_summary_df

### A function for generating INCOMING_REVERSAL_DETAILS

In [16]:
@function_name_decorator
def incoming_reversal_details(filepath):
    """Summarise an INCOMING REVERSAL DETAILS report into one settlement row.

    AMOUNT is the sum of column "D"; ACCOUNT is the fixed value
    "NGN09992501102"; NARRATION is built from the module-level
    ``formatted_date``.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A one-row DataFrame with ACCOUNT, AMOUNT and NARRATION, or "" when
        ``read_report_file`` returns the "NA" sentinel (handled here as
        "file not found").
    """
    print("==\nSTEPS FOR INCOMING REVERSAL DETAILS")

    print("STEP 1: Reading in the file INCOMING_REVERSAL_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Calculating the transaction amount by summing up column D")
    transaction_amount = df["D"].astype(float).sum()

    print("STEP 4: Creating a dictionary for the Narration")
    incoming_reversal_dict = {
        "ACCOUNT": ["NGN09992501102"],
        "AMOUNT": [transaction_amount],
        "NARRATION": [f"Incoming Reversal {formatted_date}"],
    }

    print("STEP 5: Converting the dictionary to a DataFrame")
    incoming_reversal_settlement_df = pd.DataFrame(incoming_reversal_dict)

    return incoming_reversal_settlement_df


### A function for generating OUTGOING_CPAY_COMM_SUMMARY_DETAILS

In [17]:
@function_name_decorator
def outgoing_cpay_comm_summary_details(filepath):
    """Summarise an OUTGOING CPAY COMM SUMMARY DETAILS report into one settlement row.

    AMOUNT is the sum of column "H", forced negative and rounded to 2 decimal
    places; ACCOUNT is the fixed value "1021821845"; NARRATION is built from
    the module-level ``formatted_date``.

    The log messages now name this report; they previously announced
    "INCOMING REVERSAL DETAILS" and numbered two different steps as "STEP 5".

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A one-row DataFrame with ACCOUNT, AMOUNT and NARRATION, or "" when
        ``read_report_file`` returns the "NA" sentinel (handled here as
        "file not found").
    """
    print("==\nSTEPS FOR OUTGOING CPAY COMM SUMMARY DETAILS")

    print("STEP 1: Reading in the file OUTGOING_CPAY_COMM_SUMMARY_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Calculating the transaction amount by summing up column H")
    transaction_amount = df["H"].astype(float).sum()

    print("STEP 4: Creating the outgoing cpay comm dictionary")
    outgoing_cpay_comm_dict = {
        "ACCOUNT": ["1021821845"],
        "AMOUNT": [transaction_amount],
        "NARRATION": [f"Outgoing Cpay COMM {formatted_date}"],
    }

    print("STEP 5: Negating the AMOUNT because it is outgoing")
    outgoing_cpay_comm_dict["AMOUNT"] = [round(abs(transaction_amount) * -1, 2)]

    print("STEP 6: Creating the outgoing cpay comm DataFrame")
    outgoing_cpay_comm_df = pd.DataFrame(outgoing_cpay_comm_dict)

    return outgoing_cpay_comm_df


### A function for generating OUTGOING_CPAY_DETAILS

In [18]:
@function_name_decorator
def outgoing_cpay_details(filepath):
    """Summarise an OUTGOING CPAY DETAILS report into one settlement row.

    AMOUNT is the sum of the "TRANS_AMOUNT" column, forced negative; ACCOUNT is
    the fixed value "1021821845"; NARRATION is built from the module-level
    ``formatted_date``.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A one-row DataFrame with ACCOUNT, AMOUNT and NARRATION, or "" when
        ``read_report_file`` returns the "NA" sentinel (handled here as
        "file not found").
    """
    print("==\nSTEPS FOR OUTGOING CPAY DETAILS")

    print("STEP 1: Reading in the file OUTGOING_CPAY_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Calculating the transaction amount")
    # NOTE(review): a header-less file gets letter column names in STEP 2, so
    # "TRANS_AMOUNT" presumably only exists when the file has a header -- confirm.
    transaction_amount = df["TRANS_AMOUNT"].astype(float).sum()

    print("STEP 4: Creating the Narration dictionary")
    outgoing_cpay_dict = {
        "ACCOUNT": ["1021821845"],
        "AMOUNT": [transaction_amount],
        "NARRATION": [f"Outgoing CorporatePay {formatted_date}"],
    }

    print("STEP 5: Negating the transaction amount because it is outgoing")
    outgoing_cpay_dict["AMOUNT"] = [abs(transaction_amount) * -1]

    print("STEP 6: Creating the Final Narration DataFrame")
    outgoing_cpay_settlement_df = pd.DataFrame(outgoing_cpay_dict)

    return outgoing_cpay_settlement_df


### A function for generating OUTGOING_REVERSAL_DETAILS

In [19]:
@function_name_decorator
def outgoing_reversal_details(filepath):
    """Summarise an OUTGOING REVERSAL DETAILS report into one settlement row.

    AMOUNT is the sum of column "D", forced negative; ACCOUNT is the fixed
    value "NGN09992527084"; NARRATION is built from the module-level
    ``formatted_date``.

    The total is emitted once, matching ``incoming_reversal_details``.
    Previously the same total was repeated once per input row, which
    multiplied the settlement amount by the number of rows in the report.
    An empty report still yields an empty DataFrame.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A DataFrame with ACCOUNT, AMOUNT and NARRATION (one row, or none for an
        empty report), or "" when ``read_report_file`` returns the "NA"
        sentinel (handled here as "file not found").
    """
    print("==\nSTEPS FOR OUTGOING REVERSAL DETAILS")

    print("STEP 1: Reading in the file for OUTGOING_REVERSAL_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Creating transaction amount by summing up column D ")
    transaction_amount = abs(df["D"].astype(float).sum()) * -1

    print("STEP 4: Creating the Narration dictionary")
    account = "NGN09992527084"
    # One row carrying the total; zero rows when the report itself is empty.
    row_count = 1 if len(df) else 0
    outgoing_reversal_details_dict = {
        "ACCOUNT": [account] * row_count,
        "AMOUNT": [transaction_amount] * row_count,
        "NARRATION": [f"Outgoing Reversal {formatted_date}"] * row_count,
    }

    print("STEP 5: Creating the Narration DataFrame")
    outgoing_reversal_details_settlement_df = pd.DataFrame(outgoing_reversal_details_dict)

    return outgoing_reversal_details_settlement_df

### A function for generating OUTGOING_CARD_LOAD_DETAILS

In [20]:
@function_name_decorator
def outgoing_cardloads_details(filepath):
    """Build settlement rows from an OUTGOING CARDLOADS DETAILS report.

    Sums column "F" per distinct value of column "L" and returns one row per
    group: ACCOUNT (from "L"), AMOUNT (the sum, forced negative) and NARRATION
    (built from the module-level ``formatted_date``).

    The STEP 1 log message now names this report; it previously said
    "OUTGOING_REVERSAL_DETAILS".

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    print("==STEPS FOR OUTGOING CARD LOAD==")

    print("STEP 1: Reading in the file for OUTGOING_CARDLOADS_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Summing column F by each category in column L")
    outgoing_card_load_details_settlement_df = df.groupby("L").agg({"F": "sum"}).reset_index()

    print("STEP 4: Negating column F because it is outgoing")
    outgoing_card_load_details_settlement_df["F"] = outgoing_card_load_details_settlement_df["F"].apply(abs) * -1

    print("STEP 5: Adding the Narration column to the DataFrame")
    outgoing_card_load_details_settlement_df.loc[:, "NARRATION"] = f"Outgoing Cardloads {formatted_date}"

    print("STEP 6: Renaming the columns L and F to ACCOUNT and AMOUNT respectively")
    outgoing_card_load_details_settlement_df = outgoing_card_load_details_settlement_df.rename(
        columns={"L": "ACCOUNT", "F": "AMOUNT"}
    )

    return outgoing_card_load_details_settlement_df


### A function for generating INCOMING PAYMENTS SUMMARY

In [21]:
@function_name_decorator
def incoming_payments_summary(filepath):
    """Build settlement rows from an INCOMING PAYMENTS SUMMARY report.

    Returns one row per input row: ACCOUNT from column "A", AMOUNT from column
    "D", and the NARRATION "<batch id>/<formatted_date>/IN PAYMENT". The batch
    id is the first whitespace-delimited token after "Batch Id: " on the first
    page of the Net Settlement Position PDF.

    The narration parts are now separated by "/", like the other batch-based
    narrations in this notebook; previously they were joined with no separator.
    A missing PDF gives an empty narration and a missing batch id gives an
    empty batch id part, each with a warning, instead of an exception.

    Reads the module-level names ``main_folder_path``, ``list_of_files`` and
    ``formatted_date``, which must be defined before this function is called.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        The settlement DataFrame, or "" when ``read_report_file`` returns the
        "NA" sentinel (handled here as "file not found").
    """
    print("==STEPS FOR INCOMING PAYMENT SUMMARY")

    print("STEP 1: Reading in the file for INCOMING PAYMENT SUMMARY...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Selecting just columns A and D")
    # .copy() so adding NARRATION below does not write into a slice of `df`.
    incoming_payments_summary_settlement_df = df[["A", "D"]].copy()

    print("STEP 4: Finding the Net Settlement Position file")
    keyword = "net_settlement_position"
    pdf_filename = _get_net_settlement_path(keyword, list_of_files)

    if pdf_filename == "":
        warnings.warn("Could not find the Settlement PDF file")
        narration = ""
    else:
        net_settlement_position_pdf_file_path = main_folder_path + pdf_filename
        print(main_folder_path)
        print(net_settlement_position_pdf_file_path)

        print("STEP 5: Reading the batch ID from the Net Settlement PDF file")
        net_settlement_pdf_text = read_pdf(net_settlement_position_pdf_file_path)
        # Only the text before the first "Page" marker is searched.
        net_settlement_data = net_settlement_pdf_text.split("Page")[0]
        print(net_settlement_data)

        # partition() never raises: `marker` is "" when "Batch Id: " is absent.
        _, marker, after_marker = net_settlement_data.partition("Batch Id: ")
        tokens = after_marker.split()
        batch_id = tokens[0] if marker and tokens else ""
        if batch_id == "":
            warnings.warn("No Batch ID for narration found")

        narration = f"{batch_id}/{formatted_date}/IN PAYMENT"

    print("STEP 6: Using the extracted batch ID to create the Narration column value")
    incoming_payments_summary_settlement_df.loc[:, "NARRATION"] = narration

    print("STEP 7: Renaming columns A and D to ACCOUNT and AMOUNT")
    incoming_payments_summary_settlement_df = incoming_payments_summary_settlement_df.rename(
        columns={"A": "ACCOUNT", "D": "AMOUNT"}
    )

    return incoming_payments_summary_settlement_df


### A function for generating INCOMING CPAY DETAILS

In [22]:
@function_name_decorator
def incoming_cpay_details(filepath):
    """Summarise an INCOMING CPAY DETAILS report into one settlement row.

    AMOUNT is the sum of the "TRANS_AMOUNT" column; ACCOUNT is the fixed value
    "NGN09992508050"; NARRATION is built from the module-level
    ``formatted_date``.

    Args:
        filepath: Path of the report file, passed to ``read_report_file``.

    Returns:
        A one-row DataFrame with ACCOUNT, AMOUNT and NARRATION, or "" when
        ``read_report_file`` returns the "NA" sentinel (handled here as
        "file not found").
    """
    print("==STEPS FOR INCOMING CPAY DETAILS==")

    print("STEP 1: Reading in the file for INCOMING_CPAY_DETAILS...")
    df, has_no_header = read_report_file(filepath)

    # "NA" is a truthy string: it must be tested before the truthiness check,
    # otherwise the file-not-found branch can never run.
    if isinstance(has_no_header, str) and has_no_header == "NA":
        warnings.warn("File was not found!")
        return ""

    if has_no_header:
        print("STEP 2: Adding column names to the data")
        df = add_letter_columns_to_df(df)

    print("STEP 3: Calculating the transaction amount")
    # NOTE(review): a header-less file gets letter column names in STEP 2, so
    # "TRANS_AMOUNT" presumably only exists when the file has a header -- confirm.
    transaction_amount = df["TRANS_AMOUNT"].astype(float).sum()

    print("STEP 4: Creating the Narration dictionary")
    incoming_cpay_dict = {
        "ACCOUNT": "NGN09992508050",
        "AMOUNT": transaction_amount,
        "NARRATION": f"Incoming CorporatePay {formatted_date}",
    }

    print("STEP 5: Creating the Narration DataFrame")
    # Scalar values need an explicit index to form a one-row frame.
    incoming_cpay_details_settlement_df = pd.DataFrame(incoming_cpay_dict, index=[0])

    return incoming_cpay_details_settlement_df


### A function for generating the Narration Template:

This function is used by other functions to create the Narration Template

In [23]:
@function_name_decorator
def _create_narration_template():
    """Return an empty DataFrame with the settlement columns.

    Returns:
        A DataFrame with no rows and the columns ACCOUNT, AMOUNT and NARRATION,
        in that order.
    """
    print("\n==CREATING THE NARRATION TEMPLATE==")

    print("STEP 1: Create template dictionary")
    # One empty list per column, in the order the columns should appear.
    empty_columns = {header: [] for header in ("ACCOUNT", "AMOUNT", "NARRATION")}

    print("STEP 2: Create template DataFrame")
    return pd.DataFrame(empty_columns)

# narration_template_df = _create_narration_template()

### A function for compiling DataFrames:

This function combines the DataFrames produced by the transformation functions defined above into a single DataFrame.

In [24]:
@function_name_decorator
def compile_dataframes(*dframes):
    """Concatenate the settlement DataFrames into one DataFrame.

    The report functions in this notebook return "" when their input file is
    missing. Such values, and anything else that is not a DataFrame or Series,
    are skipped with a warning instead of making the concatenation fail. When
    nothing usable is passed, the empty narration template is returned.

    Args:
        *dframes: Settlement DataFrames to stack, in order.

    Returns:
        The row-wise concatenation of the usable inputs (original row labels
        are kept), or the empty template when there are none.
    """
    print("==COMPILING THE SETTLEMENT DATAFRAMES==")

    print("STEP 1: Calling the create function to create Template")
    template_df = _create_narration_template()

    usable_frames = [frame for frame in dframes if isinstance(frame, (pd.DataFrame, pd.Series))]
    skipped_count = len(dframes) - len(usable_frames)
    if skipped_count:
        warnings.warn(f"Skipped {skipped_count} input(s) that are not DataFrames (e.g. a report that could not be read)")

    print("Compiling all the templates into 1 DataFrame")
    if not usable_frames:
        # pd.concat raises on an empty list; fall back to the empty template.
        return template_df

    narrations_df = pd.concat(usable_frames)

    return narrations_df

# narrations_df = compile_dataframes(narration_template_df, outgoing_commission_summary_df, 
#           incoming_transfer_details_df, babcock_incoming_commissions_details_df, incoming_commissions_summary_df,
#               samebank_commissions_summary_df)

# narrations_df

In [8]:
def _get_net_settlement_values(list_of_files, main_folder_path):
    """Collect the convertible tokens of the Net Settlement Position PDF.

    The PDF text is split on whitespace and every token is passed through
    ``_convert_string_numbers``; tokens for which that helper returns the
    string "Not a number" are dropped.

    Args:
        list_of_files: Filenames among which the settlement PDF is searched.
        main_folder_path: Folder prefix that is concatenated with the filename,
            so it is expected to end with "/".

    Returns:
        The list of converted values, in the order they appear in the PDF.
    """
    print("==COLLECTING ALL THE VALUES IN THE NET SETTLEMENT POSITION PDF")

    print("STEP 1: Getting the filename of the Net Settlement Position")
    pdf_name = _get_net_settlement_path("net_settlement", list_of_files)

    print("STEP 2: Creating the full path for the Net Settlement Position")
    pdf_path = main_folder_path + pdf_name

    print("STEP 3: Reading the Net Settlement PDF file")
    pdf_text = read_pdf(pdf_path)

    print("STEP 4: Creating a list of values in the Net Settlement Position PDF file")
    converted_tokens = []
    for token in pdf_text.split():
        converted_tokens.append(_convert_string_numbers(token, mute = True))

    # Second pass: drop the marker the converter returns for non-numeric tokens.
    extracted_values = []
    for value in converted_tokens:
        if value != "Not a number":
            extracted_values.append(value)

    print("STEP 5: Returning the list of values extracted.")

    return extracted_values



### A function to extract Settlement Position:

This function is used to extract the Settlement Position from the correct Settlement PDF file and returns this value

In [25]:
@function_name_decorator
def _extract_settlement_position(filepath):
    """Read the Net Settlement PDF and return its settlement position.

    The text before the first "Page" marker is parsed as a whitespace-separated
    table (the first two lines are skipped). The second-to-last row supplies
    the Total Incoming amount (column 0), the Total Outgoing amount (column 1)
    and the settlement position (column 2). The extracted position is
    cross-checked against ``incoming - outgoing``; a mismatch only produces a
    warning, the extracted value is returned either way.

    Parameters
    ----------
    filepath : path to the Net Settlement Position PDF.

    Returns
    -------
    The settlement position rounded to 2 decimal places, or "" when it could
    not be extracted.
    """
    print("==EXTRACTING THE SETTLEMENT POSITION")
    pdf_text = read_pdf(filepath)

    print("STEP 1: Extracting the necessary text from the pdf file")
    net_settlement_data = pdf_text.split("Page")[0]

    print("STEP 2: Converting the extracted text into an IO object")
    data_io = StringIO(net_settlement_data)

    print("STEP 3: Read the data as a DataFrame")
    # sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
    net_settlement_df = pd.read_csv(data_io, sep=r"\s+", skiprows=2, header=None)

    print("STEP 4: Extracting the Total Incoming and Outgoing Amounts.")
    try:
        # [-2] is the second-to-last row of the parsed table
        total_incoming_amount = _convert_string_numbers(net_settlement_df[0].to_list()[-2], mute = True)

        total_outgoing_amount = _convert_string_numbers(net_settlement_df[1].to_list()[-2], mute = True)
        print(f"\nTotal Incoming Amount in Net Settlement PDF = {total_incoming_amount} ")
        print(f"Total Outgoing Amount in Net Settlement PDF = {total_outgoing_amount}\n")

    # A missing column raises KeyError, a too-short column raises IndexError
    except (IndexError, KeyError):
        warnings.warn("Couldn't extract Total Incoming and Outgoing Amounts. Perhaps file structure has changed!")
        print("Equating them to empty strings")
        total_incoming_amount = ""
        total_outgoing_amount = ""

    print("STEP 5: Extracting Settlement Position")
    try:
        settlement_position = round(convert_string_numbers(net_settlement_df[2].to_list()[-2]), 2)

    # TypeError covers round() being handed a non-numeric conversion result
    except (IndexError, KeyError, TypeError):
        warnings.warn("Couldn't extract Settlement Position. Perhaps file structure has changed!")
        print("Equating Settlement Position to an empty string and returning it")
        return ""

    print("STEP 6: Checking if there was an issue with the values extracted in Steps 3 and 4")
    # A string here is either the "" fallback above or a non-numeric conversion result;
    # in both cases the cross-check below cannot be calculated
    if isinstance(total_incoming_amount, str) or isinstance(total_outgoing_amount, str):
        return settlement_position

    print("STEP 7: Calculating the Settlement Position from the Total Incoming and Outgoing Amounts extracted")
    print(f"{total_incoming_amount} - {total_outgoing_amount}")
    calculated_settlement_position = round(total_incoming_amount - total_outgoing_amount, 2)
    print(f"Calculated Settlement Position = {calculated_settlement_position}")
    print(f"Actual Settlement Position = {settlement_position}")

    print("STEP 8: Checking if this calculated Settlement Position is equal to the extracted Settlement Position")
    if calculated_settlement_position == settlement_position:
        print("STEP 9: Net Settlement Position Confirmed! Returning the value")

    else:
        print("STEP 9: Net Settlement Position was NOT confirmed.")
        warnings.warn("Calculated settlement is NOT equal to extracted settlement. Perhaps file structure has changed!")
        print(f"\nReturning settlement position: {settlement_position}. Check that it is correct")

    # The extracted value is returned whether or not the cross-check succeeded
    return settlement_position

### A function that maps Reports to their respective functions:

This function returns a mapping from each Report to the Function that handles it

In [7]:
#list_of_files = list_of_files_generator(main_folder_path)

@function_name_decorator
def file_to_function_mapping(list_of_files):
    """Map each recognised report file to the function that processes it.

    A file is mapped when its lower-cased name contains one of the keys of
    ``reports_keywords_dict`` and ends with .csv, .xls or .xlsx. Names that
    contain "summary_detail" are skipped unless they contain
    "comm_summary_detail".

    Parameters
    ----------
    list_of_files : file names to inspect.

    Returns
    -------
    dict of {file name: function}, or "" when ``list_of_files`` is empty.
    """
    file_function_mapping = {}
    if len(list_of_files) == 0:
        warnings.warn("\nNo files were detected! No file to function mapping possible")
        return ""

    spreadsheet_extensions = (".csv", ".xls", ".xlsx")

    for prospect_report_name, report_function in reports_keywords_dict.items():
        keyword = prospect_report_name.lower()

        for report_name in list_of_files:
            lowered_name = report_name.lower()

            # `in` also matches a keyword at the very start of the name,
            # which the previous `find(...) > 0` test silently skipped
            if keyword not in lowered_name:
                continue

            # "summary_detail" files are ignored unless they are "comm_summary_detail" files
            if "summary_detail" in lowered_name and "comm_summary_detail" not in lowered_name:
                continue

            if lowered_name.endswith(spreadsheet_extensions):
                print(f"Report found\nReport name: {report_name}")
                file_function_mapping[report_name] = report_function

    return file_function_mapping

In [None]:
@function_name_decorator
def supplementary_file_to_function_mapping(list_of_files):
    """Map each recognised supplementary (PDF) report to its processing function.

    A file is mapped when its lower-cased name contains one of the keys of
    ``supplementary_reports_keywords_dict`` and ends with .pdf. Names that
    contain "summary_detail" are skipped unless they contain
    "comm_summary_detail".

    Parameters
    ----------
    list_of_files : file names to inspect.

    Returns
    -------
    dict of {file name: function}, or "" when ``list_of_files`` is empty.
    """
    file_function_mapping = {}
    if len(list_of_files) == 0:
        warnings.warn("\nNo files were detected! No file to function mapping possible")
        return ""

    # Every branch now reads from the supplementary dictionary that is being iterated;
    # the "comm_summary_detail" branch previously looked the key up in reports_keywords_dict
    for prospect_report_name, report_function in supplementary_reports_keywords_dict.items():
        keyword = prospect_report_name.lower()

        for report_name in list_of_files:
            lowered_name = report_name.lower()

            # `in` also matches a keyword at the very start of the name,
            # which the previous `find(...) > 0` test silently skipped
            if keyword not in lowered_name:
                continue

            # "summary_detail" files are ignored unless they are "comm_summary_detail" files
            if "summary_detail" in lowered_name and "comm_summary_detail" not in lowered_name:
                continue

            if lowered_name.endswith(".pdf"):
                print(f"Report found\nReport name: {report_name}")
                file_function_mapping[report_name] = report_function

    return file_function_mapping

In [None]:
@function_name_decorator
def get_alternative_report_name(filename):
    """Derive the name of the SUMMARY PDF that corresponds to a DETAILS report.

    "DETAILS" is replaced with "SUMMARY" and the extension with ".PDF". If
    that name is not in the global ``list_of_files`` and it carries neither
    "AM" nor "PM", the last two characters of the folder hour (taken from the
    global ``main_folder_path``) are appended as a suffix.

    Parameters
    ----------
    filename : name of the report whose settlement could not be confirmed.

    Returns
    -------
    The candidate file name. It is returned even when it is not present in
    ``list_of_files``.
    """
    print(f"==CALCULATING NEW SETTLEMENT FOR {filename} FROM ANOTHER REPORT")

    print("STEP 1: Replace 'DETAILS' with 'SUMMARY' and replace file extension with 'PDF'")
    new_filename = filename.replace("DETAILS", "SUMMARY").split(".")[0] + ".PDF"

    print("STEP 2: Checking if the New filename exists")
    if new_filename in list_of_files:
        print(f"Filename {new_filename} exists!")

    # `and`, not `or`: the suffix is only added when the name has NEITHER marker.
    # With `or` the test was true for almost every name, producing e.g. "..._PM_PM.PDF"
    elif "AM" not in new_filename and "PM" not in new_filename:
        am_pm = _get_folder_hour(main_folder_path)[-2:]
        new_filename = new_filename.split(".")[0] + "_" + am_pm + ".PDF"
        if new_filename in list_of_files:
            print(f"Filename {new_filename} exists!")

    return new_filename


In [None]:
@function_name_decorator
def alternative_settlement_process(new_filename):
    """Run the supplementary narration function assigned to ``new_filename``.

    The full path is built from the global ``main_folder_path`` and the
    mapping from the global ``list_of_files``.

    Parameters
    ----------
    new_filename : name of the supplementary (PDF) report to process.

    Returns
    -------
    Whatever the mapped narration function returns. A string result is
    reported with a warning and still returned. "" is returned, with a
    warning, when no function is mapped to ``new_filename``.
    """
    print(f"==\nSETTLEMENT PROCESS FOR NEW FILE: {new_filename}")

    print("STEP 1: Creating the full path for the new file")
    full_path = main_folder_path + new_filename
    print(f"Full path to file: {full_path}")

    print("STEP 2: Creating the file-to-function mapping")
    workflow_dict = supplementary_file_to_function_mapping(list_of_files)

    print("STEP 3: Getting the function assigned to the new file")
    # The mapping is "" when no files were detected, and may simply lack this file
    if isinstance(workflow_dict, str) or new_filename not in workflow_dict:
        warnings.warn(f"File was NOT processed because no function is mapped to it.\nFilename: {new_filename}")
        return ""
    narration_function = workflow_dict[new_filename]

    print("STEP 4: Applying the narration function to the file")
    narration = narration_function(full_path)

    if isinstance(narration, str):
        # `new_filename` is the only file name in scope here (`filename` was undefined)
        warnings.warn(f"File was NOT processed because it was not found.\nFilename: {new_filename}")

    return narration


In [27]:
# workflow_dict = file_to_function_mapping(list_of_files)
# worklfow_dict

### Create a Final Narration DataFrame:

This function creates a Narration DataFrame which is a combination of all the generated DataFrames. It uses the dictionary/mapping created by file_to_function_mapping to run each file through its transformation function

In [28]:
@function_name_decorator
def generate_narration_batch_compilation(workflow_dict, main_folder_path):
    """Run every mapped report through its narration function and compile the results.

    For each file in ``workflow_dict`` the mapped function is called with the
    file's full path. Incoming commissions summary/details reports are accepted
    without checks. For any other report the absolute total of its AMOUNT
    column must appear among the values read from the Net Settlement Position
    PDF; otherwise the alternative (summary PDF) report is tried once. Reports
    that cannot be confirmed are left out of the compilation.

    Parameters
    ----------
    workflow_dict : {file name: narration function}, or "" when there is no mapping.
    main_folder_path : folder prefix that is prepended to every file name.

    Returns
    -------
    The concatenation of all accepted narration DataFrames, or "" when there
    is no mapping or nothing could be concatenated.

    Notes
    -----
    ``list_of_files`` is read from the notebook's global namespace.
    """
    folder_hour = _get_folder_hour(main_folder_path)

    print(f"\n==GENERATING COMPILED NARRATION FOR {folder_hour}")

    if workflow_dict == "":
        warnings.warn("\nNo file to function mapping. No compiled report possible")
        return ""

    print("STEP 1: Initialize the Narration list that holds all the Narration DataFrames.\nAlso getting all values in Net Settlement Position PDF")
    narration_list = []
    files_worked_on = []
    settlement_values_list = _get_net_settlement_values(list_of_files, main_folder_path)

    print("\nSTEP 2: Looping through each Identified Report stored in the workflow_dict")
    for filenum, filename in enumerate(workflow_dict.keys()):

        print(f"\n\nBEGINNING TRANSFORMATION FOR {filename}")
        print(f"    STEP 2.1.{filenum}: Creating the full path for the file. It will include {folder_hour}/ in front of the filename")
        full_path = main_folder_path + filename
        print(full_path)

        print(f"    STEP 2.2.{filenum}: Getting the function responsible for cleaning the filename {filename}")
        narration_function = workflow_dict[filename]

        print(f"    STEP 2.3.{filenum}: Running the Narration function on the file")
        narration = narration_function(full_path)

        # Narration functions signal an unreadable file by returning a string
        if isinstance(narration, str):
            warnings.warn(f"File was NOT processed because it was not found.\nFilename: {filename}")
            continue

        print("\nREPORT FILE NAME : TOTAL AMOUNT CALCULATED")
        narration_total = round(narration['AMOUNT'].astype(float).sum(), 2)
        print(f"{filename}: {narration_total}")

        lowered_name = filename.lower()
        if "incoming_commissions_summary" in lowered_name or "incoming_commissions_details" in lowered_name:
            print(f"No checks necessary. Filename: {filename}")
            files_worked_on.append(filename)
            narration_list.append(narration)
            continue

        print("STEP 3: Checking if Narration total is on the Settlement Position created earlier")

        if abs(narration_total) in settlement_values_list:
            print(f"{filename} settlement confirmed from Net Settlement Postion PDF")
            print(f"STEP 4.{filenum}: Appending the DataFrame generated by the function to the Narration list created earlier")
            narration_list.append(narration)
            files_worked_on.append(filename)
            print(narration)
            continue

        # First attempt failed: retry with the alternative (summary PDF) report
        warnings.warn(f"{filename} settlement NOT confirmed from Net Settlement Postion PDF")
        print(f"{filename} settlement NOT confirmed. Checking for alternative among other provided documents")
        new_name = get_alternative_report_name(filename)
        narration = alternative_settlement_process(new_name)
        print(narration)

        # The alternative process returns a string when the file could not be processed;
        # indexing it with ['AMOUNT'] would raise, so treat it as an unconfirmed report
        if isinstance(narration, str):
            warnings.warn(f"{filename} settlement NOT confirmed from Net Settlement Postion PDF after 2nd attempt")
            print(f"This report: {filename} will not be considered in the final entries")
            files_worked_on.append(filename)
            continue

        print("\nREPORT FILE NAME : NEW TOTAL AMOUNT CALCULATED")
        narration_total = round(narration['AMOUNT'].astype(float).sum(), 2)
        print(f"{filename}: {narration_total}")
        if abs(narration_total) in settlement_values_list:
            print(f"{filename} settlement confirmed from Net Settlement Postion PDF on Second Attempt")
            print(f"STEP 4.{filenum}: Appending the DataFrame generated by the function to the Narration list created earlier")
            narration_list.append(narration)
            files_worked_on.append(filename)
            print(narration)

        else:
            warnings.warn(f"{filename} settlement NOT confirmed from Net Settlement Postion PDF after 2nd attempt")
            print(f"This report: {filename} will not be considered in the final entries")
            # Counted as worked on but not compiled, so the mismatch warning below fires
            files_worked_on.append(filename)

    print("\n\nCompiling all the DataFrames to 1 compiled narration")
    print(f"Files worked on are: {files_worked_on}\n")

    if len(files_worked_on) != len(narration_list):
        warnings.warn("Not all files were processed. Calculations will most likely be wrong because 1 or more files are missing from the compiled report, or file contains incomplete values")
    print(f"Number of files worked on: {len(files_worked_on)}.\nNumber of Dataframes: {len(narration_list)}")

    try:
        narrations_df = pd.concat(narration_list)

    # pd.concat raises ValueError when the list is empty
    except ValueError as e:
        print(f"\nThere are no files to process. Perhaps there are no files in the path provided.\nPython error: {e}")
        narrations_df = ""

    return narrations_df


# narrations_df = generate_narration_batch_compilation(workflow_dict)
# narrations_df 

### A function for comparing Transformation and Settlement Position:

This function compares the total of the AMOUNT column in the compiled narration with the settlement position extracted from the Net Settlement PDF file

In [29]:
@function_name_decorator
def compare_total_settlement_total_narration(narrations_df, main_folder_path):
    """Compare the compiled narration total with the Net Settlement Position.

    Parameters
    ----------
    narrations_df : compiled narration DataFrame with an AMOUNT column, or a
        string when no compilation was possible.
    main_folder_path : folder prefix that is prepended to the Net Settlement
        PDF file name.

    Returns
    -------
    The balance (settlement position minus narration total, rounded to 2
    decimal places), or "NA" when there is no compiled report or the
    settlement position could not be extracted.

    Notes
    -----
    ``list_of_files`` is read from the notebook's global namespace.
    """
    print("\n==COMPARING THE TOTAL SETTLEMENT POSITION WITH THE COMPILED NARRATION")

    if isinstance(narrations_df, str):
        warnings.warn("There is no compiled report to compare with settlement position")
        return "NA"

    print("STEP 1: Summing the complete narration values under the AMOUNT column")
    # Cast to float first, as the batch compilation does, so string amounts are summed rather than concatenated
    total_narration_value = round(narrations_df["AMOUNT"].astype(float).sum(), 2)
    print(f"AMOUNT = {total_narration_value}")

    print("\nSTEP 2: Getting the Path for the Net Settlement Position")
    net_settlement_position_pdf_file_path = main_folder_path + _get_net_settlement_path("net_settlement_position", list_of_files)
    print(f"Net Settlement Position Path: {net_settlement_position_pdf_file_path}")

    print(f"\n\nSTEP 3: Extracting the settlement position value from the NET_SETTLEMENT pdf file")
    net_settlement_value = _extract_settlement_position(net_settlement_position_pdf_file_path)

    # The extractor returns "" when the PDF could not be read as expected;
    # subtracting from a string would raise TypeError
    if isinstance(net_settlement_value, str):
        warnings.warn("Settlement position could not be extracted from the Net Settlement PDF. Balance cannot be calculated")
        return "NA"

    print(f"\n\nBack to Comparing Net Settlement Value with the Calculated Amount")
    print(f"Net settlement value from PDF = {net_settlement_value}")
    print(f"Total Narration value from calculation = {total_narration_value}")

    print("STEP 4: Calculating the Balance. This must be ZERO")
    print(f"{net_settlement_value} - {total_narration_value}")
    balance = round(net_settlement_value - total_narration_value, 2)

    if balance == 0:
        print("Check complete. Balance is correct")

    else:
        warnings.warn("Check complete but Balance is INCORRECT.")

    print(f"Balance = {balance}")

    return balance

### A function for raising entries

In [30]:
def process_of_raising_entries(dframe):
    """Turn the compiled narration DataFrame into the final entries table.

    The rows are sorted, NARRATION is truncated to 30 characters, a
    transaction type ("D" for negative AMOUNT, otherwise "C") is added, and
    every row whose ACCOUNT starts with "PAL0999" is split in two: the
    original row keeps 0.9302 of its AMOUNT and a new row carrying the
    remainder is added under account "NGN09992511001".

    Parameters
    ----------
    dframe : DataFrame with ACCOUNT, AMOUNT and NARRATION columns, or a string
        when no compiled report exists.

    Returns
    -------
    The entries DataFrame (column "D" dropped, column "E" renamed to
    "TRANSACTION TYPE"), or "NA" when ``dframe`` is a string.
    """
    print("==RAISING ENTRIES==")

    if isinstance(dframe, str):
        warnings.warn("Final DataFrame is a string. Cannot provide any output.")
        return "NA"

    # NOTE(review): 0.9302 is approximately 1/1.075, which looks like removing a 7.5% VAT - confirm
    pal_net_factor = 0.9302

    print("STEP 1: Make a copy of the DataFrame")
    df = dframe.copy()

    print("STEP 2: Sort all the values by the AMOUNT column")
    df = df.sort_values("AMOUNT")

    print("STEP 3: Limit the NARRATION column width to 30 characters. All other columns are separated by 10 characters")
    df["NARRATION"] = df["NARRATION"].str[:30]

    print("STEP 4: Add an extra column to the end of the DataFrame because we are about to insert an empty column")
    df = _create_inner_columns(df, 2, "NARRATION")

    print("STEP 5: Placing the Transaction ID (i.e. 'C' or 'D') in a column 2 columns away from 'NARRATION'")
    df.loc[:, "E"] = np.where(df["AMOUNT"] < 0, "D", "C")

    print("STEP 6: Reorder the data by ACCOUNT")
    df["ACCOUNT"] = df["ACCOUNT"].astype(str)
    df = df.sort_values("ACCOUNT")

    print("STEP 7: Selecting a portion of the Data where the ACCOUNT starts with 'PAL0999'")
    is_pal_row = df["ACCOUNT"].str.startswith("PAL0999")
    pal_df = df.loc[is_pal_row, :]

    if pal_df.empty:
        print("There is no PAL account. Entries complete!")

        print("STEP 8: Deleting column D")
        final_df = df.drop("D", axis = 1)

        print("STEP 9: Renaming column E to TRANSACTION TYPE")
        final_df = final_df.rename(columns = {"E": "TRANSACTION TYPE"})
        print("\nEntries complete!")
        return final_df

    print("STEP 8: Create copies of the smaller DataFrame so you can use them to compute VAT on the PAL account")
    calc_df = pal_df.copy()
    vat_df = pal_df.copy()
    print("Calc df before transformation:\n")
    print(calc_df)

    print("Vat df before transformation:\n")
    print(vat_df)

    print("STEP 9: Calculating the VAT amount")
    calc_df["AMOUNT"] = calc_df["AMOUNT"] * pal_net_factor
    vat_df["AMOUNT"] = vat_df["AMOUNT"] - vat_df["AMOUNT"] * pal_net_factor
    # Every row of vat_df is a PAL0999 row, so the whole column is redirected to the VAT account
    vat_df["ACCOUNT"] = "NGN09992511001"

    print("Calc df after transformation:\n")
    print(calc_df)

    print("Vat df after transformation:\n")
    print(vat_df)

    print("STEP 10: Updating the original DataFrame with the new values in the AMOUNT column")
    # Positional update instead of df.update(calc_df): update() aligns on index labels,
    # and a compiled narration built with pd.concat repeats labels (0, 0, 0, ...), which
    # would overwrite unrelated rows or fail to reindex
    df["AMOUNT"] = np.where(is_pal_row, df["AMOUNT"] * pal_net_factor, df["AMOUNT"])

    print("STEP 11: Updating the original DataFrame with the new values in the AMOUNT (VAT) and ACCOUNT columns")
    final_df = pd.concat([df, vat_df])
    final_df = final_df.drop("D", axis = 1)

    print("STEP 12: Renaming column E to TRANSACTION TYPE")
    final_df = final_df.rename(columns = {"E": "TRANSACTION TYPE"})
    print("\nEntries complete!")
    return final_df


### Additional functions not discussed in process documentation. 

#### These are used when there is any inaccurate balance after running any of the functions above.

In [None]:
def outgoing_reversal_summary(filepath):
    """Build the Outgoing Reversal narration from a summary PDF.

    The text before the first "Page" marker is parsed as a whitespace-separated
    table (first two lines skipped). The last value of the column with the
    most non-null entries is converted with ``convert_string_numbers`` and
    made negative.

    Parameters
    ----------
    filepath : path to the outgoing reversal summary PDF.

    Returns
    -------
    One-row DataFrame with ACCOUNT, AMOUNT and NARRATION columns.
    """
    file = read_pdf(filepath)

    data = file.split("Page")[0]

    print("STEP 2: Converting the extracted text into an IO object")
    data_io = StringIO(data)

    print("STEP 3: Read the data as a DataFrame")
    # sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
    df = pd.read_csv(data_io, sep=r"\s+", skiprows=2, header=None)

    # First column with the highest number of non-null values.
    # The table just parsed is `df`; the previous code read an undefined `net_settlement_df`
    longest_col = df.count().idxmax()

    transaction_amount = df[longest_col].to_list()[-1]

    transaction_amount = convert_string_numbers(transaction_amount)
    # Outgoing amounts are recorded as negative values
    transaction_amount = abs(transaction_amount) * -1

    print("STEP 4: Creating the Narration dictionary")
    account = "NGN09992527084"
    outgoing_reversal_details_dict = {"ACCOUNT": [account],
                                      "AMOUNT": [transaction_amount],
                                      "NARRATION": [f"Outgoing Reversal {formatted_date}"]
                                     }

    print("STEP 5: Creating the Narration DataFrame")
    outgoing_reversal_details_settlement_df = pd.DataFrame(outgoing_reversal_details_dict)

    return outgoing_reversal_details_settlement_df


In [None]:
def incoming_cpay_summary(filepath):
    """Build the Incoming CorporatePay narration from a summary PDF.

    The text before the first "Page" marker is parsed as a whitespace-separated
    table (first line skipped). Within the column that has the most non-null
    entries, the last value that does not contain "/" is converted with
    ``convert_string_numbers`` and used as the amount.

    Parameters
    ----------
    filepath : path to the incoming CorporatePay summary PDF.

    Returns
    -------
    One-row DataFrame with ACCOUNT, AMOUNT and NARRATION columns.
    """
    pdf_text = read_pdf(filepath)
    text_before_page_marker = pdf_text.split("Page")[0]

    print("STEP 2: Converting the extracted text into an IO object")
    text_buffer = StringIO(text_before_page_marker)

    print("STEP 3: Read the data as a DataFrame")
    df = pd.read_csv(text_buffer, delim_whitespace=True, skiprows=1, header=None)

    # First column holding the most non-null values; stays None when every column is empty
    non_null_counts = df.count()
    longest_col = None
    if len(non_null_counts) > 0 and non_null_counts.max() > 0:
        longest_col = non_null_counts.idxmax()

    slash_free_values = [entry for entry in df[longest_col].to_list() if "/" not in entry]
    transaction_amount = convert_string_numbers(slash_free_values[-1])

    print("STEP 4: Creating the Narration dictionary")
    narration_record = {
        "ACCOUNT": "NGN09992508050",
        "AMOUNT": transaction_amount,
        "NARRATION": f"Incoming CorporatePay {formatted_date}",
    }

    print("STEP 5: Creating the Narration DataFrame")
    # All values are scalars, so an explicit one-element index is required
    return pd.DataFrame(narration_record, index=[0])

In [None]:
def incoming_transfer_summary(filepath):
    """Build the Incoming Transfers narration from a summary PDF.

    The text between the first and second "Total" markers is parsed as a
    whitespace-separated table (first line skipped), with every value kept as
    text. The account is the last non-null value of the column that follows
    the column containing the word "account". The amount is the last value
    without "/" in the column that has the most non-null entries, converted
    with ``convert_string_numbers``.

    Parameters
    ----------
    filepath : path to the incoming transfers summary PDF.

    Returns
    -------
    One-row DataFrame with ACCOUNT, AMOUNT and NARRATION columns.

    Raises
    ------
    ValueError : when no account column could be located.
    """
    file = read_pdf(filepath)
    data = file.split("Total")[1]
    print(data)
    print("STEP 2: Converting the extracted text into an IO object")
    data_io = StringIO(data)

    print("STEP 3: Read the data as a DataFrame")
    # dtype=str keeps every cell as text: account numbers keep their leading zeros and
    # the string checks below work on every column.
    # sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
    df = pd.read_csv(data_io, sep=r"\s+", skiprows=1, header=None, dtype=str)
    print(df)

    # First column with the highest number of non-null values
    longest_col = df.count().idxmax()

    account_num = None
    all_cols = df.columns.tolist()
    for col_idx, col in enumerate(all_cols):
        lowered_values = df[col].astype(str).str.lower().to_list()
        target_idx = col_idx + 1
        # The account number sits in the column right after the "account" label
        if "account" in lowered_values and target_idx < len(all_cols):
            account_num = df[all_cols[target_idx]].dropna().to_list()[-1]

    if account_num is None:
        raise ValueError(f"No account column could be located in the report.\nFilename: {filepath}")

    col_values = df[longest_col].dropna().to_list()

    transaction_amount = [total for total in col_values if "/" not in total][-1]
    auto_sum = convert_string_numbers(transaction_amount)

    print("STEP 5: Create a dictionary for the Account extracted and Amount calculated")
    auto_sum_acct_num_dict = {
        "ACCOUNT": [account_num],
        "AMOUNT": [auto_sum]
    }

    print("STEP 6: Convert the dictionary into a DataFrame")
    income_transfer_settlement_df = pd.DataFrame(auto_sum_acct_num_dict)

    print("STEP 7: Including the NARRATION in the DataFrame")
    income_transfer_settlement_df["NARRATION"] = f"Incoming Transfers {formatted_date} S1"

    return income_transfer_settlement_df


In [None]:
def outgoing_cardloads_summary(filepath):
    """Build the Outgoing Cardloads narration from a summary PDF.

    Every line of the text before the first "Pages" marker that contains both
    an "NGN<digits>" account and a number contributes one row: the account,
    and the largest numeric token on the line as a negative amount.

    Parameters
    ----------
    filepath : path to the outgoing cardloads summary PDF.

    Returns
    -------
    DataFrame with ACCOUNT, AMOUNT and NARRATION columns, one row per
    matching line.
    """
    import re
    file = read_pdf(filepath)
    data = file.split("Pages")[0].split("\n")
    # Debug output of the 9th line; guarded so short documents do not raise IndexError
    if len(data) > 8:
        print(data[8])
    ngn_pattern = r'NGN(\d+)'
    amount_pattern = r'\b(\d{1,3}(,\d{3})*(\.\d+)?)\b'
    accounts_list = []
    amounts_list = []
    for line in data:
        ngn_match = re.findall(ngn_pattern, line)
        amount_match = re.findall(amount_pattern, line)

        if not (ngn_match and amount_match):
            continue

        numerical_values = []
        for token in line.split():
            without_commas = token.replace(',', '')
            if not without_commas.replace('.', '').isdigit():
                continue
            try:
                numerical_values.append(float(without_commas))
            except ValueError:
                # Digit-and-dot tokens that are not numbers, e.g. a dotted date like 01.02.2024
                continue

        # Without a numeric token there is no amount to record for this line
        if not numerical_values:
            continue

        accounts_list.append("NGN" + str(ngn_match[0]))
        amounts_list.append(max(numerical_values))

    print(accounts_list)
    print(amounts_list)

    # Outgoing amounts are recorded as negative values
    amounts_list = [-value for value in amounts_list]

    outgoing_cardload_dict = {"ACCOUNT": accounts_list, "AMOUNT": amounts_list,
                              "NARRATION": [f"Outgoing Cardloads {formatted_date}"] * len(accounts_list)}

    outgoing_cardload_df = pd.DataFrame(outgoing_cardload_dict)

    return outgoing_cardload_df



In [None]:
def incoming_reversal_summary(filepath):
    """Build the Incoming Reversal narration from a summary PDF.

    The text before the first "Pages" marker is parsed as a whitespace-separated
    table. Within the column that has the most non-null entries, the last
    value that does not contain "/" is converted with
    ``convert_string_numbers`` and used as the amount.

    Parameters
    ----------
    filepath : path to the incoming reversal summary PDF.

    Returns
    -------
    One-row DataFrame with ACCOUNT, AMOUNT and NARRATION columns.
    """
    pdf_text = read_pdf(filepath)
    text_buffer = StringIO(pdf_text.split("Pages")[0])

    print("STEP 3: Read the data as a DataFrame")
    df = pd.read_csv(text_buffer, delim_whitespace=True, skiprows=0, header=None)

    # Keep the first column whose non-null count beats every column seen before it
    longest_col, longest_col_length = None, 0
    for col, non_null_count in df.count().items():
        if non_null_count > longest_col_length:
            longest_col, longest_col_length = col, non_null_count

    slash_free_values = []
    for entry in df[longest_col].to_list():
        if "/" not in entry:
            slash_free_values.append(entry)
    transaction_amount = convert_string_numbers(slash_free_values[-1])

    print("STEP 4: Creating a dictionary for the Narration")
    narration_record = {
        "ACCOUNT": ["NGN09992501102"],
        "AMOUNT": [transaction_amount],
        "NARRATION": [f"Incoming Reversal {formatted_date}"],
    }

    print("STEP 5: Converting the dictionary to a DataFrame")
    return pd.DataFrame(narration_record)



In [None]:
def outgoing_cpay_summary(filepath):
    """Build the Outgoing CorporatePay narration from a summary PDF.

    Every whitespace-separated token of the PDF text is searched for a
    number; the first number found in each token is a candidate. The largest
    candidate (after ``_convert_string_numbers``) becomes the amount, recorded
    as a negative value.

    Parameters
    ----------
    filepath : path to the outgoing CorporatePay summary PDF.

    Returns
    -------
    One-row DataFrame with ACCOUNT, AMOUNT and NARRATION columns.
    """
    # Local import, as in outgoing_cardloads_summary: `re` is not among the imports
    # in the notebook's import cell
    import re

    file = read_pdf(filepath)

    amount_pattern = re.compile(r'\b(\d{1,3}(,\d{3})*(\.\d+)?)\b')
    list_of_targets = []
    for data in file.split("Total"):
        for token in data.split():
            matches = amount_pattern.findall(token)
            # findall returns one tuple per match; element 0 is the full number
            if matches:
                list_of_targets.append(matches[0][0])

    largest_target = 0
    for potential_target in list_of_targets:
        potential_target = _convert_string_numbers(potential_target)
        # NOTE(review): the helper appears to return a string for values it cannot
        # convert; such results cannot be compared with numbers, so skip them
        if isinstance(potential_target, str):
            continue
        if potential_target > largest_target:
            largest_target = potential_target

    transaction_amount = convert_string_numbers(largest_target)

    print("STEP 5: Negating the transaction amount because it is outgoing")
    transaction_amount = abs(transaction_amount) * -1

    outgoing_cpay_dict = {"ACCOUNT": ["1021821845"],
                          "AMOUNT": [transaction_amount],
                          "NARRATION": [f"Outgoing CorporatePay {formatted_date}"]}

    print("STEP 6: Creating the Final Narration DataFrame")
    outgoing_cpay_settlement_df = pd.DataFrame(outgoing_cpay_dict)

    return outgoing_cpay_settlement_df
