In [39]:
import os


In [112]:
import pandas as pd


class DataProcessor:
    def __init__(self, file_path):
        self.file_path = file_path
        self.df = None  # Initialize DataFrame to None
        #Initializing class attribute
        self.matched_count = 0
        self.unmatched_count = 0
        self.total_sum = 0
        self.matched_df = None
        self.unmatched_df = None

    def process_data(self):
        print(f"Matching for {str(self.file_path)[-17:]} in progress...")
        """Loads data, extracts matching criteria, groups, filters, and saves results."""
        self.df = pd.read_csv(self.file_path)
        self.df['group_key'] = self.df['EXTERNAL_REF_NO'].str.extract('(?<=/)(.*?)(?=_)', expand=False)

        grouped_df = self.df.groupby('group_key')[['TXN_AMT']].agg(sum=pd.NamedAgg(column='TXN_AMT', aggfunc='sum')).reset_index()
        grouped_df['matched'] = (grouped_df['sum'] == 0).astype(int)
        
        matched_df = self.df[self.df['group_key'].isin(grouped_df[grouped_df['matched'] == 1]['group_key'])]
        
        
        unmatched_df = self.df[~self.df['group_key'].isin(grouped_df[grouped_df['matched'] == 1]['group_key'])]
        self.total_sum += df['TXN_AMT'].sum()
        self.unmatched_count += unmatched_df['TXN_AMT'].count() #increase the number of unmatched items
        self.matched_count += matched_df['TXN_AMT'].count() #increase the number of matched items
        
        print(f"Done Matching: Found {matched_df['TXN_AMT'].count()} matched items and {unmatched_df['TXN_AMT'].count()} unmatched items...")
        
    def update_record(self):
         return self.total_sum, self.unmatched_count, self.matched_count   
        
    def save_output(self):
        print("Saving output to file...")
        matched_df.to_csv(f"{str(self.file_path)[-17:-4]}_matched.csv", index=False)
        unmatched_df.to_csv(f"{str(self.file_path)[-17:-4]}_unmatched.csv", index=False)
        print("done saving... exiting logic")



In [113]:
# # Example usage:
# processor = DataProcessor(r"C:\Users\waliu\Documents\aaaaaaAutom8\spools\spool1_sheets\spool1_sheet8.csv")
# processor.process_data()  # This will execute the data processing steps

In [125]:
total_matched_count = 0
total_unmatched_count = 0
total_sum = 0

folder_path = r"C:\Users\waliu\Documents\aaaaaaAutom8\spools\spool2_sheets"
file_list = os.listdir(folder_path)
files = os.path.join(folder_path)

for item in file_list:
    processor = DataProcessor(os.path.join(folder_path, item))
    processor.process_data()
    total_matched_count += processor.matched_count
    total_unmatched_count += processor.unmatched_count
    total_sum += processor.total_sum


Matching for spool2_sheet0.csv in progress...
Done Matching: Found 858375 matched items and 141625 unmatched items...
Matching for spool2_sheet1.csv in progress...
Done Matching: Found 896559 matched items and 103441 unmatched items...
Matching for pool2_sheet10.csv in progress...
Done Matching: Found 0 matched items and 1000000 unmatched items...
Matching for pool2_sheet11.csv in progress...
Done Matching: Found 0 matched items and 1000000 unmatched items...
Matching for pool2_sheet12.csv in progress...
Done Matching: Found 0 matched items and 1000000 unmatched items...
Matching for pool2_sheet13.csv in progress...


  # This is added back by InteractiveShellApp.init_path()


Done Matching: Found 0 matched items and 724143 unmatched items...
Matching for pool2_sheet14.csv in progress...
Done Matching: Found 0 matched items and 185 unmatched items...
Matching for spool2_sheet2.csv in progress...
Done Matching: Found 959973 matched items and 40027 unmatched items...
Matching for spool2_sheet3.csv in progress...
Done Matching: Found 904746 matched items and 95254 unmatched items...
Matching for spool2_sheet4.csv in progress...
Done Matching: Found 842961 matched items and 157039 unmatched items...
Matching for spool2_sheet5.csv in progress...
Done Matching: Found 828963 matched items and 171037 unmatched items...
Matching for spool2_sheet6.csv in progress...
Done Matching: Found 922047 matched items and 77953 unmatched items...
Matching for spool2_sheet7.csv in progress...
Done Matching: Found 962541 matched items and 37459 unmatched items...
Matching for spool2_sheet8.csv in progress...
Done Matching: Found 446439 matched items and 190148 unmatched items...
M

In [126]:
total_matched_count


7622604

In [128]:
folder_path = r"C:\Users\waliu\Documents\aaaaaaAutom8\spools\spool1_sheets"
file_list = os.listdir(folder_path)
files = os.path.join(folder_path)

for item in file_list:
    processor = DataProcessor(os.path.join(folder_path, item))
    processor.process_data()
    total_matched_count += processor.matched_count
    total_unmatched_count += processor.unmatched_count
    total_sum += processor.total_sum

Matching for pool1_sheet10.csv in progress...
Done Matching: Found 933425 matched items and 66575 unmatched items...
Matching for pool1_sheet11.csv in progress...
Done Matching: Found 899016 matched items and 100984 unmatched items...
Matching for pool1_sheet12.csv in progress...
Done Matching: Found 970494 matched items and 29506 unmatched items...
Matching for pool1_sheet13.csv in progress...
Done Matching: Found 949908 matched items and 50092 unmatched items...
Matching for pool1_sheet14.csv in progress...
Done Matching: Found 927225 matched items and 72775 unmatched items...
Matching for pool1_sheet15.csv in progress...
Done Matching: Found 959064 matched items and 40936 unmatched items...
Matching for pool1_sheet16.csv in progress...
Done Matching: Found 981228 matched items and 18772 unmatched items...
Matching for pool1_sheet17.csv in progress...
Done Matching: Found 910371 matched items and 89629 unmatched items...
Matching for 7_outstanding.csv in progress...


KeyError: "Columns not found: 'TXN_AMT'"

In [129]:
total_matched_count

15153335

In [130]:
total_unmatched_count

6207579