In [2]:
import pandas as pd
import os
df_list = []

# Directory containing CSV files
directory = r'C:\Users\pskotte\Downloads\Bluefin'

# Read all CSV files into a list of dataframes.
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the combined dataframe reproducible from one run to the next.
for filename in sorted(os.listdir(directory)):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        df_list.append(df)

# pd.concat() rejects an empty list with "No objects to concatenate", which
# does not say where the data was expected; fail early and name the folder.
if not df_list:
    raise ValueError('No .csv files found in ' + directory)

# Concatenate all dataframes into a single dataframe, renumbering rows from 0
combined_df = pd.concat(df_list, ignore_index=True)

# Display the first few rows of the combined dataframe
combined_df.head()

Unnamed: 0,IdAchDetail,IdAchBatch,TransactionType,FileName,App,Processor,PaymentFrequency,DateCreated,DateModified,BankResponseCode,...,TransitRoutingCheckDigit,LastFourAccountNumber,Amount,PaymentJournalID,IndividualName,PaymentTypeCode,AddendaRecordIndicator,TraceNumber,ReturnFileName,Record
0,2468684,1109,REPRESENTMENT,GMACH-retry05232024044555885.ACH,DM,BofA,One-Time,2024-05-22 15:11:11,2024-05-22 15:11:11,R01,...,8,3382,100.0,519207216,MONICA MEJIA,R,0,111000020000492,GMRETURN.20240525124722.txt.asc,6271210003588768374364159338200000100005192072...
1,2468683,1109,REPRESENTMENT,GMACH-retry05232024044555885.ACH,DM,BofA,One-Time,2024-05-22 15:11:11,2024-05-22 15:11:11,R01,...,6,5378,500.0,519204999,KENDYL WASHATKA,R,0,111000020000491,GMRETURN.20240529124656.txt.asc,6271240031165869182535446537800000500005192049...
2,2468682,1109,REPRESENTMENT,GMACH-retry05232024044555885.ACH,DM,BofA,One-Time,2024-05-22 15:11:11,2024-05-22 15:11:11,R01,...,2,2958,111.11,519206967,MARITZA CABALLERO,R,0,111000020000480,GMRETURN.20240529124656.txt.asc,6271070021929039123699544295800000111115192069...
3,2468681,1109,REPRESENTMENT,GMACH-retry05232024044555885.ACH,DM,BofA,One-Time,2024-05-22 15:11:11,2024-05-22 15:11:11,R01,...,8,9808,102.93,519211208,EURIDSE MURILLO,R,0,111000020000479,GMRETURN.20240529124656.txt.asc,6271221052784355209000822980800000102935192112...
4,2468680,1109,REPRESENTMENT,GMACH-retry05232024044555885.ACH,DM,BofA,One-Time,2024-05-22 15:11:11,2024-05-22 15:11:11,R01,...,9,4890,185.53,519206723,CHRISTOPHER BOBROWSKI,R,0,111000020000470,GMRETURN.20240529124656.txt.asc,6273140742696439795800654489000000185535192067...


In [6]:
import pandas as pd
import os
import re

def extract_date_from_filename(filename):
    """Return the date embedded after 'retry' in a file name as 'MM/DD/YYYY'.

    The eight digits that immediately follow the text 'retry' are read as
    month, day and four-digit year. Returns None when the name does not
    contain 'retry' followed by at least eight digits.
    """
    found = re.search(r'retry(\d{8})', filename)
    if found is None:
        return None
    parsed = pd.to_datetime(found.group(1), format='%m%d%Y')
    return parsed.strftime('%m/%d/%Y')

# Updated create_summary_df function to extract date from filename
def create_summary_df(combined_df):
    """Summarise approval outcomes per source file.

    Rows are grouped by 'FileName'. Each group becomes one summary row holding
    the date parsed from the file name, the row count, how many rows have
    'ProcessorStatus' equal to 'APPROVED' (every other status counts as
    failed), the two corresponding rates, and the summed 'Amount' of the
    approved rows.
    """
    def summarise(file_name, rows):
        # One summary record for a single file's rows.
        represented_on = extract_date_from_filename(file_name)
        total = len(rows)
        approved_rows = rows[rows['ProcessorStatus'] == 'APPROVED']
        approved_total = len(approved_rows)
        declined_total = total - approved_total
        return {
            'Date Represented': represented_on,
            'Count': total,
            'Success Count': approved_total,
            'Success Rate': approved_total / total if total != 0 else 0,
            'Failed Count': declined_total,
            'Decline Rate': declined_total / total if total != 0 else 0,
            'Collected Amount': approved_rows['Amount'].sum(),
        }

    records = [
        summarise(file_name, rows)
        for file_name, rows in combined_df.groupby('FileName')
    ]
    return pd.DataFrame(records)

# Summarise the combined data: one row per source file
summary_df = create_summary_df(combined_df)

# Preview the top of the summary table
summary_df.head(5)

Unnamed: 0,Date Represented,Count,Success Count,Success Rate,Failed Count,Decline Rate,Collected Amount
0,05/08/2024,570,144,0.252632,426,0.747368,23098.55
1,05/09/2024,330,98,0.29697,232,0.70303,17222.47
2,05/10/2024,398,78,0.19598,320,0.80402,12568.38
3,05/11/2024,775,97,0.125161,678,0.874839,16374.27
4,05/14/2024,711,140,0.196906,571,0.803094,28783.12


In [14]:
import pandas as pd
import os
import re

# Function to extract date from filename
def extract_date_from_filename(filename):
    """Pull the 'retry' date out of a file name and format it as 'MM/DD/YYYY'.

    Looks for the text 'retry' directly followed by eight digits and reads
    those digits as month, day, four-digit year. Returns None if the name
    has no such run of digits.
    """
    hit = re.search(r'retry(\d{8})', filename)
    if not hit:
        return None
    (mmddyyyy,) = hit.groups()
    return pd.to_datetime(mmddyyyy, format='%m%d%Y').strftime('%m/%d/%Y')

# Function to clean the Amount column
def clean_amount_column(df, column_name):
    """Convert a currency-formatted column to float, modifying ``df`` in place.

    Dollar signs and commas are removed from string values, then the whole
    column is cast to float. The regex replacement only affects string values,
    so a column that is already numeric is simply cast.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the column. The column is overwritten on this object.
    column_name : str
        Name of the column to clean.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    ValueError
        If a value still cannot be parsed as a float after stripping.
    """
    # Raw string on purpose: '\$' in an ordinary literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions. Inside
    # a character class '$' is literal, so one pattern covers both symbols.
    stripped = df[column_name].replace(r'[$,]', '', regex=True)
    # Assign once, after the cast, so a failed cast leaves the column untouched.
    df[column_name] = stripped.astype(float)
    return df

# Join all CSV files from the second folder; the result is filtered further
# down using its 'Receiver ID' column.
new_directory = r'C:\Users\pskotte\Desktop\New folder'
new_df_list = []

# Reading all CSV files and appending to the list.
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the combined dataframe reproducible from one run to the next.
for filename in sorted(os.listdir(new_directory)):
    if filename.endswith('.csv'):
        file_path = os.path.join(new_directory, filename)
        df = pd.read_csv(file_path)
        new_df_list.append(df)

# pd.concat() rejects an empty list with "No objects to concatenate", which
# does not say where the data was expected; fail early and name the folder.
if not new_df_list:
    raise ValueError('No .csv files found in ' + new_directory)

# Concatenate all dataframes into a single dataframe from the new directory
new_combined_df = pd.concat(new_df_list, ignore_index=True)

# Clean the Amount column in both dataframes (combined_df comes from an earlier cell)
combined_df = clean_amount_column(combined_df, 'Amount')
new_combined_df = clean_amount_column(new_combined_df, 'Amount')

# Keep only the rows of new_combined_df whose 'Receiver ID' value also appears
# in combined_df's 'PaymentJournalID' column
refined_df = new_combined_df[new_combined_df['Receiver ID'].isin(combined_df['PaymentJournalID'])]

# Updated create_summary_df function to include 'CashPro Amount'
def create_summary_df(combined_df, refined_df):
    """Summarise approval outcomes per source file, with a 'CashPro Amount'.

    Rows of ``combined_df`` are grouped by 'FileName'. Each group becomes one
    summary row holding the date parsed from the file name, the row count, how
    many rows have 'ProcessorStatus' equal to 'APPROVED' (every other status
    counts as failed), the two corresponding rates, and the summed 'Amount' of
    the approved rows.

    'CashPro Amount' is the summed 'Amount' of those ``refined_df`` rows whose
    'Receiver ID' (a) appears among the group's 'PaymentJournalID' values and
    (b) occurs exactly once in the whole of ``refined_df``.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Needs the columns 'FileName', 'ProcessorStatus', 'Amount' and
        'PaymentJournalID'.
    refined_df : pandas.DataFrame
        Needs the columns 'Receiver ID' and 'Amount'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'FileName'.
    """
    # Receiver IDs that occur exactly once across the refined dataset. This
    # does not depend on the file being summarised, so it is computed once
    # here instead of being recomputed on every loop iteration.
    receiver_counts = refined_df['Receiver ID'].value_counts()
    single_occurrence_ids = receiver_counts[receiver_counts == 1].index
    single_occurrence_rows = refined_df[refined_df['Receiver ID'].isin(single_occurrence_ids)]

    summary_data = []

    # Group by 'FileName' and process each group
    for name, group in combined_df.groupby('FileName'):
        date_represented = extract_date_from_filename(name)
        count = len(group)

        # Filter the approved rows once and reuse them for count and amount.
        approved_rows = group[group['ProcessorStatus'] == 'APPROVED']
        success_count = len(approved_rows)
        failed_count = count - success_count
        success_rate = success_count / count if count != 0 else 0
        decline_rate = failed_count / count if count != 0 else 0
        collected_amount = approved_rows['Amount'].sum()

        # Of the single-occurrence rows, keep those belonging to this file's
        # payments and total their amounts.
        in_this_file = single_occurrence_rows['Receiver ID'].isin(group['PaymentJournalID'])
        cashpro_amount = single_occurrence_rows.loc[in_this_file, 'Amount'].sum()

        summary_data.append({
            'Date Represented': date_represented,
            'Count': count,
            'Success Count': success_count,
            'Success Rate': success_rate,
            'Failed Count': failed_count,
            'Decline Rate': decline_rate,
            'Collected Amount': collected_amount,
            'CashPro Amount': cashpro_amount
        })

    summary_df = pd.DataFrame(summary_data)
    return summary_df

# Build the per-file summary again, this time passing the refined dataset so
# the 'CashPro Amount' column is filled in
summary_df = create_summary_df(combined_df, refined_df)

# Preview the top of the summary table
summary_df.head(5)

Unnamed: 0,Date Represented,Count,Success Count,Success Rate,Failed Count,Decline Rate,Collected Amount,CashPro Amount
0,05/08/2024,570,144,0.252632,426,0.747368,23098.55,23098.55
1,05/09/2024,330,98,0.29697,232,0.70303,17222.47,15790.22
2,05/10/2024,398,78,0.19598,320,0.80402,12568.38,22217.27
3,05/11/2024,775,97,0.125161,678,0.874839,16374.27,13750.9
4,05/14/2024,711,140,0.196906,571,0.803094,28783.12,28783.12


In [15]:
# Write the summary table to disk as CSV, leaving out the index column
summary_csv_path = r'C:\Users\pskotte\Desktop\Internal Representments.csv'
summary_df.to_csv(summary_csv_path, index=False)