In [None]:
import pandas as pd

# Small in-memory sample used to demonstrate the aggregation.
data = {
    'O/S Cash USD': [100, 200, 150, 300, 400],
    'Cpty Name': ['Company A', 'Company B', 'Company A', 'Company C', 'Company B'],
    'Treated_Comments': ['Comment1', 'Comment2', 'Comment3', 'Comment4', 'Comment5'],
    'Treated_Fails_code': ['Code1', 'Code2', 'Code1', 'Code2', 'Code1'],
}
df = pd.DataFrame(data)

# Number of rows for every (fail code, counterparty) combination,
# flattened back into ordinary columns with the tally named 'Count'.
grouped = (
    df.groupby(['Treated_Fails_code', 'Cpty Name'])
    .size()
    .rename('Count')
    .reset_index()
)

# Fail codes ascending; within each code the largest counts come first.
sorted_grouped = grouped.sort_values(
    by=['Treated_Fails_code', 'Count'],
    ascending=[True, False],
)

# Write the sorted counts to a fresh workbook containing a single sheet;
# the row index is not exported.
output_file = 'fails_code_counts.xlsx'
sorted_grouped.to_excel(output_file, sheet_name='Fails_Code_Counts', index=False)

print(f"Data saved to {output_file}")


In [None]:
import pandas as pd
import re

# Sample records whose free-text comments describe each fail.
data = {
    'O/S Cash USD': [100, 200, 150, 300, 400],
    'Cpty Name': ['Company A', 'Company B', 'Company A', 'Company C', 'Company B'],
    'Treated_Comments': [
        'Short to Deliver due to pending receipts from CP x',
        'Short to Deliver due to failing receipts from Cp b',
        'Cp a was short to deliver',
        'Firm was Short due to failing receipts from Cp b',
        'Cp a was short to deliver',
    ],
    'Treated_Fails_code': ['FTD', 'FTD', 'FTD', 'FTD', 'Other'],
}
df = pd.DataFrame(data)

# Work on an independent copy of the FTD rows so that later column
# assignments on ftd_df do not touch df.
is_ftd = df['Treated_Fails_code'].eq('FTD')
ftd_df = df.loc[is_ftd].copy()

def extract_firm(comment):
    """Extract the firm that a fail comment names as the cause.

    Two phrasings are recognised, both matched case-insensitively:

    * ``... from <firm>``: the run of word characters and whitespace that
      follows "from" (this runs to the end of that run, so any trailing
      words are included in the result).
    * ``<firm> was short to deliver``: the run of word characters and
      whitespace immediately before that phrase.

    The "from" form is tried first.

    Args:
        comment: The comment text. Non-string values, such as None or the
            float NaN pandas uses for empty cells, are treated as having
            no comment.

    Returns:
        The captured firm name with surrounding whitespace stripped, or
        None if ``comment`` is not a string or neither pattern yields a
        non-empty name.
    """
    # re.search raises TypeError on non-strings, which would abort a whole
    # Series.apply as soon as one comment cell is empty.
    if not isinstance(comment, str):
        return None

    # Preferred form: the firm is whatever follows 'from'.
    match = re.search(r'from\s([\w\s]+)', comment, re.IGNORECASE)
    if match:
        firm = match.group(1).strip()
        # A capture made only of whitespace is not a name; fall through
        # instead of returning an empty string.
        if firm:
            return firm

    # Alternative form, e.g. 'Cp a was short to deliver'.
    match_alt = re.search(r'([\w\s]+)\swas short to deliver', comment, re.IGNORECASE)
    if match_alt:
        firm = match_alt.group(1).strip()
        if firm:
            return firm

    return None  # Nothing recognisable in the comment.

# Derive, for every FTD row, the firm named in its free-text comment.
ftd_df['Because of Which firm'] = ftd_df['Treated_Comments'].apply(extract_firm)

# Count FTD rows per (counterparty, named firm) pair.
# NOTE: groupby excludes rows with a missing key by default, so rows for which
# extract_firm returned None do not appear in this summary.
ftd_summary = ftd_df.groupby(['Cpty Name', 'Because of Which firm']).size().reset_index(name='Count')

# Append the analysis to the existing workbook as its own sheet.
# mode='a' opens the existing file rather than creating a new one.
# if_sheet_exists='replace' keeps this cell re-runnable: with the default
# ('error') a second run raises ValueError because 'FTD_Analysis' already exists.
output_file = 'fails_code_counts.xlsx'
with pd.ExcelWriter(
    output_file,
    mode='a',
    engine='openpyxl',
    if_sheet_exists='replace',
) as writer:
    ftd_summary.to_excel(writer, sheet_name='FTD_Analysis', index=False)

print(f"New analysis saved to {output_file}")
