In [12]:
# Loading Required Libraries and Data
import pandas as pd
from IPython.display import display

# Excel file
file_path = "../data/Common Data Warehouse-Orginal.xlsx"

# Open the workbook once and parse every sheet inside a context manager so
# the underlying file handle is released as soon as the sheets are in memory
# (previously the ExcelFile was never closed).
with pd.ExcelFile(file_path) as xls:
    customer_df = xls.parse("Customer")  # Contains Rating Score
    financial_df = xls.parse("Financial")  # Links Customer and Instrument
    instrument_df = xls.parse("Instrument")  # Contains Performing/Non-Performing Status

#  Converting Rating Score to Standardized Categories ---
def classify_rating(score):
    """Map a raw rating score onto a standardized rating category.

    The score is converted to text with ``str`` and then scanned for marker
    characters. Rules are tried in a fixed order and the first rule with a
    marker present anywhere in the text wins, so digits take precedence over
    the letters "A" and "U", and higher digits take precedence over lower
    ones.

    Args:
        score: Any value; it is stringified before matching.

    Returns:
        The category label of the first matching rule, or "Unknown" when no
        marker character is found.
    """
    text = str(score)

    # (marker characters, category) pairs in priority order.
    ordered_rules = (
        ("6", "Highest Rating"),
        ("5", "High Rating"),
        ("4", "Good Rating"),
        ("3", "Fair Rating"),
        ("2", "Medium Rating"),
        ("10", "Lowest Rating (Defaulted)"),  # either "1" or "0"
        ("A", "Highest Score"),
        ("U", "Unassigned"),
    )

    for markers, category in ordered_rules:
        if any(marker in text for marker in markers):
            return category
    return "Unknown"

# Add the standardized category derived from each customer's Rating Score.
customer_df["Rating Category"] = customer_df["Rating Score"].apply(classify_rating)

# Left-join customer attributes onto the financial rows
# (financial "Cust ID" matches customer "Customer ID").
customer_financial_df = pd.merge(
    financial_df,
    customer_df,
    how="left",
    left_on="Cust ID",
    right_on="Customer ID",
)

# Left-join instrument attributes onto the result
# (financial "Agmt ID" matches instrument "Agreement ID").
customer_financial_instrument_df = pd.merge(
    customer_financial_df,
    instrument_df,
    how="left",
    left_on="Agmt ID",
    right_on="Agreement ID",
)

#  Identifying Anomalies 
def detect_anomalies(row):
    """Label a row whose rating category contradicts its performance status.

    Two contradictions are reported:

    * status "N" combined with any non-defaulted rating category
      (Highest, High, Good, Fair, Medium, or Highest Score);
    * status "Y" combined with the defaulted category.

    "Unassigned" and "Unknown" categories are never flagged.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            "Performing/Non Performing" and "Rating Category".
            NOTE(review): "N"/"Y" are taken to mean non-performing/performing,
            as implied by the anomaly labels; confirm against the data
            dictionary.

    Returns:
        One of "Anomaly: Non-Performing but High Rating",
        "Anomaly: Performing but Defaulted Rating" or "No Anomaly".
    """
    # Every category that denotes a rating better than "defaulted".
    # "High Rating" was previously missing from this list, so non-performing
    # rows with a score of 5 silently escaped detection.
    non_defaulted_categories = (
        "Highest Rating",
        "Highest Score",
        "High Rating",
        "Good Rating",
        "Medium Rating",
        "Fair Rating",
    )

    status = row["Performing/Non Performing"]
    if status == "N" and row["Rating Category"] in non_defaulted_categories:
        return "Anomaly: Non-Performing but High Rating"
    if status == "Y" and row["Rating Category"] == "Lowest Rating (Defaulted)":
        return "Anomaly: Performing but Defaulted Rating"
    return "No Anomaly"

# Label every merged row with the result of detect_anomalies.
customer_financial_instrument_df["Anomaly"] = customer_financial_instrument_df.apply(
    detect_anomalies, axis=1
)

# Keep only the rows that were flagged.
anomalies_df = customer_financial_instrument_df[
    customer_financial_instrument_df["Anomaly"] != "No Anomaly"
]

# Select only relevant columns
anomalies_summary_df = anomalies_df[
    ["Customer ID", "Performing/Non Performing", "Rating Score", "Anomaly"]
]

# Counting the different types of anomalies
anomaly_summary = anomalies_summary_df["Anomaly"].value_counts().reset_index()
anomaly_summary.columns = ["Anomaly Type", "Count"]

# Counting total anomalies (computed once and reused by both reports below).
# NOTE(review): these are counts of merged rows, not of distinct customers;
# confirm that the "Customers" wording in the messages is intended.
total_anomalies = anomalies_summary_df.shape[0]

status_col = anomalies_summary_df["Performing/Non Performing"]
anomaly_col = anomalies_summary_df["Anomaly"]

performing_low_rating = int(
    ((status_col == "Y") & (anomaly_col == "Anomaly: Performing but Defaulted Rating")).sum()
)
non_performing_high_rating = int(
    ((status_col == "N") & (anomaly_col == "Anomaly: Non-Performing but High Rating")).sum()
)

# Display Results
# The bullet is U+1F539; it had been stored as mis-decoded bytes.
print(f"🔹 Total Anomalies Detected: {total_anomalies}")
print(f"🔹 Performing Customers with Low Rating (Incorrect): {performing_low_rating}")
print(f"🔹 Non-Performing Customers with High Rating (Incorrect): {non_performing_high_rating}")
display(anomaly_summary)

# Save Overall Summary to Excel
summary_output_path = "../data/Rating_Performance_Anomalies_Overview.xlsx"
anomaly_summary.to_excel(summary_output_path, index=False)

# Row-level listing of every anomaly
print(f"Total Anomalies Detected: {total_anomalies}")
display(anomalies_summary_df)

# Saving the anomalies summary to Excel
output_file_path = "../data/Rating_Performance_Anomalies_Summary.xlsx"
anomalies_summary_df.to_excel(output_file_path, index=False)

# Last expression of the cell: the notebook echoes the saved file's path.
output_file_path


🔹 Total Anomalies Detected: 3411
🔹 Performing Customers with Low Rating (Incorrect): 3411
🔹 Non-Performing Customers with High Rating (Incorrect): 0


Unnamed: 0,Anomaly Type,Count
0,Anomaly: Performing but Defaulted Rating,3411


Total Anomalies Detected: 3411


Unnamed: 0,Customer ID,Performing/Non Performing,Rating Score,Anomaly
1,1000009653088,Y,PC0,Anomaly: Performing but Defaulted Rating
18,1000009742439,Y,PC0+,Anomaly: Performing but Defaulted Rating
26,1000009652648,Y,PC0+,Anomaly: Performing but Defaulted Rating
42,1000029256066,Y,PC0-,Anomaly: Performing but Defaulted Rating
57,1000009742768,Y,PC0,Anomaly: Performing but Defaulted Rating
...,...,...,...,...
39289,1000009742460,Y,PC0+,Anomaly: Performing but Defaulted Rating
39293,1000007673730,Y,PC0,Anomaly: Performing but Defaulted Rating
39308,1000009650828,Y,PC0+,Anomaly: Performing but Defaulted Rating
39329,1000009663350,Y,PC0,Anomaly: Performing but Defaulted Rating


'../data/Rating_Performance_Anomalies_Summary.xlsx'