# SettlementGuard EDA
Exploratory Data Analysis for the Synthetic Trade Settlement Dataset.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the settlement dataset; the relative path is resolved against the
# current working directory of the notebook kernel.
csv_path = 'settlement_data.csv'
df = pd.read_csv(csv_path)

# Preview the first rows (kept as the last expression so the notebook renders it).
df.head()

## Distribution of Target Variable (IS_FAILED)

In [None]:
# Bar chart with one bar per IS_FAILED value, showing how many rows carry it.
ax = sns.countplot(data=df, x='IS_FAILED')
ax.set_title('Class Imbalance: Success (0) vs Fail (1)')
plt.show()

# The same split expressed as proportions instead of raw counts.
class_share = df['IS_FAILED'].value_counts(normalize=True)
print(class_share)

## Failure Rate by Counterparty Rating

In [None]:
# Ratings are listed explicitly so the bars appear in this fixed order rather
# than in the order the values happen to occur in the data.
# NOTE(review): ratings absent from this list would not be drawn - confirm the
# dataset contains no other values (e.g. 'B').
rating_order = ['AAA', 'AA', 'A', 'BBB', 'BB', 'CCC']

fig, ax = plt.subplots(figsize=(10, 6))
# barplot aggregates y per x category with its default estimator (the mean),
# so each bar is the average of IS_FAILED within that rating.
sns.barplot(data=df, x='Counterparty_Rating', y='IS_FAILED', order=rating_order, ax=ax)
ax.set_title('Failure Probability by Credit Rating')
ax.set_ylabel('Failure Rate')
plt.show()

## Failure Rate by SSI Status

In [None]:
fig, ax = plt.subplots(figsize=(6, 4))
# One bar per SSI_Status value; bar height is the average of IS_FAILED in that
# group (barplot's default estimator is the mean).
sns.barplot(data=df, x='SSI_Status', y='IS_FAILED', ax=ax)
ax.set_title('Impact of SSI Mismatch on Failure')
plt.show()

## Correlation Heatmap

In [None]:
# Select numerical columns for correlation
cols = ['Notional_Amount_USD', 'Market_Volatility_Index', 'Failure_Prob', 'IS_FAILED', 'Trade_Hour']
# Pairwise correlation between the selected columns (pandas' default method, Pearson).
corr = df[cols].corr()

plt.figure(figsize=(8, 6))
# Pin the colour scale to the full correlation range and centre it on zero so
# the neutral midpoint of the diverging 'coolwarm' map always means "no
# correlation". Without fixed limits the scale is stretched to the observed
# min/max and the colours can no longer be read as sign/strength.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title('Correlation Matrix')
plt.show()