<a href="https://colab.research.google.com/github/BrindhaHema/CreditRisk_StressTesting/blob/main/Gan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras import layers, models, optimizers

# --- 1. Synthetic Bank Portfolio ---
n_loans = 100
np.random.seed(42)
portfolio = pd.DataFrame({
    'LoanID': range(1, n_loans+1),
    'Segment': np.random.choice(['Retail', 'MSME', 'Corporate', 'Auto'], n_loans),
    'Industry': np.random.choice(['Agri', 'Manufacturing', 'Hospitality', 'Tech', 'Realty'], n_loans),
    'Outstanding': np.random.randint(2e5, 4e6, n_loans),
    'PD': np.round(np.random.uniform(0.01, 0.10, n_loans), 3),
    'LGD': np.round(np.random.uniform(0.3, 0.85, n_loans), 3)
})
portfolio['EAD'] = portfolio['Outstanding'] * np.random.uniform(0.8, 1.2, n_loans)
portfolio['EL_Base'] = portfolio['PD'] * portfolio['LGD'] * portfolio['EAD']

# --- 2. Scenario Definitions (could be loaded from CSV) ---
scenario_defs = pd.DataFrame([
    {'ScenarioID':'S1','ScenarioType':'Hypothetical','Description':'PD x2, LGD +0.1','PD_Factor':2,'PD_Add':0,'LGD_Factor':1,'LGD_Add':0.1,'EAD_Factor':1,'Sector':'','Sector_PD_Factor':None,'Sector_LGD_Add':None},
    {'ScenarioID':'S2','ScenarioType':'Regulatory','Description':'PD +0.05, LGD +0.05','PD_Factor':1,'PD_Add':0.05,'LGD_Factor':1,'LGD_Add':0.05,'EAD_Factor':1,'Sector':'','Sector_PD_Factor':None,'Sector_LGD_Add':None},
    {'ScenarioID':'S3','ScenarioType':'Historical','Description':'Hospitality COVID','PD_Factor':1,'PD_Add':0,'LGD_Factor':1,'LGD_Add':0,'EAD_Factor':1,'Sector':'Hospitality','Sector_PD_Factor':3,'Sector_LGD_Add':0.15},
    {'ScenarioID':'S4','ScenarioType':'Black Swan','Description':'Severe tail event','PD_Factor':5,'PD_Add':0,'LGD_Factor':1,'LGD_Add':0.25,'EAD_Factor':0.9,'Sector':'','Sector_PD_Factor':None,'Sector_LGD_Add':None}
])

# --- 3. Scenario runner with audit logging ---
def run_scenario_df(df, sc_params):
    sc = df.copy()
    sc['PD_Scen'] = sc['PD'] * sc_params['PD_Factor'] + sc_params['PD_Add']
    sc['LGD_Scen'] = (sc['LGD'] * sc_params['LGD_Factor'] + sc_params['LGD_Add']).clip(upper=1)
    sc['EAD_Scen'] = sc['EAD'] * sc_params['EAD_Factor']
    sc['TransformNotes'] = f"PD*{sc_params['PD_Factor']}+{sc_params['PD_Add']}; LGD*{sc_params['LGD_Factor']}+{sc_params['LGD_Add']}; EAD*{sc_params['EAD_Factor']}"
    # Apply sector-specific factors if present
    if sc_params['Sector'] and pd.notnull(sc_params['Sector_PD_Factor']):
        idx = sc['Industry'] == sc_params['Sector']
        sc.loc[idx, 'PD_Scen'] = sc.loc[idx, 'PD'] * sc_params['Sector_PD_Factor']
        sc.loc[idx, 'TransformNotes'] += f"; {sc_params['Sector']} PD*{sc_params['Sector_PD_Factor']}"
    if sc_params['Sector'] and pd.notnull(sc_params['Sector_LGD_Add']):
        idx = sc['Industry'] == sc_params['Sector']
        sc.loc[idx, 'LGD_Scen'] += sc_params['Sector_LGD_Add']
        sc.loc[idx, 'TransformNotes'] += f"; {sc_params['Sector']} LGD+{sc_params['Sector_LGD_Add']}"
    sc['LGD_Scen'] = sc['LGD_Scen'].clip(upper=1)
    sc['EL_Scenario'] = sc['PD_Scen'] * sc['LGD_Scen'] * sc['EAD_Scen']
    sc['ScenarioType'] = sc_params['ScenarioType']
    sc['ScenarioID'] = sc_params['ScenarioID']
    sc['Description'] = sc_params['Description']
    sc['Audit_Metadata'] = str(sc_params)
    return sc

# --- 4. Apply all scenarios from dataset ---
scenario_results = []
for _, sc_def in scenario_defs.iterrows():
    scenario_results.append(run_scenario_df(portfolio, sc_def))
scenarios_all = pd.concat(scenario_results)

# --- 5. GAN Features: Synthetic Macro-Event-Portfolio Data for GAN Training ---
macro_events = [
    {'Event': 'COVID-19', 'GDP_Growth': 3.2,  'Inflation': 5.5, 'RBI_Repo': 4.4},
    {'Event': 'Demonetization', 'GDP_Growth': 7.6, 'Inflation': 3.2, 'RBI_Repo': 6.3},
    {'Event': '2008_Global_Recession', 'GDP_Growth': -1.5, 'Inflation': 7.9, 'RBI_Repo': 7.5},
]
records = []
for e in macro_events:
    for _, loan in portfolio.iterrows():
        rec = {**loan.to_dict(), **e}
        records.append(rec)
gan_data = pd.DataFrame(records)
event_dummies = pd.get_dummies(gan_data['Event']) # One-hot for event conditioning
gan_features = pd.concat([
    gan_data[['GDP_Growth','Inflation','RBI_Repo','PD','LGD','EAD']],
    event_dummies
], axis=1).astype('float32')

# --- 6. GAN Scenario Generation (with explainability logging) ---
latent_dim = 16
data_dim = gan_features.shape[1]

generator = models.Sequential([
    layers.Dense(32, activation='relu', input_dim=latent_dim),
    layers.Dense(32, activation='relu'),
    layers.Dense(data_dim, activation='linear')
])
discriminator = models.Sequential([
    layers.Dense(32, activation='relu', input_dim=data_dim),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
discriminator.compile(optimizer=optimizers.Adam(learning_rate=0.002), loss='binary_crossentropy')
discriminator.trainable = False
gan_input = layers.Input(shape=(latent_dim,))
generated = generator(gan_input)
gan_output = discriminator(generated)
gan_model = models.Model(gan_input, gan_output)
gan_model.compile(optimizer=optimizers.Adam(learning_rate=0.002), loss='binary_crossentropy')

# Train GAN (short demo loop)
epochs = 200
batch_size = 64
half_batch = batch_size // 2
features_np = gan_features.values
for epoch in range(epochs):
    idx = np.random.randint(0, features_np.shape[0], half_batch)
    real_samples = features_np[idx]
    noise = np.random.normal(0, 1, (half_batch, latent_dim))
    gen_samples = generator.predict(noise)
    x_disc = np.vstack([real_samples, gen_samples])
    y_disc = np.hstack([np.ones(half_batch), np.zeros(half_batch)])
    d_loss = discriminator.train_on_batch(x_disc, y_disc)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    y_gen = np.ones(batch_size)
    g_loss = gan_model.train_on_batch(noise, y_gen)

# Generate GAN scenarios and explain
n_gan = 5
gan_noise = np.random.normal(0, 1, (n_gan, latent_dim))
gan_scenarios = generator.predict(gan_noise)
gan_cols = gan_features.columns
gan_df = pd.DataFrame(gan_scenarios, columns=gan_cols)
gan_df['ScenarioType'] = 'GAN_Dynamic'
gan_df['ScenarioID'] = ['GAN_'+str(i+1) for i in range(n_gan)]
gan_df['Description'] = 'Synthetic scenario from GAN generator'
gan_df['Source'] = 'GAN'
gan_df['Parameters'] = f'weights: {generator.count_params()} | latent_dim: {latent_dim} | data_dim: {data_dim}'
gan_df['TransformNotes'] = f'Generated with GAN on macro+event+portfolio shocks'
gan_results = gan_df[['ScenarioID','ScenarioType','GDP_Growth','PD','LGD','EAD','Description','Source','Parameters','TransformNotes']]

# --- 7. Reporting and audit trace ---
print('\n--- Expected Loss by Scenario Type ---')
summary = scenarios_all.groupby('ScenarioType')['EL_Scenario'].sum().reset_index()
print(summary)
print('\n--- Sample scenario result (audit trace included):')
print(scenarios_all[['ScenarioID','ScenarioType','LoanID','Industry','EL_Scenario','Description','Audit_Metadata']].head())
print('\n--- GAN Dynamic Scenario Samples (audit trace included) ---')
print(gan_results.head())

# Save output for full audit trail
scenarios_all.to_csv('scenarios_from_dataset_with_audit.csv', index=False)
gan_results.to_csv('gan_dynamic_scenarios_audit.csv', index=False)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30