In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
import warnings
warnings.filterwarnings("ignore")

# Load the training and test sets from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Missing labels are filled with 0 (i.e. treated as the negative class)
# before the target column is cast to int.
train["RiskFlag"] = train["RiskFlag"].fillna(0).astype(int)

# Separate the target from the feature frame; the test frame is copied so
# later steps never touch the raw `test` DataFrame.
y = train["RiskFlag"]
X = train.drop(columns="RiskFlag")
test_X = test.copy()

# Feature groups consumed by the preprocessing step.
# Numeric features (scaled downstream).
numeric_cols = [
    "ApplicantYears",
    "AnnualEarnings",
    "RequestedSum",
    "TrustMetric",
    "WorkDuration",
    "ActiveAccounts",
    "OfferRate",
    "RepayPeriod",
    "DebtFactor",
]

# Categorical features (one-hot encoded downstream).
categorical_cols = [
    "QualificationLevel",
    "WorkCategory",
    "RelationshipStatus",
    "OwnsProperty",
    "FamilyObligation",
    "FundUseCase",
    "JointApplicant",
]

# Preprocessing: standardize the numeric features and one-hot encode the
# categorical ones. Columns that appear in neither list are dropped
# (ColumnTransformer's default remainder="drop").
#
# sparse_threshold=0 forces a dense ndarray. With the default threshold the
# sparse one-hot output can make the stacked result a scipy sparse matrix,
# while the rest of the script expects plain numpy arrays.
preprocess = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ],
    sparse_threshold=0,
)

# Fit on the full training data.
# NOTE(review): the scaler statistics and category vocabulary are learned
# from every training row, including rows that later serve as validation
# folds; refit per fold if a strictly leak-free CV estimate is needed.
preprocess.fit(X)

# Prepare dense float32 numpy arrays for Keras.
X_prep = preprocess.transform(X).astype("float32")
test_prep = preprocess.transform(test_X).astype("float32")

# Width of the transformed feature matrix; read by build_model().
input_dim = X_prep.shape[1]
print("Final Feature Count:", input_dim)

# DEFINE MODEL
def build_model():
    """Build and compile a fresh feed-forward binary classifier.

    The network takes ``input_dim`` features (module-level value) and stacks
    four ReLU Dense stages of 512, 256, 128 and 64 units, each followed by
    batch normalization. The first three stages also apply dropout
    (0.30, 0.25, 0.20). A single sigmoid unit produces the output.

    Returns:
        A Keras ``Sequential`` model compiled with AdamW
        (learning rate 0.001, weight decay 1e-5), binary cross-entropy loss
        and an AUC metric named ``"AUC"``.
    """
    # (units, dropout rate) per hidden stage; None means "no dropout".
    hidden_stages = (
        (512, 0.30),
        (256, 0.25),
        (128, 0.20),
        (64, None),
    )

    stack = [layers.Input(shape=(input_dim,))]
    for units, drop_rate in hidden_stages:
        stack.append(layers.Dense(units, activation="relu"))
        stack.append(layers.BatchNormalization())
        if drop_rate is not None:
            stack.append(layers.Dropout(drop_rate))
    stack.append(layers.Dense(1, activation="sigmoid"))

    model = models.Sequential(stack)
    model.compile(
        optimizer=optimizers.AdamW(learning_rate=0.001, weight_decay=1e-5),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.AUC(name="AUC")],
    )
    return model

# 3-fold stratified cross-validation: every fold trains a brand-new model
# and is scored with ROC AUC on its held-out split.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
auc_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X_prep, y), start=1):
    print(f"\n Training Fold {fold}...")

    # Slice features (numpy array) and labels (pandas Series) for this fold.
    X_train, y_train = X_prep[train_idx], y.iloc[train_idx]
    X_val, y_val = X_prep[val_idx], y.iloc[val_idx]

    model = build_model()

    # Stops once the monitored quantity (Keras default: val_loss) has not
    # improved for 6 epochs, then rolls back to the best weights seen.
    es = callbacks.EarlyStopping(patience=6, restore_best_weights=True)

    model.fit(
        x=X_train,
        y=y_train,
        validation_data=(X_val, y_val),
        batch_size=256,
        epochs=35,
        verbose=0,
        callbacks=[es],
    )

    # Predicted probabilities as a 1-D vector, scored against the fold labels.
    preds = np.ravel(model.predict(X_val, verbose=0))
    auc = roc_auc_score(y_true=y_val, y_score=preds)
    auc_scores.append(auc)

    print(f"Fold {fold} AUC: {auc:.4f}")

print("\n FINAL MEAN AUC:", np.mean(auc_scores))

# Train the final model on every training row.
final_model = build_model()

# This fit has no validation data, so EarlyStopping's default monitor
# ("val_loss") is never reported and the callback would silently do nothing.
# Monitor the training loss instead so the callback is actually active.
es_final = callbacks.EarlyStopping(
    monitor="loss",
    patience=6,
    restore_best_weights=True,
)

final_model.fit(
    X_prep, y,
    epochs=35,
    batch_size=256,
    callbacks=[es_final],
    verbose=0,
)

# Predicted probabilities for the test set, thresholded at 0.5 into labels.
test_prob = final_model.predict(test_prep, verbose=0).flatten()
test_binary = (test_prob >= 0.5).astype(int)

# Submission frame: one predicted RiskFlag per test ProfileID.
output = pd.DataFrame({
    "ProfileID": test["ProfileID"],
    "RiskFlag": test_binary,
})

filename = "NN.csv"
output.to_csv(filename, index=False)

print(f"\n Saved: {filename}")
# A bare `output.head()` in the middle of a cell displays nothing; print it.
print(output.head())

# Offer a browser download when running in Google Colab. Elsewhere the
# module is absent and the CSV is already saved locally, so skip quietly.
try:
    from google.colab import files
except ImportError:
    pass
else:
    files.download(filename)
