# In [None]:
# Stacked ensemble: Logistic Regression (LR) + linear SVM + neural network (NN)

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import roc_auc_score
import warnings
warnings.filterwarnings("ignore")

# Load data: both CSV files are read from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Handle NaN in target.
# A row without a label carries no supervision signal. Filling it with 0 would
# silently relabel an unknown outcome as the negative class and inject label
# noise into both the fitted model and the cross-validated AUC, so such rows
# are dropped instead. When the target has no NaNs this is a no-op.
train = train.dropna(subset=["RiskFlag"]).reset_index(drop=True)
train["RiskFlag"] = train["RiskFlag"].astype(int)

# Split features from target; work on a copy of the test frame so the original
# `test` (still needed for its "ProfileID" column) is left untouched.
X = train.drop("RiskFlag", axis=1)
y = train["RiskFlag"]
test_X = test.copy()

# Column types
# Features that are standardized before modelling.
numeric_cols = [
    "ApplicantYears",
    "AnnualEarnings",
    "RequestedSum",
    "TrustMetric",
    "WorkDuration",
    "ActiveAccounts",
    "OfferRate",
    "RepayPeriod",
    "DebtFactor",
]

# Features that are one-hot encoded before modelling.
categorical_cols = [
    "QualificationLevel",
    "WorkCategory",
    "RelationshipStatus",
    "OwnsProperty",
    "FamilyObligation",
    "FundUseCase",
    "JointApplicant",
]

# One (name, transformer, columns) step per column group.
# handle_unknown='ignore' lets the encoder accept categories at predict time
# that were not present when it was fitted, instead of raising.
numeric_step = ("num", StandardScaler(), numeric_cols)
categorical_step = (
    "cat",
    OneHotEncoder(handle_unknown='ignore'),
    categorical_cols,
)

# Only the columns listed above are routed to a transformer here.
preprocess = ColumnTransformer(transformers=[numeric_step, categorical_step])


# Base learner 1: Logistic Regression
log_reg = LogisticRegression(max_iter=4000)

# Base learner 2: Linear SVM
# LinearSVC exposes no predict_proba, which the stacker below requests via
# stack_method="predict_proba", so it is wrapped in a 3-fold probability
# calibrator. random_state matches the seed used for the CV splitter so that
# repeated runs give the same model.
svm_linear = CalibratedClassifierCV(
    LinearSVC(C=1, random_state=42),
    cv=3
)

# Base learner 3: Neural Network
# NOTE: learning_rate="adaptive" was removed on purpose. scikit-learn only
# honours that option with solver='sgd'; with 'adam' it was silently ignored.
# random_state fixes weight initialisation and batch shuffling so scores are
# reproducible from run to run.
mlp = MLPClassifier(
    hidden_layer_sizes=(50, 25),
    activation='relu',
    solver='adam',
    max_iter=400,
    random_state=42
)

# Stacking the models: (name, estimator) pairs consumed by StackingClassifier.
estimators = [
    ('lr', log_reg),
    ('svm', svm_linear),
    ('nn', mlp)
]

# The final logistic regression is trained on the base learners' predicted
# class probabilities; n_jobs=-1 fits the base learners in parallel.
stacked_model = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=3000),
    stack_method="predict_proba",
    n_jobs=-1
)

# Full pipeline: preprocessing is fitted together with the stack, so it is
# re-learned on whatever data `model.fit` receives.
model = Pipeline([
    ("preprocess", preprocess),
    ("stack", stacked_model)
])

# Estimate out-of-sample ROC AUC with a seeded, shuffled, stratified 3-fold split.
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
auc_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y), start=1):
    # Positional indexing (.iloc) because the splitter yields row positions.
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    # Refit the whole pipeline on this fold's training part only.
    model.fit(X_train, y_train)

    # Score the held-out part using the second predict_proba column.
    val_proba = model.predict_proba(X_val)[:, 1]
    fold_auc = roc_auc_score(y_val, val_proba)
    auc_scores.append(fold_auc)

    print(f"Fold {fold} AUC: {fold_auc:.4f}")

print("\n Average AUC:", np.mean(auc_scores))

# Refit the pipeline on the full training set before predicting the test set.
model.fit(X, y)

# Second predict_proba column (presumably P(RiskFlag == 1) for a 0/1 target),
# turned into a hard label with a fixed 0.5 cut-off.
test_pred = model.predict_proba(test_X)[:, 1]
test_binary = (test_pred >= 0.5).astype(int)

# Submission frame: one predicted label per test ProfileID.
output = pd.DataFrame({
    "ProfileID": test["ProfileID"],
    "RiskFlag": test_binary
})

filename = "LR_SVM_NN.csv"
output.to_csv(filename, index=False)

print(f"\n File created successfully: {filename}")
# A bare `output.head()` is only rendered when it is the last expression of a
# notebook cell (and never when run as a script), so it displayed nothing here.
# Print the preview explicitly instead.
print(output.head())

# Offer the CSV as a browser download when running inside Google Colab.
# The google.colab package is only importable in that environment, so the
# import is guarded: elsewhere the run finishes normally with the file on disk
# instead of ending in an ImportError after all the work is done.
try:
    from google.colab import files
except ImportError:
    print(f"Not running in Google Colab; {filename} was saved locally.")
else:
    files.download(filename)
