# Credit Risk Analysis
This notebook covers data exploration, feature engineering, model training, and evaluation for credit risk assessment.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

## Exploratory Data Analysis (EDA)

In [None]:
# Load the loan dataset and take a first look at its structure.
df = pd.read_csv('../data/loan_data.csv')
df.info()
# Only a cell's last expression is rendered automatically, so the summary
# statistics must be displayed explicitly or they are silently discarded.
display(df.describe())
# Class balance of the 'default' column (used as the model target later on).
sns.countplot(x='default', data=df)
plt.show()

## Feature Engineering

In [None]:
# Derived feature: the 'amount' column divided element-wise by 'annual_income'.
df['debt_to_income'] = df['amount'].div(df['annual_income'])

## Model Training & Testing

In [None]:
# Separate the feature columns from the 'default' target column.
X = df.drop('default', axis=1)
y = df['default']
# Hold out 20% for testing. Stratifying on y keeps the class proportions the
# same in both splits, which matters because the classes are rebalanced below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Balance data: oversample the training split only, so the test split stays
# untouched. The seed makes the synthetic samples reproducible across runs.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
# Train models on the resampled data with fixed seeds, so a fresh
# Restart & Run All reproduces the same fitted models.
lr = LogisticRegression(random_state=42).fit(X_train_res, y_train_res)
rf = RandomForestClassifier(random_state=42).fit(X_train_res, y_train_res)
xgb = XGBClassifier(random_state=42).fit(X_train_res, y_train_res)

## Model Evaluation

In [None]:
# Evaluate each fitted model on the held-out test split.
for model in [lr, rf, xgb]:
    # Label the report so the three outputs can be told apart.
    print(type(model).__name__)
    y_pred = model.predict(X_test)
    # ROC AUC measures ranking quality, so it must be computed from continuous
    # scores; hard 0/1 predictions collapse the curve to a single point.
    # Column 1 of predict_proba is the probability of the larger class label.
    # NOTE(review): assumes 'default' is a binary target -- confirm.
    y_score = model.predict_proba(X_test)[:, 1]
    print(f'AUC: {roc_auc_score(y_test, y_score):.2f}')
    # Threshold-dependent metrics use the hard class predictions.
    print(f'Accuracy: {accuracy_score(y_test, y_pred):.2f}')
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

## Feature Importance

In [None]:
# Horizontal bar chart of the fitted random forest's importance per feature column.
feat_names = X.columns
importances = rf.feature_importances_
# barplot returns the Axes it drew on; set the title on that Axes directly.
sns.barplot(x=importances, y=feat_names).set_title('Feature Importance')
plt.show()