# 🏦 Simple Loan Default Prediction
This notebook uses **Logistic Regression** and **SVM** to predict loan eligibility.

### Steps:
1. Upload `train.csv` and `test_Y3wMUE5_7gLdaTN.csv` to Colab.
2. Run all cells.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# 1. Load Data
train = pd.read_csv('train.csv')
test = pd.read_csv('test_Y3wMUE5_7gLdaTN.csv')

# 2. Basic Cleaning
# Drop the Loan_ID column where present (errors='ignore' tolerates its absence).
for df in (train, test):
    df.drop(columns='Loan_ID', inplace=True, errors='ignore')

# Learn the most common value of each column from the TRAINING data only, so
# the test set is imputed with the same values the training data was filled
# with instead of statistics computed on the test set itself.
fill_values = {}
for col in train.columns:
    modes = train[col].mode()
    # mode() is empty for a column that is entirely missing; skip it rather
    # than fail on modes[0].
    if not modes.empty:
        fill_values[col] = modes.iloc[0]

# Fill missing values with most common value
for df in (train, test):
    for col in df.columns:
        if col in fill_values:
            df[col] = df[col].fillna(fill_values[col])
            continue
        # Column unknown to train (or without any training mode): fall back to
        # the frame's own most common value when one exists.
        own_modes = df[col].mode()
        if not own_modes.empty:
            df[col] = df[col].fillna(own_modes.iloc[0])

# 3. Convert text to numbers (One-Hot Encoding)
train = pd.get_dummies(train)
test = pd.get_dummies(test)

# The target column is one-hot encoded along with the features, which yields
# 'Loan_Status_Y' plus a complementary dummy for every other label
# (e.g. 'Loan_Status_N'). ALL of them must be kept out of the feature matrix:
# leaving the complement in X hands the model the answer (target leakage) and
# makes every validation score meaningless.
target_dummies = [col for col in train.columns
                  if str(col).startswith('Loan_Status_')]

X = train.drop(columns=target_dummies)
# Cast to int so labels are plain 1/0 whatever dtype get_dummies produced
# (bool in recent pandas versions).
y = train['Loan_Status_Y'].astype(int)

# Align test columns with the training features: categories missing from the
# test set become all-zero columns, and columns unknown to train are dropped.
test = test.reindex(columns=X.columns, fill_value=0)

# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features (Important for SVM and Logistic Regression)
# The scaler is fitted on the training split only; the validation and test
# data are transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
# Scale the actual test set (not the training features X). The reindex
# guarantees the columns match what the scaler was fitted on.
X_test_scaled = scaler.transform(test.reindex(columns=X.columns, fill_value=0))

# 4. Optimization: GridSearchCV & Cross Validation

# A. Logistic Regression: search over the inverse regularization strength C
lr_params = dict(C=[0.01, 0.1, 1, 10, 100])
lr_grid = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000),
    param_grid=lr_params,
    cv=5,
)
lr_grid.fit(X_train_scaled, y_train)
lr_best = lr_grid.best_estimator_

# Score the refitted best estimator on the held-out validation split
lr_val_predictions = lr_best.predict(X_val_scaled)
lr_val_accuracy = accuracy_score(y_val, lr_val_predictions)

print(f"Best Logistic Regression Params: {lr_grid.best_params_}")
print(f"LR Validation Accuracy: {lr_val_accuracy:.2f}")

# B. SVM: search over C and compare the linear and RBF kernels
svm_params = dict(C=[0.1, 1, 10], kernel=['linear', 'rbf'])
svm_grid = GridSearchCV(
    estimator=SVC(probability=True),
    param_grid=svm_params,
    cv=5,
)
svm_grid.fit(X_train_scaled, y_train)
svm_best = svm_grid.best_estimator_

# Score the refitted best estimator on the held-out validation split
svm_val_predictions = svm_best.predict(X_val_scaled)
svm_val_accuracy = accuracy_score(y_val, svm_val_predictions)

print(f"\nBest SVM Params: {svm_grid.best_params_}")
print(f"SVM Validation Accuracy: {svm_val_accuracy:.2f}")

# 5. Cross Validation Comparison
# 5-fold scores of each tuned model on the scaled training split
lr_cv = cross_val_score(estimator=lr_best, X=X_train_scaled, y=y_train, cv=5)
svm_cv = cross_val_score(estimator=svm_best, X=X_train_scaled, y=y_train, cv=5)

lr_cv_mean = lr_cv.mean()
svm_cv_mean = svm_cv.mean()
print(f"\nLR Cross-Val Mean: {lr_cv_mean:.2f}")
print(f"SVM Cross-Val Mean: {svm_cv_mean:.2f}")

# 6. Final Predictions using Best Model (SVM)
# Re-read the raw test file so the saved CSV keeps its original columns.
test_final = pd.read_csv('test_Y3wMUE5_7gLdaTN.csv')
X_test_scaled_final = scaler.transform(test.reindex(columns=X.columns, fill_value=0))

# The model predicts labels in the dtype of the training target, which may be
# bool or 0/1 integers depending on the pandas version. Mapping with
# {1: 'Y', 0: 'N'} can turn boolean predictions into NaN, so translate through
# a boolean mask, which works for either dtype.
predicted_positive = np.asarray(svm_best.predict(X_test_scaled_final)).astype(bool)
test_final['Prediction'] = np.where(predicted_positive, 'Y', 'N')
test_final.to_csv('optimized_predictions.csv', index=False)
print("\n‚úÖ Optimized predictions saved to optimized_predictions.csv")