# Loan Approval Prediction (Kaggle Dataset)

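This notebook trains a logistic regression model to predict loan approval using the Kaggle Loan Prediction dataset, evaluates it on a held-out validation split, and writes predictions for the test set to `loan_submission.csv`.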

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:

# Read the labelled training CSV and the unlabelled test CSV, in that order.
train_df, test_df = map(
    pd.read_csv,
    (
        "dataset/train_u6lujuX_CVtuZ9i.csv",
        "dataset/test_Y3wMUE5_7gLdaTN.csv",
    ),
)

# Preview the first rows of the training frame.
train_df.head()


In [None]:

# Summarise the training frame: column names, non-null counts and dtypes.
# The non-null counts show which columns need imputation.
train_df.info()


In [None]:

# Count the rows per Loan_Status value to see how balanced the target is.
train_df['Loan_Status'].value_counts()


In [None]:

# Impute missing values in the training and test frames.
#
# Fill values are computed from the training set only and applied to both
# frames, so the test set is preprocessed with the same statistics the model
# is trained on.
mode_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed']
median_cols = ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']

# Most frequent value for the categorical columns, median for the numeric ones.
fill_values = {col: train_df[col].mode()[0] for col in mode_cols}
fill_values.update({col: train_df[col].median() for col in median_cols})

# DataFrame.fillna with a dict fills each listed column with its own value and
# returns a new frame. The `df[col].fillna(..., inplace=True)` form acts on a
# temporary column object: recent pandas versions warn about it, and with
# Copy-on-Write it leaves the frame unchanged. Rebinding the names also keeps
# this cell safe to re-run.
train_df = train_df.fillna(fill_values)
test_df = test_df.fillna(fill_values)

# Fail here, rather than inside the encoder or the model, if a null survived.
imputed_cols = mode_cols + median_cols
assert not train_df[imputed_cols].isna().any().any(), "train still has missing values"
assert not test_df[imputed_cols].isna().any().any(), "test still has missing values"


In [None]:

# Encode the categorical feature columns as integer codes and the target as 1/0.
le = LabelEncoder()
categorical_cols = [
    'Gender', 'Married', 'Dependents', 'Education',
    'Self_Employed', 'Property_Area'
]

# The encoder is re-fit on the training values of each column, then applied to
# the test column so both frames share one label -> code mapping.
# `transform` raises ValueError if the test column holds a label that was not
# present in the training column.
for col in categorical_cols:
    train_df[col] = le.fit_transform(train_df[col])
    test_df[col] = le.transform(test_df[col])

# Map the target only while it still holds the raw 'Y'/'N' labels. Mapping an
# already-encoded 1/0 column would turn every value into NaN, so without this
# guard re-running the cell silently destroys the target.
if not pd.api.types.is_numeric_dtype(train_df['Loan_Status']):
    train_df['Loan_Status'] = train_df['Loan_Status'].map({'Y': 1, 'N': 0})

# Any label other than 'Y'/'N' would have become NaN above.
assert train_df['Loan_Status'].notna().all(), "Loan_Status contains unexpected labels"


In [None]:

# Target vector: the encoded approval flag.
y = train_df['Loan_Status']

# Feature matrix: every column except the row identifier and the target.
X = train_df.drop(columns=['Loan_ID', 'Loan_Status'])


In [None]:

# Hold out 20% of the rows for validation. Stratifying on y keeps the
# approved/rejected ratio the same in both parts; the fixed seed makes the
# split reproducible.
_split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, **_split_options)


In [None]:

# Fit a logistic regression on the training split. `fit` returns the estimator
# itself, so construction and fitting can be chained. max_iter is set to 1000.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Display the fitted estimator as the cell output.
model


In [None]:

# Score the model on the held-out validation split.
y_pred = model.predict(X_valid)

validation_accuracy = accuracy_score(y_valid, y_pred)
validation_report = classification_report(y_valid, y_pred)

print("Accuracy:", validation_accuracy)
print(validation_report)


In [None]:

# Confusion matrix of the validation predictions (rows: actual, columns: predicted).
cm = confusion_matrix(y_valid, y_pred)

# sns.heatmap returns the Axes it drew on, so label that Axes directly
# instead of going through the pyplot current-axes state.
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:

# The test frame has no target column; drop only the row identifier so the
# remaining columns are the features passed to the model.
test_features = test_df.drop(columns=['Loan_ID'])

# Predicted class (1 or 0) for every test row.
test_predictions = model.predict(test_features)


In [None]:

# Translate the 1/0 predictions back to the 'Y'/'N' labels and pair them with
# the loan ids. The predictions are positional, so they are wrapped in a
# Series that carries the test frame's index before mapping.
_label_for_class = {1: 'Y', 0: 'N'}
_predicted_labels = pd.Series(test_predictions, index=test_df.index).map(_label_for_class)

submission = test_df[['Loan_ID']].assign(Loan_Status=_predicted_labels)
submission.head()


In [None]:

# Write the submission without the DataFrame index, then confirm on stdout.
submission_path = "loan_submission.csv"
submission.to_csv(path_or_buf=submission_path, index=False)
print("Submission file saved as loan_submission.csv")
