# Task 5: Loan Default Prediction
This notebook builds a machine learning model to predict loan default probability using logistic regression.

In [None]:
!pip install pandas numpy scikit-learn matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Apply seaborn's "whitegrid" style globally so every plot below uses it.
sns.set(style="whitegrid")

In [None]:
# Read the raw loan data; 'loan_data.csv' must sit next to this notebook.
# Header names are stripped of leading/trailing whitespace so columns can be
# referenced by their clean names in later cells.
df = pd.read_csv('loan_data.csv').rename(columns=str.strip)
df.head()

In [None]:
# Structural overview of the loaded data: dimensions, column names, dtypes,
# and the number of missing values per column.
print(df.shape)
print(df.columns)
# info() prints its report itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line after the report).
df.info()
print(df.isnull().sum())
# Last expression: summary statistics rendered as the cell output.
df.describe()

In [None]:
# Bar chart of how many rows fall into each target class.
# Replace 'Default' with the actual target column name if different.
ax = sns.countplot(data=df, x='Default')
ax.set_title('Loan Default Distribution')
plt.show()

In [None]:
# Simplest missing-value strategy: discard every row that has at least one
# missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# One-hot encode the categorical *feature* columns only.
# The target is set aside first: if it is stored as text (e.g. 'Yes'/'No'),
# running get_dummies on the whole frame would rename it to 'Default_<level>'
# and the later df['Default'] / df.drop('Default') lookups would raise KeyError.
# Replace 'Default' with the actual target column name if different.
# NOTE(review): if the data contains a unique identifier column (e.g. a loan
# ID), it should probably be dropped before this step, otherwise it is
# expanded into one dummy column per row value -- confirm against the dataset.
target = df['Default']
df = pd.get_dummies(df.drop(columns='Default'), drop_first=True)
df['Default'] = target

In [None]:
# Separate the label from the predictors.
# Replace 'Default' with the actual target column name if different.
y = df['Default']
X = df.drop(columns=['Default'])

In [None]:
# Hold out 20% of the rows for evaluation, with a fixed seed for
# reproducibility. stratify=y keeps the class proportions of the target the
# same in the train and test sets, which matters when one class (defaults)
# is much rarer than the other.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into
# training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train a logistic regression classifier with default settings on the
# scaled training data.
model = LogisticRegression().fit(X_train, y_train)
model  # display the fitted estimator as the cell output

In [None]:
# Predict class labels for the held-out split and compute the evaluation
# metrics before reporting them.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)

In [None]:
# Pair each coefficient in the first row of model.coef_ with its feature
# name, then plot them from the largest to the smallest signed value.
feature_importance = pd.Series(data=model.coef_[0], index=X.columns)
ranked_coefficients = feature_importance.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(12, 6))
ranked_coefficients.plot.bar(ax=ax)
ax.set_title("Feature Importance in Loan Default Prediction")
ax.set_ylabel("Coefficient Weight")
plt.show()

### Insights:
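- The model achieved an accuracy of approximately **XX%** on the held-out 20% test set (**TODO:** replace `XX` with the accuracy printed by the evaluation cell).
- Features such as `Credit_History` and `LoanAmount` had a strong impact on default prediction, as indicated by the magnitude of their logistic regression coefficients (**TODO:** confirm against the coefficient plot above).
- Logistic Regression classified borrowers with potential default risk; refer to the confusion matrix and classification report for per-class precision and recall, since accuracy alone can be misleading when defaults are rare.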