In [None]:
# Loan Prediction - Logistic Regression.ipynb


# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [None]:
# Report the frame's dimensions as a (rows, columns) tuple.
# NOTE(review): no visible cell above assigns `df`; a data-loading step
# (presumably "Step 2") appears to be missing, so on a fresh kernel this
# would raise NameError unless `df` is defined elsewhere — confirm.
print("Dataset Shape:", df.shape)

In [None]:
# List every column label as a plain Python list.
column_names = df.columns.to_list()
print("\nColumns:", column_names)

In [None]:
# Show the dtype pandas holds for each column.
column_dtypes = df.dtypes
print("\nData Types:\n", column_dtypes)

In [None]:
# Preview the top of the frame (head() defaults to 5 rows; made explicit here).
preview = df.head(n=5)
print("\nFirst 5 Rows:\n", preview)

In [None]:
# Step 3: Data Cleaning
# Drop the Loan_ID column: it is a row identifier, not a predictor.
# `errors="ignore"` keeps this cell idempotent — the original in-place drop
# raised KeyError when the cell was re-run after the column was already gone.
df = df.drop(columns=["Loan_ID"], errors="ignore")

In [None]:
# Count the missing entries in each column before imputing them
df.isna().sum()

In [None]:
# Impute missing values: the most frequent value (mode) for these columns,
# and the median for LoanAmount.
#
# The fill values are collected into one mapping and applied with a single
# frame-level fillna. The original `df[col].fillna(..., inplace=True)` form is
# chained assignment on a column selection: it emits a FutureWarning on
# pandas 2.x and leaves `df` unchanged under Copy-on-Write.
mode_filled_columns = [
    'Gender',
    'Married',
    'Dependents',
    'Self_Employed',
    'Loan_Amount_Term',
    'Credit_History',
]
fill_values = {name: df[name].mode()[0] for name in mode_filled_columns}
fill_values['LoanAmount'] = df['LoanAmount'].median()

df = df.fillna(value=fill_values)

In [None]:
# Encode categorical variables
# Each listed column is replaced by integer codes from its own LabelEncoder.
# The fitted encoders are kept in `encoders` (keyed by column name) so the
# integer codes can be mapped back to the original labels later, e.g. via
# `encoders['Loan_Status'].classes_`. The original reused one encoder for
# every column, which kept only the last column's mapping.
encoders = {}
for col in ['Gender','Married','Education','Self_Employed','Property_Area','Loan_Status','Dependents']:
    le = LabelEncoder()  # after the loop `le` is still the 'Dependents' encoder, as before
    df[col] = le.fit_transform(df[col])
    encoders[col] = le


print("\nCleaned Data Info:")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() added a stray "None" line to the output.
df.info()
print("\nFirst 5 Rows After Cleaning:\n", df.head())

In [None]:
# Step 4: Separate the label column (y) from the predictor columns (X)
y = df["Loan_Status"]
X = df.drop(columns=["Loan_Status"])

In [None]:
# Step 5: Hold out 30% of the rows for testing.
# The split is seeded (random_state=42) and stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

print("\nTraining Set Shape:", X_train.shape)
print("Testing Set Shape:", X_test.shape)

In [None]:
# Step 6: Fit the logistic-regression classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model  # echo the fitted estimator as the cell's output

In [None]:
# Step 7: Predictions
# Predict a class label for every row of the held-out test features; the
# evaluation cell below compares `y_pred` against `y_test`.
y_pred = model.predict(X_test)

In [None]:
# Step 8: Evaluation
# Scalar metrics first, each computed on the same (y_test, y_pred) pair ...
scalar_metrics = (
    ("\nAccuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for metric_label, metric_fn in scalar_metrics:
    print(metric_label, metric_fn(y_test, y_pred))

# ... then the per-class breakdowns.
conf_mat = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_mat)

report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

In [None]:
# Step 9: Interpret Coefficients
# Pair each feature name with its fitted weight and rank the rows from the
# most positive coefficient to the most negative.
# Caveat: the visible cells fit the model on unscaled features, so the
# magnitude of a coefficient depends on that feature's units; magnitudes are
# not directly comparable across features.
coefficients = (
    pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_[0]})
    .sort_values(by='Coefficient', ascending=False)
)

print("\nFeature Influence on Loan Approval:\n", coefficients)
# The bar chart of these coefficients is drawn by the next cell
# ("Visualization: Feature Influence"). A copy of that plotting code had been
# pasted onto the end of this cell, fused to the print line, which rendered
# the same figure twice; the duplicate is removed here.

In [None]:
# Visualization: Feature Influence
# One horizontal bar per feature, drawn on an explicitly created Axes.
# NOTE(review): newer seaborn releases may warn about `palette` being passed
# without `hue` — confirm against the installed version.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=coefficients,
    x='Coefficient',
    y='Feature',
    palette='coolwarm',
    ax=ax,
)
ax.set_title("Feature Influence on Loan Approval (Logistic Regression Coefficients)")
plt.show()

In [None]:
# Visualization: Correlation Heatmap
# Pairwise correlations of the cleaned frame's columns, annotated to two decimals.
corr_matrix = df.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.01,
    ax=ax,
)
ax.set_title("Correlation Heatmap")
plt.show()