In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Load the raw loan applications table.
# NOTE(review): absolute path, presumably a Colab working directory -- adjust when running elsewhere.
loan_csv_path = '/content/loan.csv'
data = pd.read_csv(loan_csv_path)

In [None]:
# Preview the first ten rows to sanity-check the load.
data.head(n=10)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,0.0,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


In [None]:
# Drop the applicant identifier; it is not used as a feature.
data = data.drop(columns='Loan_ID')

# Inspect for missing values
print("Initial missing values:\n", data.isnull().sum())

# Fill missing values.
# Results are assigned back to the column rather than calling
# `data[col].fillna(..., inplace=True)`: that chained in-place form operates on
# a temporary object under pandas Copy-on-Write and leaves `data` untouched.
# NOTE(review): the mode/mean statistics are computed on the full dataset,
# i.e. before the train/test split performed in a later cell.
data['Dependents'] = data['Dependents'].replace('3+', '3')  # keep one type until the int cast below
for col in ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History']:
    data[col] = data[col].fillna(data[col].mode()[0])
for col in ['LoanAmount', 'Loan_Amount_Term']:
    data[col] = data[col].fillna(data[col].mean())

# Verify no missing values remain
print("Missing values after filling:\n", data.isnull().sum())

# Manual encoding for binary variables
binary_maps = {
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Loan_Status': {'Y': 1, 'N': 0},
}
for col, mapping in binary_maps.items():
    data[col] = data[col].map(mapping)

# `.map` turns any value missing from its dictionary into NaN; fail here with a
# clear message instead of letting NaNs reach the models.
unmapped = data[list(binary_maps)].isnull().sum()
assert not unmapped.any(), f"Unexpected category values left unmapped:\n{unmapped[unmapped > 0]}"

# Convert 'Dependents' to numeric
data['Dependents'] = data['Dependents'].astype(int)

# Convert 'Property_Area' using one-hot encoding
# (drop_first=True removes the first category's indicator column)
data = pd.get_dummies(data, columns=['Property_Area'], drop_first=True)


Initial missing values:
 Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64
Missing values after filling:
 Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64


In [None]:
# Show the per-column dtypes after encoding.
column_dtypes = data.dtypes
print("Data types:\n", column_dtypes)


Data types:
 Gender                       int64
Married                      int64
Dependents                   int64
Education                    int64
Self_Employed                int64
ApplicantIncome              int64
CoapplicantIncome          float64
LoanAmount                 float64
Loan_Amount_Term           float64
Credit_History             float64
Loan_Status                  int64
Property_Area_Semiurban       bool
Property_Area_Urban           bool
dtype: object


In [None]:
# Separate the label column from the remaining feature columns.
y = data['Loan_Status']
X = data.drop(columns=['Loan_Status'])

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both the training and the held-out rows.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would refit the scaler on already-scaled values; re-run from the split cell.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Initialize models (each seeded with the same random_state)
log_clf, tree_clf, rf_clf = (
    LogisticRegression(random_state=42),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
)


In [None]:
# Train models: fit each base classifier on the scaled training data.
for base_model in (log_clf, tree_clf, rf_clf):
    base_model.fit(X_train, y_train)

# Create Voting Classifier over the three base models ('hard' voting mode).
# NOTE(review): per the scikit-learn docs the ensemble fits its own clones of
# these estimators, so the standalone fits above are presumably only needed
# if the individual models are inspected separately -- confirm.
named_estimators = [('lr', log_clf), ('dt', tree_clf), ('rf', rf_clf)]
voting_clf = VotingClassifier(estimators=named_estimators, voting='hard')


In [None]:
# Fit the ensemble on the scaled training data.
voting_clf.fit(X=X_train, y=y_train)


In [None]:
# Predict labels for the held-out rows.
y_pred = voting_clf.predict(X=X_test)


In [None]:
# Evaluate performance of the ensemble on the held-out split
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Ensemble Model Accuracy: {accuracy}")

report = classification_report(y_true=y_test, y_pred=y_pred)
print(f"Classification Report:\n{report}")

Ensemble Model Accuracy: 0.772972972972973
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.43      0.57        65
           1       0.76      0.96      0.85       120

    accuracy                           0.77       185
   macro avg       0.80      0.69      0.71       185
weighted avg       0.79      0.77      0.75       185



In [None]:
def get_user_input():
    """Interactively collect one applicant's details as a one-row DataFrame.

    Prompts on stdin for each field and returns the answers already numerically
    encoded. Columns, in order: Gender, Married, Dependents, Education,
    Self_Employed, ApplicantIncome, CoapplicantIncome, LoanAmount,
    Loan_Amount_Term, Credit_History, Property_Area_Semiurban,
    Property_Area_Urban. A Rural property is the case where both
    Property_Area_* indicators are 0.

    Returns:
        pandas.DataFrame: a single row with index 0.

    Raises:
        ValueError: if an answer cannot be parsed as a number, a 0/1 field is
            given another value, or Property_Area is not one of
            Urban, Semiurban, Rural (matched case-insensitively).
    """
    def ask_flag(prompt, cast=int):
        # Read a field that only admits 0 or 1 and reject anything else, so an
        # out-of-range answer cannot slip through to the model.
        value = cast(input(prompt))
        if value not in (0, 1):
            raise ValueError(f"Expected 0 or 1 for {prompt!r}, got {value!r}")
        return value

    user_data = {}
    user_data['Gender'] = ask_flag("Gender (Male: 1, Female: 0): ")
    user_data['Married'] = ask_flag("Married (Yes: 1, No: 0): ")
    # Accept the "3+" spelling as 3.
    user_data['Dependents'] = int(input("Dependents (0, 1, 2, 3): ").replace('3+', '3'))
    user_data['Education'] = ask_flag("Education (Graduate: 1, Not Graduate: 0): ")
    user_data['Self_Employed'] = ask_flag("Self_Employed (Yes: 1, No: 0): ")
    user_data['ApplicantIncome'] = float(input("ApplicantIncome: "))
    user_data['CoapplicantIncome'] = float(input("CoapplicantIncome: "))
    user_data['LoanAmount'] = float(input("LoanAmount: "))
    user_data['Loan_Amount_Term'] = float(input("Loan_Amount_Term: "))
    user_data['Credit_History'] = ask_flag("Credit_History (1.0, 0.0): ", cast=float)

    # Normalise case so "urban" is not silently encoded as Rural, and reject
    # anything that is not a known category instead of defaulting to Rural.
    property_area = input("Property_Area (Urban, Semiurban, Rural): ").strip().capitalize()
    if property_area not in ('Urban', 'Semiurban', 'Rural'):
        raise ValueError(
            f"Property_Area must be Urban, Semiurban or Rural, got {property_area!r}"
        )
    user_data['Property_Area_Semiurban'] = 1 if property_area == 'Semiurban' else 0
    user_data['Property_Area_Urban'] = 1 if property_area == 'Urban' else 0

    return pd.DataFrame(user_data, index=[0])
# Collect one applicant interactively, scale it with the already-fitted scaler,
# and ask the ensemble for a decision.
user_input = scaler.transform(get_user_input())
user_prediction = voting_clf.predict(user_input)

# Label 1 is reported as an approval; any other label as a rejection.
if user_prediction[0] == 1:
    loan_status = 'Approved'
else:
    loan_status = 'Rejected'
print(f"Loan Status: {loan_status}")

Gender (Male: 1, Female: 0): 1
Married (Yes: 1, No: 0): 1
Dependents (0, 1, 2, 3): 2
Education (Graduate: 1, Not Graduate: 0): 1
Self_Employed (Yes: 1, No: 0): 1
ApplicantIncome: 300000
CoapplicantIncome: 20000
LoanAmount: 200000
Loan_Amount_Term: 36
Credit_History (1.0, 0.0): 1.0
Property_Area (Urban, Semiurban, Rural): Urban
Loan Status: Rejected
