In [3]:
import pandas as pd

# Location of the loan CSV file
file_path = '/content/drive/MyDrive/semester project (AI)/Copy of loan.csv'

# Read the CSV into a DataFrame
data = pd.read_csv(file_path)

# Preview the top five rows to get a feel for the columns
data.head(n=5)


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [4]:
# Count the null entries in every column of the dataset
missing_values = data.isna().sum()

# Show the per-column null counts
missing_values


Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [5]:
# Columns imputed with their most frequent value (mode). Credit_History is
# numeric but is filled with its mode as well, like the categorical columns.
mode_columns = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History']

# Numerical columns imputed with their median
median_columns = ['LoanAmount', 'Loan_Amount_Term']

# Build one {column: fill value} mapping and apply it in a single call.
# Reassigning `data` (rather than calling fillna(inplace=True) on a column
# selection) avoids chained in-place mutation, which recent pandas versions
# warn about and which has no effect on `data` when Copy-on-Write is enabled.
fill_values = {col: data[col].mode()[0] for col in mode_columns}
fill_values.update({col: data[col].median() for col in median_columns})
data = data.fillna(value=fill_values)

# Check if there are any missing values left
missing_values_after_imputation = data.isnull().sum()

# Display the result
missing_values_after_imputation


Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# One-hot encode the categorical features. drop_first=True drops the first
# level of each feature, so that level is represented by all-zero dummies.
categorical_columns = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area']
data_encoded = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Features: every column except the identifier and the target
X = data_encoded.drop(columns=['Loan_ID', 'Loan_Status'])
# Target: 1 where Loan_Status is 'Y', 0 otherwise (vectorised comparison)
y = data_encoded['Loan_Status'].eq('Y').astype(int)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a logistic regression classifier on the training split
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

# Predict on the test set
y_pred = log_reg.predict(X_test)

# Generate the classification report (a multi-line text table)
report = classification_report(y_test, y_pred)

# Print the report: displaying the bare string shows its repr, with literal
# "\n" escapes instead of line breaks, which is hard to read.
print(report)


'              precision    recall  f1-score   support\n\n           0       0.95      0.42      0.58        43\n           1       0.76      0.99      0.86        80\n\n    accuracy                           0.79       123\n   macro avg       0.85      0.70      0.72       123\nweighted avg       0.83      0.79      0.76       123\n'

**Model prediction for a sample input**

In [7]:
def predict_loan_approval(input_data, model=None, feature_columns=None, categorical=None):
    """
    Predict loan approval for a single applicant.

    Parameters
    ----------
    input_data : dict
        Maps raw feature names (e.g. 'Gender', 'ApplicantIncome') to the
        applicant's values. Every column listed in `categorical` must be present.
    model : object with a ``predict`` method, optional
        Classifier used for the prediction. Defaults to the notebook-level `log_reg`.
    feature_columns : sequence of str, optional
        Encoded column names, in the order the model was trained on.
        Defaults to the notebook-level `X.columns`.
    categorical : sequence of str, optional
        Names of the raw columns to one-hot encode. Defaults to the
        notebook-level `categorical_columns`.

    Returns
    -------
    str
        'Approved' if the model predicts 1, otherwise 'Not Approved'.
    """
    # Fall back to the objects created by the training cell
    if model is None:
        model = log_reg
    if feature_columns is None:
        feature_columns = X.columns
    if categorical is None:
        categorical = categorical_columns

    input_df = pd.DataFrame([input_data])

    # One-hot encode WITHOUT drop_first. A one-row frame has exactly one level
    # per categorical column, so drop_first=True would drop every dummy column
    # and the applicant's categorical values would be silently ignored.
    input_encoded = pd.get_dummies(input_df, columns=list(categorical))

    # Align to the training layout: columns absent from the input become 0,
    # dummies for levels the training encoding dropped or never saw are
    # discarded, and the column order matches what the model was fitted on.
    input_encoded = input_encoded.reindex(columns=feature_columns, fill_value=0)

    # Predict the loan approval
    prediction = model.predict(input_encoded)

    return 'Approved' if prediction[0] == 1 else 'Not Approved'

# A sample applicant, keyed by the raw (un-encoded) feature names
input_data_example = {
    # Applicant profile
    'Gender': 'Male', 'Married': 'Yes', 'Dependents': '0',
    'Education': 'Graduate', 'Self_Employed': 'No',
    # Income figures
    'ApplicantIncome': 5000, 'CoapplicantIncome': 0,
    # Loan details
    'LoanAmount': 150, 'Loan_Amount_Term': 360, 'Credit_History': 1,
    'Property_Area': 'Urban',
}

# Run the sample applicant through the prediction helper and show the outcome
result = predict_loan_approval(input_data_example)
result


'Approved'