# Importing libraries and dependencies

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn import svm # Support vector machine
from sklearn.metrics import accuracy_score

# Data Collection and Processing

In [3]:
# Load the loan-application records from a CSV file into a pandas DataFrame.
# The path is relative, so the file is looked up in the current working directory.
loan_dataset = pd.read_csv('dataset_loan_status.csv')

In [4]:
# List the column names to see which fields the dataset provides.
loan_dataset.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [5]:
# (rows, columns) of the dataset as loaded.
loan_dataset.shape

(614, 13)

In [6]:
# Preview the first five rows of the data as loaded.
loan_dataset.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [7]:
# Summary statistics for the numeric columns. A `count` lower than the number
# of rows means that column contains missing values.
loan_dataset.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [8]:
# Drop every row that has at least one missing value.
# NOTE(review): this rebinds `loan_dataset`, so the frame as loaded is no
# longer reachable after this cell. The later train/test split totals 480 rows
# against 614 loaded, so a sizeable share of the data is discarded here;
# consider imputation if that loss matters.
loan_dataset = loan_dataset.dropna()

In [9]:
# Count the missing values per column; every count should be zero at this point.
loan_dataset.isnull().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [10]:
# Frequency of each distinct value in the 'Dependents' column.
loan_dataset['Dependents'].value_counts()

Dependents
0     274
2      85
1      80
3+     41
Name: count, dtype: int64

In [11]:
# Recode the open-ended '3+' category as 4 and make 'Dependents' numeric.
# The replacement is limited to the 'Dependents' column (a frame-wide replace
# would also rewrite a literal '3+' found in any other column), and the column
# is cast to int so it does not end up mixing strings ('0', '1', '2') with a
# number. Running the cell again leaves the column unchanged.
loan_dataset = loan_dataset.assign(
    Dependents=loan_dataset['Dependents'].replace('3+', '4').astype(int)
)

In [12]:
# Preview the first five remaining rows after cleaning.
loan_dataset.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y


# Convert categorical columns to numerical values

In [14]:
# Print the distinct values of each categorical column before encoding.
# 'Education' appears twice in this sequence, so its values are printed twice.
for column_name in ('Education', 'Gender', 'Married', 'Education',
                    'Self_Employed', 'Property_Area'):
    print(loan_dataset[column_name].unique())

['Graduate' 'Not Graduate']
['Male' 'Female']
['Yes' 'No']
['Graduate' 'Not Graduate']
['No' 'Yes']
['Rural' 'Urban' 'Semiurban']


In [15]:
# Integer code for every label of every categorical column, target included.
category_codes = {
    'Loan_Status': {'N': 0, 'Y': 1},
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'No': 0, 'Yes': 1},
    'Education': {'Not Graduate': 0, 'Graduate': 1},
    'Self_Employed': {'No': 0, 'Yes': 1},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
}

# Series.map turns any value that is missing from the mapping into NaN without
# complaint. That also happens when this cell is run a second time, because by
# then the labels are already integers. Encode every column first and write
# the results back only once all of them mapped cleanly, so a failure leaves
# the frame untouched.
encoded_columns = {}
for column_name, codes in category_codes.items():
    encoded = loan_dataset[column_name].map(codes)
    unmapped = loan_dataset.loc[encoded.isna(), column_name].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Column '{column_name}' has values without a code: {list(unmapped)}"
        )
    encoded_columns[column_name] = encoded.astype('int64')

loan_dataset = loan_dataset.assign(**encoded_columns)

In [16]:
# Preview the first five rows after the categorical columns were encoded.
loan_dataset.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
2,LP001005,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2,1
3,LP001006,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2,1
4,LP001008,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2,1
5,LP001011,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2,1


# Training and Test Set

In [31]:
# Target: the encoded loan status. Features: every column except the row
# identifier and the target itself.
Y = loan_dataset['Loan_Status']
X = loan_dataset.drop(columns=['Loan_ID', 'Loan_Status'])

# Hold out 10% of the rows for testing. The split is stratified on Y and uses
# a fixed random_state so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=2,
)

In [60]:
# Display the feature matrix (pandas abbreviates the middle rows).
X

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
1,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0
2,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,2
3,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,2
4,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,2
5,1,1,2,1,1,5417,4196.0,267.0,360.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...
609,0,0,0,1,0,2900,0.0,71.0,360.0,1.0,0
610,1,1,4,1,0,4106,0.0,40.0,180.0,1.0,0
611,1,1,1,1,0,8072,240.0,253.0,360.0,1.0,2
612,1,1,2,1,0,7583,0.0,187.0,360.0,1.0,2


In [39]:
# Report the shape of each part of the split, one per line.
for label, part in (
    ("X_train shape:", X_train),
    ("X_test shape: ", X_test),
    ("Y_train shape:", Y_train),
    ("Y_test shape: ", Y_test),
):
    print(f"{label} {part.shape}")

X_train shape: (432, 11)
X_test shape:  (48, 11)
Y_train shape: (432,)
Y_test shape:  (48,)


# Support Vector Machine Model

In [42]:
# Fit a support vector classifier with a linear kernel on the training split.
# NOTE(review): the features are passed in without any scaling step; it is
# presumably worth checking whether standardising them changes the result.
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# Model Evaluation

In [53]:
# Accuracy on the training split.
# accuracy_score takes (y_true, y_pred): true labels first, predictions second.
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on training data:', training_data_accuracy)

Accuracy on training data: 0.7986111111111112


In [70]:
# Accuracy on the held-out test split.
# accuracy_score takes (y_true, y_pred): true labels first, predictions second.
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on test data:', test_data_accuracy)

Accuracy on training data: 0.8333333333333334


# Making a Predictive System

In [73]:
def predict_loan(input_tuple, classifier):
    """
    Predict loan approval status for a single input instance.

    The feature names, and with them the expected number of input values, are
    taken from ``classifier.feature_names_in_`` when the classifier has that
    attribute (scikit-learn sets it on estimators fitted on a DataFrame), so
    the columns handed to ``predict`` match the ones used for training.
    When the attribute is absent, the loan dataset's eleven feature columns
    listed below are used.

    Parameters:
    - input_tuple (tuple): Feature values for one applicant, in the same
      order as the feature columns.
    - classifier (sklearn model): Trained scikit-learn classifier.

    Returns:
    - prediction: First element of the classifier's output (0 or 1 for the
      loan-status classifier).
    - result_text (str): 'Loan Approved' when the prediction equals 1,
      otherwise 'Loan Not Approved'.

    Raises:
    - AssertionError: If the number of input values differs from the number
      of expected features.

    The prediction and its interpretation are also printed.
    """
    # Fallback feature names (dataset columns without Loan_ID and Loan_Status)
    default_feature_names = [
        'Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
        'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term',
        'Credit_History', 'Property_Area'
    ]

    # Prefer the names recorded on the fitted classifier over the fallback list
    fitted_names = getattr(classifier, 'feature_names_in_', None)
    if fitted_names is None:
        feature_names = default_feature_names
    else:
        feature_names = list(fitted_names)

    # Convert input to a NumPy array shaped as a single row
    input_array = np.asarray(input_tuple).reshape(1, -1)

    # Validate feature count
    expected_n_features = len(feature_names)
    assert input_array.shape[1] == expected_n_features, \
        f"Input has {input_array.shape[1]} features, but expected {expected_n_features}"

    # Create DataFrame with the feature names the classifier expects
    input_df = pd.DataFrame(input_array, columns=feature_names)

    # Predict
    prediction = classifier.predict(input_df)

    # Interpret result
    result_text = 'Loan Approved' if prediction[0] == 1 else 'Loan Not Approved'

    # Print and return
    print(f"Predicted class: {prediction[0]} → {result_text}")
    return prediction[0], result_text

In [75]:
# Example input
sample_input = (1, 1, 4, 1, 0, 4106, 0.0, 40.0, 180.0, 1.0, 0)

# Run the prediction
pred_class, pred_text = predict_loan(sample_input, classifier)


Predicted class: 1 → Loan Approved
