In [1]:
import os
from pathlib import Path

import joblib
import pandas as pd

In [2]:
# Directory that holds the input CSV. Set the LOAN_DATA_DIR environment variable to
# run the notebook on another machine; when it is unset the original author's local
# checkout is used, so existing runs behave exactly as before.
DATA_DIR = Path(os.environ.get(
    'LOAN_DATA_DIR',
    '/home/saranrajgandhi/Guvi_final_projects/loan_status_prediction/Loan-Status-Prediction/data',
))

# Load the unlabeled loan applications and preview the first rows.
test_data = pd.read_csv(DATA_DIR / 'Loan_Status_test.csv')
test_data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,LP001015,Male,Yes,0,Graduate,No,5720,0,110.0,360.0,1.0,Urban
1,LP001022,Male,Yes,1,Graduate,No,3076,1500,126.0,360.0,1.0,Urban
2,LP001031,Male,Yes,2,Graduate,No,5000,1800,208.0,360.0,1.0,Urban
3,LP001035,Male,Yes,2,Graduate,No,2340,2546,100.0,360.0,,Urban
4,LP001051,Male,No,0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban


In [3]:
# Drop the "+" character from Dependents so the column can later be treated as numeric.
# Values that are not strings (for example missing entries) are passed through unchanged.
def _strip_plus(value):
    if not isinstance(value, str):
        return value
    return value.replace("+", "")

test_data['Dependents'] = test_data['Dependents'].map(_strip_plus)

In [4]:
# Fill missing values in the numeric columns with a 5-nearest-neighbour imputer.
# NOTE(review): the imputer is fitted on this test file itself; if training used its
# own fitted imputer, that one should probably be reused here -- confirm.
from sklearn.impute import KNNImputer

columns_with_missing = ['LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Dependents']
imputer = KNNImputer(n_neighbors=5)

# Fit and transform as two explicit steps on the same column subset.
imputer.fit(test_data[columns_with_missing])
test_data[columns_with_missing] = imputer.transform(test_data[columns_with_missing])

In [5]:
# Impute missing values in the categorical columns with each column's mode
# (its most frequent value).
# The filled Series is assigned back to the frame rather than calling
# fillna(inplace=True) on a column selection: that chained in-place form raises a
# FutureWarning in pandas 2.x and no longer updates the frame once Copy-on-Write
# is active, which would silently leave the missing values in place.
for column in ['Gender', 'Married', 'Self_Employed']:
    most_frequent = test_data[column].mode()[0]
    test_data[column] = test_data[column].fillna(most_frequent)

In [6]:
# Loan_ID is only a unique identifier and carries no predictive signal,
# so it is removed before the features are encoded.
test_data = test_data.drop(['Loan_ID'], axis='columns')

In [7]:
# One-hot encode the categorical features; every other column passes through unchanged.
# NOTE(review): the dummy columns are derived from the categories present in this file;
# presumably they must match the columns the model was trained on -- verify.
categorical_columns = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
x_encoded_data = pd.get_dummies(test_data, columns=categorical_columns)


In [8]:
# Standardise the encoded features.
# NOTE(review): despite its name, x_train_scaled holds the scaled *test* features, and
# the scaler is fitted on this test data rather than loaded from training -- confirm
# that this matches how the model was trained.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(x_encoded_data)
x_train_scaled = scaler.transform(x_encoded_data)

In [9]:
# Restore the persisted classifier; the relative path is resolved against the
# notebook's working directory.
model_path = 'best_model_Random Forest.joblib'
model = joblib.load(model_path)

In [10]:
# Run the loaded model over the scaled feature matrix. The input variable is called
# x_train_scaled, but it was built from the test file in the scaling cell.
scaled_features = x_train_scaled
prediction = model.predict(scaled_features)

In [12]:
# Restore the persisted label encoder that is used to decode the predicted class ids.
encoder_path = "loan_status_label_encoder.joblib"
label_encoder = joblib.load(encoder_path)

In [13]:
# Inspect the raw model output: an array of encoded class ids, not yet decoded to labels.
prediction

array([1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,

In [14]:
# Convert the encoded class ids back to their original labels with the loaded encoder.
# NOTE(review): presumably this is the encoder fitted on the training target -- verify.
final_predicted_val = label_encoder.inverse_transform(prediction)

In [15]:
# Attach the decoded labels to the test frame as a new column.
prediction_column = 'predictied_loan_status'
test_data[prediction_column] = final_predicted_val

In [17]:
# Display the test frame together with the newly added prediction column.
# Uncomment the option below before running to list every row instead of a truncated view.
# pd.set_option('display.max_rows', None)
test_data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,predictied_loan_status
0,Male,Yes,0.0,Graduate,No,5720,0,110.0,360.0,1.0,Urban,Y
1,Male,Yes,1.0,Graduate,No,3076,1500,126.0,360.0,1.0,Urban,Y
2,Male,Yes,2.0,Graduate,No,5000,1800,208.0,360.0,1.0,Urban,Y
3,Male,Yes,2.0,Graduate,No,2340,2546,100.0,360.0,1.0,Urban,Y
4,Male,No,0.0,Not Graduate,No,3276,0,78.0,360.0,1.0,Urban,N
5,Male,Yes,0.0,Not Graduate,Yes,2165,3422,152.0,360.0,1.0,Urban,N
6,Female,No,1.0,Not Graduate,No,2226,0,59.0,360.0,1.0,Semiurban,N
7,Male,Yes,2.0,Not Graduate,No,3881,0,147.0,360.0,0.0,Rural,N
8,Male,Yes,2.0,Graduate,No,13633,0,280.0,240.0,1.0,Urban,Y
9,Male,No,0.0,Not Graduate,No,2400,2400,123.0,360.0,1.0,Semiurban,Y
