# Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Load Dataset


In [None]:
# Location of the raw loan-application CSV on the mounted Google Drive.
DATASET_PATH = '/content/drive/MyDrive/pgp datasets 23/ML Project 1 Dataset.csv'
# Read the whole file into a DataFrame.
loan = pd.read_csv(DATASET_PATH)

# Display Dataset

In [None]:
# Display the DataFrame; pandas abbreviates the rendering to the first and last rows.
loan

Unnamed: 0,APP_ID,CIBIL_SCORE_VALUE,NEW_CUST,CUS_CATGCODE,EMPLOYMENT_TYPE,AGE,SEX,NO_OF_DEPENDENTS,MARITAL,EDU_QUA,P_RESTYPE,P_CATEGORY,EMPLOYEE_TYPE,MON_IN_OCC,INCOM_EXP_GMI,LTV,TENURE,STATUS
0,12345,0,YES,1,0,31,F,3,0,0,1,4,2,36,0,0.767104,12,0
1,12347,0,NO,1,1,40,F,2,1,1,0,1,1,12,2,0.619077,24,0
2,12349,0,YES,1,0,27,F,3,0,0,1,2,2,72,0,0.848949,36,0
3,12351,2,NO,1,1,33,M,2,0,1,0,2,1,120,1,0.515646,12,0
4,12353,2,NO,1,1,29,F,1,0,1,1,2,1,24,2,0.614123,24,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13294,38933,0,YES,1,0,36,M,3,1,0,0,1,0,24,0,0.840564,24,1
13295,38935,2,NO,1,1,29,F,1,0,1,0,4,1,25,2,0.450000,12,1
13296,38937,0,YES,1,1,28,F,0,0,1,0,2,1,13,2,0.593333,36,1
13297,38939,0,YES,1,1,28,F,0,0,1,1,3,1,13,2,0.106667,36,0


# Information about Dataset

In [None]:
# Summarize the columns: name, non-null count and dtype of each.
loan.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13299 entries, 0 to 13298
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   APP_ID               13299 non-null  int64  
 1    CIBIL_SCORE_VALUE   13299 non-null  int64  
 2   NEW_CUST             13299 non-null  object 
 3   CUS_CATGCODE         13299 non-null  int64  
 4    EMPLOYMENT_TYPE     13299 non-null  int64  
 5   AGE                  13299 non-null  int64  
 6    SEX                 13299 non-null  object 
 7    NO_OF_DEPENDENTS    13299 non-null  int64  
 8    MARITAL             13299 non-null  int64  
 9    EDU_QUA             13299 non-null  int64  
 10    P_RESTYPE          13299 non-null  int64  
 11   P_CATEGORY          13299 non-null  int64  
 12   EMPLOYEE_TYPE       13299 non-null  int64  
 13    MON_IN_OCC         13299 non-null  int64  
 14   INCOM_EXP_GMI       13299 non-null  int64  
 15  LTV                  13299 non-null 

# Strip spaces from column names

In [None]:
# Several headers in the CSV carry leading spaces; trim the whitespace from
# every column name so later cells can refer to the columns by their clean names.
loan.columns = loan.columns.map(str.strip)

# Encode categorical variables

In [None]:
# One LabelEncoder per categorical column, kept by column name so the fitted
# mapping of each column stays available after this cell.
label_encoders = {name: LabelEncoder() for name in ('NEW_CUST', 'SEX', 'MARITAL')}
for name, encoder in label_encoders.items():
    # Values are cast to str before fitting, then replaced by their integer codes.
    loan[name] = encoder.fit_transform(loan[name].astype(str))

# Separate features and target variable

In [None]:
# STATUS is the value to predict.
y = loan['STATUS']
# Every remaining column except the APP_ID identifier is used as a feature.
X = loan.drop(['APP_ID', 'STATUS'], axis=1)

# Handle missing values

In [None]:
imputer = SimpleImputer(strategy='most_frequent')
X_imputed = imputer.fit_transform(X)

# Scale numerical features

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Split the data into training and testing sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Train the model

In [None]:
# Seed handed to the estimator.
# NOTE(review): with the default solver this seed presumably has no effect — confirm against the scikit-learn docs.
classifier_seed = 50
model = LogisticRegression(random_state=classifier_seed)
# Fit on the training split; the fitted estimator is the cell's displayed value.
model.fit(X=X_train, y=y_train)


# Save the model to a file

In [None]:
# Persist the fitted classifier so it can be reloaded without retraining.
joblib_file = 'loan_approval_model.pkl'
joblib.dump(model, joblib_file)
print(f"Model saved to {joblib_file}")

# The classifier was trained on label-encoded, imputed and scaled features, so
# the fitted preprocessing objects are stored next to it; without them the
# saved model cannot be applied to raw application records.
preprocessing_file = 'loan_approval_preprocessing.pkl'
joblib.dump(
    {'label_encoders': label_encoders, 'imputer': imputer, 'scaler': scaler},
    preprocessing_file,
)

Model saved to loan_approval_model.pkl



# Load the model from the file

In [None]:
# Read the persisted estimator back from disk to confirm the saved file is usable.
# NOTE(review): joblib files are pickle-based — only load files from a trusted source.
loaded_model = joblib.load(joblib_file)

# Make predictions using the loaded model

In [None]:
# Predict STATUS for the held-out test rows with the reloaded estimator.
y_pred_loaded = loaded_model.predict(X_test)

# Evaluate the loaded model

In [None]:
# Score the reloaded model's predictions against the true test labels with
# each metric in turn: overall accuracy, confusion matrix, per-class report.
accuracy_loaded, cm_loaded, report_loaded = (
    metric(y_test, y_pred_loaded)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)

In [None]:
# Emit every metric on its own line, each preceded by a label.
print(
    f'Accuracy of loaded model: {accuracy_loaded}',
    'Confusion Matrix of loaded model:',
    cm_loaded,
    'Classification Report of loaded model:',
    report_loaded,
    sep='\n',
)

Accuracy of loaded model: 0.6378446115288221
Confusion Matrix of loaded model:
[[2178  296]
 [1149  367]]
Classification Report of loaded model:
              precision    recall  f1-score   support

           0       0.65      0.88      0.75      2474
           1       0.55      0.24      0.34      1516

    accuracy                           0.64      3990
   macro avg       0.60      0.56      0.54      3990
weighted avg       0.62      0.64      0.59      3990



In [None]:
# Download the saved model file through the browser (Colab only).
# Imported here rather than in the top import cell because google.colab is
# only importable inside a Colab runtime.
from google.colab import files
# Reuse joblib_file instead of repeating the literal so the download always
# targets the file that joblib.dump wrote.
files.download(joblib_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>