In [38]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [39]:
# Location of the loan dataset. Set the LOAN_DATA_CSV environment variable to
# point at your own copy of the file; the fallback is the original hardcoded
# path, so existing runs on the original machine behave exactly as before.
DATA_PATH = os.environ.get(
    "LOAN_DATA_CSV",
    r"C:\Users\KUMALIKA\OneDrive\Desktop\loan_data.csv",
)
# Read the CSV into `df` using the same explicit encoding as before.
df = pd.read_csv(DATA_PATH, encoding="ISO-8859-1")

In [40]:
# Bare expression: renders the loaded frame as the cell output for a quick visual check.
df

Unnamed: 0,credit.policy,purpose,int.rate,installment,log.annual.inc,dti,fico,days.with.cr.line,revol.bal,revol.util,inq.last.6mths,delinq.2yrs,pub.rec,not.fully.paid
0,1,debt_consolidation,0.1189,829.10,11.350407,19.48,737,5639.958333,28854,52.1,0,0,0,0
1,1,credit_card,0.1071,228.22,11.082143,14.29,707,2760.000000,33623,76.7,0,0,0,0
2,1,debt_consolidation,0.1357,366.86,10.373491,11.63,682,4710.000000,3511,25.6,1,0,0,0
3,1,debt_consolidation,0.1008,162.34,11.350407,8.10,712,2699.958333,33667,73.2,1,0,0,0
4,1,credit_card,0.1426,102.92,11.299732,14.97,667,4066.000000,4740,39.5,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9573,0,all_other,0.1461,344.76,12.180755,10.39,672,10474.000000,215372,82.1,2,0,0,1
9574,0,all_other,0.1253,257.70,11.141862,0.21,722,4380.000000,184,1.1,5,0,0,1
9575,0,debt_consolidation,0.1071,97.81,10.596635,13.09,687,3450.041667,10036,82.9,8,0,0,1
9576,0,home_improvement,0.1600,351.58,10.819778,19.18,692,1800.000000,0,3.2,5,0,0,1


In [41]:
# Encode the categorical `purpose` column as integer codes and keep the fitted
# encoder in `le` so the same mapping can be applied to new inputs later.
#
# The guard makes this cell safe to re-run: without it, a second execution would
# refit a fresh encoder on the already-encoded integers, and a later
# `le.transform(['credit_card'])` would no longer recognise the string label.
#
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for targets
# (y); integer codes give a linear model an arbitrary ordering of the
# categories. Consider one-hot encoding `purpose` as a follow-up.
if not pd.api.types.is_numeric_dtype(df['purpose']):
    le = LabelEncoder()
    df['purpose'] = le.fit_transform(df['purpose'])

In [42]:
# Separate the label from the predictors: `y` is the `not.fully.paid` column,
# `X` is every other column of `df`.
y = df['not.fully.paid']
X = df.drop(columns=['not.fully.paid'])

In [43]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on `y`; given the class imbalance
# visible in the recorded reports, consider `stratify=y` — confirm before changing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [44]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both the training and the held-out features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [45]:
# Train a default-configured logistic regression on the scaled training
# features (`fit` returns the estimator), then predict the held-out rows.
log_reg = LogisticRegression().fit(X_train_scaled, y_train)
y_pred_log = log_reg.predict(X_test_scaled)

In [49]:
# Score the logistic regression on the held-out split: overall accuracy plus
# the per-class precision/recall/F1 report.
y_pred = log_reg.predict(X_test_scaled)
print("=== Model Evaluation ===")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

=== Model Evaluation ===
Accuracy: 0.8392484342379958
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1611
           1       0.38      0.02      0.03       305

    accuracy                           0.84      1916
   macro avg       0.61      0.51      0.47      1916
weighted avg       0.77      0.84      0.77      1916



In [50]:
print("\n=== User Loan Application ===")

# Example applicant, one (feature, value) pair per predictor. `purpose` is
# given as its human-readable label here and is encoded in a later cell.
user_input = dict([
    ('credit.policy', 1),
    ('purpose', 'credit_card'),
    ('int.rate', 0.12),
    ('installment', 200.0),
    ('log.annual.inc', 10.5),
    ('dti', 15.0),
    ('fico', 700),
    ('days.with.cr.line', 4000),
    ('revol.bal', 5000),
    ('revol.util', 40.0),
    ('inq.last.6mths', 1),
    ('delinq.2yrs', 0),
    ('pub.rec', 0),
])


=== User Loan Application ===


In [51]:
# Replace the human-readable purpose with the integer code learned by `le`.
# Encode only while the value is still a string: this cell overwrites the entry
# in place, so without the guard a re-run would pass the already-encoded
# integer back to `le.transform`, which does not know that label.
if isinstance(user_input['purpose'], str):
    user_input['purpose'] = le.transform([user_input['purpose']])[0]

In [52]:
# Build a one-row frame for the applicant and select its columns in exactly the
# order of the training features. This removes the implicit dependency on the
# dict's insertion order and raises a KeyError naming any feature missing
# from `user_input`, instead of failing later inside the scaler or model.
user_df = pd.DataFrame([user_input])[list(X_train.columns)]
# Apply the scaler that was fitted on the training data.
user_scaled = scaler.transform(user_df)

In [55]:
# For the single applicant row: take the second column of `predict_proba`
# (reported downstream as the probability of NOT fully paying) and the
# predicted class label.
prediction_proba = log_reg.predict_proba(user_scaled)[0, 1]
prediction = log_reg.predict(user_scaled)[0]

In [60]:
# Report the predicted outcome in words, followed by the model's probability
# for the "not fully paid" class rounded to two decimals.
print("\n=== Prediction Result ===")
print(
    " The applicant is likely NOT to fully pay the loan."
    if prediction == 1
    else " The applicant is likely to fully pay the loan."
)
print(f"Probability of NOT fully paying: {prediction_proba:.2f}")


=== Prediction Result ===
 The applicant is likely to fully pay the loan.
Probability of NOT fully paying: 0.13


In [57]:
# Fit a decision tree on the unscaled training features and predict the
# held-out rows.
# The seed is fixed (same value as the train/test split) because tree fitting
# involves randomness; without it the fitted tree, and therefore the reported
# metrics, can change from run to run.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)
y_pred_tree = dtree.predict(X_test)

In [56]:
# Print the same accuracy + classification report for each model's held-out
# predictions, one section per model, in a fixed order.
for section_title, model_preds in (
    ("=== Logistic Regression ===", y_pred_log),
    ("\n=== Decision Tree ===", y_pred_tree),
):
    print(section_title)
    print("Accuracy:", accuracy_score(y_test, model_preds))
    print(classification_report(y_test, model_preds))
    if model_preds is y_pred_log:
        # Blank line between the two sections, as in the original layout.
        print()

=== Logistic Regression ===
Accuracy: 0.8392484342379958
              precision    recall  f1-score   support

           0       0.84      1.00      0.91      1611
           1       0.38      0.02      0.03       305

    accuracy                           0.84      1916
   macro avg       0.61      0.51      0.47      1916
weighted avg       0.77      0.84      0.77      1916


=== Decision Tree ===
Accuracy: 0.7437369519832986
              precision    recall  f1-score   support

           0       0.85      0.84      0.85      1611
           1       0.22      0.25      0.23       305

    accuracy                           0.74      1916
   macro avg       0.54      0.54      0.54      1916
weighted avg       0.75      0.74      0.75      1916

