# Project: ML Project - Loan Applicant Credit Risk Analysis using K-Nearest Neighbours (KNN)

## Part-1 Data Exploration and Pre-processing

In [None]:
import pandas as pd 
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split as TTS
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

#### 1) Load the given dataset
#### 2) Check for null values
#### 3) Get basic info from the data
#### 4) Describe the dataset

In [None]:
# Load the applicant dataset from the Excel workbook at ./Data.xlsx.
Data = pd.read_excel("./Data.xlsx")
display(Data)

# Share of missing values per column, expressed as a percentage.
Null_Values = Data.isnull().mean() * 100
print("\nNull Values = \n", Null_Values)

print("\n\n")
# DataFrame.info() prints its summary itself and returns None, so it is called
# only for its printed output; wrapping its return value in a DataFrame would
# just display an empty frame.
Data.info()

print("\n\nDescribe the dataset")
# Summary statistics produced by DataFrame.describe().
Stats = Data.describe()
display(Stats)

#### 5) Display a scatterplot between Age and Total Work Experience

In [None]:
# Scatter plot of the "Age" column against the "Total Work Experience" column.
plt.scatter(Data["Age"], Data["Total Work Experience"], c='b', marker='o', edgecolors='k', label='Data Points')
plt.xlabel('Age', color='black')
plt.ylabel('Total Work Experience', color='black')
# The title is set exactly once; setting it twice is redundant because the
# later call simply replaces the earlier one.
plt.title("Age v/s Work Exp.", color='black')
plt.legend()
plt.show()

#### 6) Display boxplot for age

In [None]:
# Box plot of the "Age" column.
Age_Values = Data['Age']
plt.boxplot(Age_Values)
plt.title("Box Plot - Age")
plt.show()

#### 7) Display boxplot for Cibil Score

In [None]:
# Box plot of the "Cibil score" column.
Cibil_Values = Data['Cibil score']
plt.boxplot(Cibil_Values)
plt.title("Box Plot - Cibil Score")
plt.show()

#### 8) Create target and features data where target is Total bounces past 12 months

In [None]:
# Show the available columns, then separate the target column from the features.
Cols = list(Data.columns)
print("\nColumns - ", Cols)

Target_Col = "Total bounces past12months"
Y = Data[Target_Col]                    # target: the single named column
X = Data.drop(columns=[Target_Col])     # features: every remaining column

## Part-2 Working with Model

#### 1) Split data into training and testing sets

#### 2) Create a KNN classifier between features and target data



In [None]:
# Use 80% of the rows for training; random_state=0 keeps the split repeatable.
X_train, X_test, Y_train, Y_test = TTS(X, Y, train_size=0.8, random_state=0)

# Fit a KNN classifier with its default settings, then predict the test rows.
Model = KNeighborsClassifier().fit(X_train, Y_train)
Y_Pred = Model.predict(X_test)
print("\nModel Prediction - ", Y_Pred)

#### 3) Display the test score

#### 4) Display the training score

#### 5) Print the accuracy score

In [None]:
# Model.score on the training and testing splits, scaled to percentages.
Train = Model.score(X_train, Y_train) * 100
Test = Model.score(X_test, Y_test) * 100
print("\nTrainning Score = ", Train)
print("\nTesting Score = ", Test)

# accuracy_score of the earlier predictions (Y_Pred) against the true test labels.
Accuracy = 100 * accuracy_score(Y_test, Y_Pred)
print("\nAccuracy Score = ", Accuracy)

#### 6) Try 1 to 14 k values for classifier

In [None]:
# Per-k results (as percentages) for k = 1..14; list index 0 corresponds to k = 1.
Best_Accuracy, Best_Train, Best_Test = [], [], []

# NOTE: `Model` is rebound on every iteration, so after this loop it refers to
# the classifier fitted with n_neighbors=14.
for i in range(1, 15):
    Model = KNeighborsClassifier(n_neighbors=i)
    Model.fit(X_train, Y_train)

    Y_Pred2 = Model.predict(X_test)
    # list.append mutates in place and returns None, so its result is not
    # bound to a name.
    Best_Train.append(Model.score(X_train, Y_train) * 100)
    Best_Test.append(Model.score(X_test, Y_test) * 100)
    Best_Accuracy.append(accuracy_score(Y_test, Y_Pred2) * 100)

print("\nAccuracy = ", Best_Accuracy)
print("\n\n")
print("\nTrain = ", Best_Train)
print("\n\n")
print("\nTest = ", Best_Test)


In [None]:
# Plot test and train accuracy against the k values 1..14.
K_Range = range(1, 15)

plt.figure(figsize=(8, 5))
plt.plot(K_Range, Best_Accuracy, label='Test Accuracy', marker='o')
plt.plot(K_Range, Best_Train, label='Train Accuracy', marker='s', linestyle='--')

plt.xlabel("K (Number of Neighbors)")
plt.ylabel("Accuracy (%)")
plt.title("K Value vs Accuracy")
plt.grid(True)
plt.legend()
plt.show()
