In [2]:
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Toy dataset written one observation per row:
# (weather, feature1, feature2, play).
_observations = [
    ('sunny',    'hot',  'high',   'no'),
    ('sunny',    'hot',  'high',   'no'),
    ('overcast', 'hot',  'high',   'yes'),
    ('rainy',    'mild', 'normal', 'yes'),
    ('rainy',    'cool', 'normal', 'yes'),
    ('overcast', 'cool', 'high',   'yes'),
    ('sunny',    'mild', 'normal', 'no'),
]

# Transpose the rows into the four parallel column lists used downstream.
weather, feature1, feature2, play = (list(column) for column in zip(*_observations))

# One LabelEncoder per categorical column so that every mapping can be
# inverted independently (le_play is reused below to decode predictions).
le_weather = preprocessing.LabelEncoder()
le_feature1 = preprocessing.LabelEncoder()
le_feature2 = preprocessing.LabelEncoder()
le_play = preprocessing.LabelEncoder()

weather_encoded = le_weather.fit_transform(weather)
feature1_encoded = le_feature1.fit_transform(feature1)
feature2_encoded = le_feature2.fit_transform(feature2)
play_encoded = le_play.fit_transform(play)

# NOTE(review): weather_encoded is computed above but is not part of the
# feature matrix; only feature1 and feature2 are used. Confirm this is intended.
features = list(zip(feature1_encoded, feature2_encoded))

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.2, random_state=42
)

model = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
model.fit(features_train, label_train)

predicted = model.predict(features_test)
# Decode the integer predictions back to the original 'yes'/'no' strings.
predicted_labels = le_play.inverse_transform(predicted)

print("Prediction:", predicted_labels)

# Pin the matrix to every class the encoder knows. Without `labels`, the shape
# depends on which classes happen to appear in this very small test split.
class_ids = le_play.transform(le_play.classes_)
conf_mat = confusion_matrix(label_test, predicted, labels=class_ids)
print("Confusion Matrix:")
print(conf_mat)

accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)


Prediction: ['yes' 'yes']
Confusion Matrix:
[[0 2]
 [0 0]]
Accuracy: 0.0


In [4]:
import numpy as np
from collections import Counter

# Labelled training points; each tuple is (first coordinate, second coordinate, class label).
training_samples = [(7, 7, 'Bad'), (7, 4, 'Bad'), (3, 4, 'Good'), (1, 4, 'Good')]

# Unlabelled point whose class is to be predicted.
query_instance = (3, 7)

def euclidean_distance(p, q):
    """Return the Euclidean (L2) distance between two points.

    Works for points with any number of coordinates, not just two.

    Parameters
    ----------
    p, q : sequence of numbers
        Coordinates of the two points. Both must have the same length.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.

    Raises
    ------
    ValueError
        If ``p`` and ``q`` do not have the same number of coordinates.
    """
    if len(p) != len(q):
        # Fail loudly instead of silently ignoring the extra coordinates.
        raise ValueError(
            f"points must have the same number of coordinates, got {len(p)} and {len(q)}"
        )
    return np.sqrt(sum((b - a) ** 2 for a, b in zip(p, q)))

# Distance from the query to every training sample, paired with that sample's
# class label and ordered nearest-first (the sort is stable, so equal
# distances keep their original order).
distances = sorted(
    (
        (euclidean_distance(query_instance, sample[:2]), sample[2])
        for sample in training_samples
    ),
    key=lambda pair: pair[0],
)

# Majority vote among the K closest samples.
K = 3
nearest_neighbors = distances[:K]
classes = [label for _dist, label in nearest_neighbors]
predicted_class = Counter(classes).most_common(1)[0][0]

print("Distances and Classifications:")
for distance, classification in distances:
    print(f"Distance: {distance:.2f}, Classification: {classification}")

print(f"\nNearest Neighbors (K={K}): {nearest_neighbors}")
print(f"Predicted Classification for the query instance {query_instance}: {predicted_class}")


Distances and Classifications:
Distance: 3.00, Classification: Good
Distance: 3.61, Classification: Good
Distance: 4.00, Classification: Bad
Distance: 5.00, Classification: Bad

Nearest Neighbors (K=3): [(np.float64(3.0), 'Good'), (np.float64(3.605551275463989), 'Good'), (np.float64(4.0), 'Bad')]
Predicted Classification for the query instance (3, 7): Good


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Sample dataset: ten records, one list entry per record in every column.
data = dict(
    person_age=[25, 30, 35, 40, 22, 28, 33, 36, 29, 38],
    person_gender=['Male', 'Female', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male'],
    person_education=['Bachelor', 'Master', 'PhD', 'Bachelor', 'PhD', 'Master', 'Bachelor', 'PhD', 'Master', 'Bachelor'],
    person_income=[50000, 60000, 70000, 80000, 55000, 65000, 75000, 85000, 70000, 90000],
    person_emp_exp=[1, 5, 10, 15, 2, 6, 3, 8, 4, 7],
    person_home_ownership=['Rent', 'Own', 'Mortgage', 'Rent', 'Own', 'Mortgage', 'Rent', 'Own', 'Mortgage', 'Rent'],
    loan_amnt=[2000, 3000, 4000, 5000, 2500, 3500, 4500, 5500, 3000, 6000],
    loan_intent=['Personal', 'Business', 'Personal', 'Home', 'Business', 'Home', 'Personal', 'Business', 'Personal', 'Home'],
    loan_int_rate=[5.5, 6.0, 5.0, 4.5, 5.8, 6.1, 5.3, 4.9, 5.6, 4.2],
    loan_percent_income=[4, 5, 6, 7, 4.5, 5.4, 6.0, 6.5, 5.2, 6.7],
)

# Build the frame and derive the target in one step:
# loan_approval is 1 where person_income is above 60000, otherwise 0.
df = pd.DataFrame(data).assign(
    loan_approval=lambda frame: np.where(frame['person_income'] > 60000, 1, 0)
)

print("Class distribution:")
print(df['loan_approval'].value_counts())

# Integer-encode every categorical column in place, keeping the fitted encoder
# for each column so the codes can be mapped back to the original strings.
categorical_columns = ('person_gender', 'person_education', 'person_home_ownership', 'loan_intent')
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# Separate the target column from the predictors.
y = df['loan_approval']
X = df.drop(columns=['loan_approval'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the KNN model on the scaled training features.
k = 3
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

# Evaluate against every class present in the full target column. Without an
# explicit `labels`, the confusion-matrix shape and the report rows depend on
# which classes happen to land in this very small test split.
class_labels = np.unique(y)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
accuracy = accuracy_score(y_test, y_pred)
# zero_division=0 reports 0.0 (without a warning) for classes that were never predicted.
report = classification_report(y_test, y_pred, labels=class_labels, zero_division=0)

print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(report)

Class distribution:
loan_approval
1    7
0    3
Name: count, dtype: int64
Confusion Matrix:
[[0 1]
 [0 1]]
Accuracy: 50.00%
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

