In [36]:
!pip install fpdf
!pip install pandas
!pip install numpy
!pip install seaborn



In [37]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF

In [38]:
# Step 2: Load and preprocess the data
# Reads 'data.csv' relative to the notebook's working directory. Later cells take
# column 0 as the class label and every remaining column as a feature.
data = pd.read_csv('data.csv')

In [39]:
# Handle missing values.
# Rows whose label (column 0) is missing are dropped instead of filled: a blanket
# fillna(0) would silently turn them into class-0 samples. Missing feature values
# are still filled with 0. A new frame is bound rather than mutated in place, so
# re-running the cell is harmless.
if data.isnull().values.any():
    label_column = data.columns[0]
    data = data.dropna(subset=[label_column]).fillna(0)

In [40]:
# Separate the target (first column) from the features (all remaining columns)
label_series, pixel_frame = data.iloc[:, 0], data.iloc[:, 1:]
y = label_series.values   # Labels
X = pixel_frame.values    # Pixel values

In [41]:
# Standardize the pixel values (StandardScaler: per-feature zero mean, unit variance)
# NOTE(review): the scaler is fitted on the full dataset before any train/test
# split, so test-set statistics influence the training features. Consider fitting
# on the training portion only.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [42]:
# Step 3: Define a function to evaluate the KNN model
def evaluate_knn(X_train, X_test, y_train, y_test, k):
    """Fit a k-nearest-neighbours classifier and score it on held-out data.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_test, y_test
        Held-out features and labels used for scoring.
    k : int
        Number of neighbours (``n_neighbors``).

    Returns
    -------
    tuple
        ``(accuracy, conf_matrix)`` as produced by ``accuracy_score`` and
        ``confusion_matrix`` on the test predictions.
    """
    classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        confusion_matrix(y_test, predictions),
    )

In [43]:
# Step 4: Define the scenarios to loop over
results = []  # one dict per (split, k) scenario, appended to by the loop cell
k_values = [2, 4, 5, 6, 7, 10]  # neighbour counts passed to evaluate_knn as k
splits = [0.6, 0.7, 0.75, 0.8, 0.9, 0.95]  # training fractions; the test fraction is 1 - split

In [44]:
# Start from an empty list so re-running this cell does not append duplicates.
results = []
for split in splits:
    # round() strips float noise from 1 - split (e.g. 1 - 0.7 gives
    # 0.30000000000000004). Left in, the noise can nudge the computed test-set
    # size and ends up in the stored 'test_split' value.
    test_fraction = round(1 - split, 10)
    # With a fixed random_state the partition depends only on `split`,
    # so compute it once per split rather than once per k.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_fraction, random_state=42
    )
    for k in k_values:
        accuracy, conf_matrix = evaluate_knn(X_train, X_test, y_train, y_test, k)
        results.append({
            'train_split': split,
            'test_split': test_fraction,
            'k': k,
            'accuracy': accuracy,
            'conf_matrix': conf_matrix
        })

In [45]:
# Step 5: Save the results in a PDF
class PDF(FPDF):
    """FPDF document with helpers for laying out the KNN performance report."""

    def header(self):
        """Write the centred, bold report title (FPDF's per-page header hook)."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'KNN Model Performance', 0, 1, 'C')

    def chapter_title(self, title):
        """Write `title` as a bold, left-aligned line followed by a 5-unit gap."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, body):
        """Write `body` as wrapped regular-weight text followed by a line break."""
        self.set_font('Arial', '', 12)
        self.multi_cell(0, 10, body)
        self.ln()

    def add_confusion_matrix(self, matrix, title):
        """Start a new page, print `title`, and embed a heatmap of `matrix`.

        Each heatmap is written to its own temporary PNG. FPDF caches embedded
        images by file name, so reusing a single path such as 'conf_matrix.png'
        would make every page show the first heatmap that was saved.
        """
        import os
        import tempfile

        self.add_page()
        self.chapter_title(title)
        fig, ax = plt.subplots(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
        # Label the heatmap axes explicitly instead of relying on pyplot's
        # "current axes" state.
        ax.set_title('Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')
        # mkstemp returns an open descriptor; close it so savefig can write the
        # file by path on every platform.
        fd, image_path = tempfile.mkstemp(prefix='conf_matrix_', suffix='.png')
        os.close(fd)
        try:
            fig.savefig(image_path)
            self.image(image_path, x=10, y=None, w=190)
        finally:
            # Release the figure and the temp file even if saving or embedding fails.
            plt.close(fig)
            os.remove(image_path)
        self.ln(10)

pdf = PDF()
pdf.add_page()

In [46]:
for result in results:
    # ':g' keeps float noise out of the title, e.g. (1 - 0.7) * 100 would
    # otherwise print as 30.000000000000004.
    train_pct = result["train_split"] * 100
    test_pct = result["test_split"] * 100
    title = f'Train/Test Split: {train_pct:g}/{test_pct:g}, K={result["k"]}'
    body = f'Accuracy: {result["accuracy"]*100:.2f}%\nConfusion Matrix:\n{result["conf_matrix"]}'
    pdf.chapter_title(title)
    pdf.chapter_body(body)
    pdf.add_confusion_matrix(result['conf_matrix'], title)

pdf.output('KNN_model_performance.pdf')

''

In [47]:
# Run a single scenario as an example: 80% training data, k = 5
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1-0.8, random_state=42
)
accuracy, conf_matrix = evaluate_knn(X_train, X_test, y_train, y_test, 5)
print(f'Accuracy: {accuracy*100:.2f}%', 'Confusion Matrix:', conf_matrix, sep='\n')

Accuracy: 94.01%
Confusion Matrix:
[[802   0   3   0   0   1  10   0   0   0]
 [  0 903   2   0   0   1   2   0   1   0]
 [  9  11 791  12   1   3   5   3   8   3]
 [  4   4   2 883   0  16   1   9   9   9]
 [  2   6   5   2 786   2   6   3   0  27]
 [  5   1   3  26   2 646  11   1   2   5]
 [ 12   1   3   0   1   8 760   0   0   0]
 [  2  17   5   6  11   0   0 824   0  28]
 [  6  14   7  19  12  34   5   2 731   5]
 [  6   3   3  13  10   1   0  27   4 771]]
