In [None]:
import numpy as np

# Load the training and test datasets.
def _load_npz_array(path, key='arr_0'):
    """Return the array stored under `key` in the .npz archive at `path`.

    np.load on an .npz archive returns a lazily-read NpzFile that keeps the
    underlying file open. Reading inside a `with` block closes the archive as
    soon as the requested array has been extracted, so no file handle is left
    dangling in the kernel.
    """
    with np.load(path) as archive:
        return archive[key]


# Each archive is read under the key 'arr_0' (see the helper's default).
train_data = _load_npz_array('/content/X_kannada_MNIST_train.npz')
train_labels = _load_npz_array('/content/y_kannada_MNIST_train.npz')
test_data = _load_npz_array('/content/X_kannada_MNIST_test.npz')
test_labels = _load_npz_array('/content/y_kannada_MNIST_test.npz')


In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

In [None]:
# Turn every sample into a single 1D feature vector: the first axis (one row
# per sample) is kept and NumPy infers the flattened length via -1.
# NOTE(review): the original comment states the images are 28x28 (784
# features); the shape is not checked here - confirm against the data.
train_data_flat = np.reshape(train_data, (train_data.shape[0], -1))
test_data_flat = np.reshape(test_data, (test_data.shape[0], -1))

In [None]:
# Perform PCA: keep 10 principal components.
# The projection is fitted on the training set only and then re-used to
# transform the test set.
# random_state fixes the seed used by PCA's randomized SVD solver (which the
# default 'auto' policy may select for large inputs), so the components - and
# every metric computed from them - are reproducible between runs.
pca = PCA(n_components=10, random_state=42)
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)


In [None]:
# Initialize the classifiers.
# Decision trees, random forests and SVC's probability calibration all draw
# random numbers; a fixed seed makes their results repeatable after a kernel
# restart instead of drifting from run to run.
RANDOM_STATE = 42
classifiers = {
    'Decision Trees': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'Naive Bayes': GaussianNB(),
    'K-NN': KNeighborsClassifier(),
    # probability=True enables predict_proba, which the evaluation cells call
    # to compute ROC-AUC.
    'SVM': SVC(probability=True, random_state=RANDOM_STATE),
}

In [None]:
# Train each classifier on the PCA-reduced training set and report its
# performance on the PCA-reduced test set.
# NOTE(review): this evaluation cell is repeated elsewhere in the notebook;
# a shared helper function would remove the copy-paste.
macro_avg = {'average': 'macro'}
for clf_name, clf in classifiers.items():
    clf.fit(train_data_pca, train_labels)

    # Hard label predictions drive every metric except ROC-AUC.
    y_pred = clf.predict(test_data_pca)

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, **macro_avg)
    recall = recall_score(test_labels, y_pred, **macro_avg)
    f1 = f1_score(test_labels, y_pred, **macro_avg)
    conf_matrix = confusion_matrix(test_labels, y_pred)

    # ROC-AUC is computed from predicted class probabilities, one-vs-one.
    y_proba = clf.predict_proba(test_data_pca)
    roc_auc = roc_auc_score(test_labels, y_proba, multi_class='ovo')

    # One print call for the whole report; sep="\n" puts each field on its
    # own line, followed by a separator rule.
    print(
        f"Classifier: {clf_name}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        "Confusion Matrix:",
        conf_matrix,
        f"ROC-AUC Score: {roc_auc}",
        "=" * 30,
        sep="\n",
    )



Classifier: Decision Trees
Accuracy: 0.7994
Precision: 0.8002227306770837
Recall: 0.7994000000000001
F1 Score: 0.7987865199334806
Confusion Matrix:
[[718 151  14  47   8   5   2   9  31  15]
 [ 70 813   5  31   6   9   5   7  39  15]
 [  7   2 933  12   7  18  12   7   1   1]
 [ 11  16   7 756  38  30  31  64  24  23]
 [  2   0   2  35 854  45   7   7  17  31]
 [  3   7   4  23 124 781  14  18  14  12]
 [  4   6  10  55  23  24 772  86   5  15]
 [ 20  12   7  52  26  10 141 677  13  42]
 [ 44  26  10  13   3   9   5   3 868  19]
 [ 18  37   0   3  39   3  11  15  52 822]]
ROC-AUC Score: 0.8885555555555555
Classifier: Random Forest
Accuracy: 0.8752
Precision: 0.8778761441820097
Recall: 0.8751999999999999
F1 Score: 0.8744924398968751
Confusion Matrix:
[[775 152   3  31   6   1   2   5  18   7]
 [ 29 918   0  20   4   2   1   4  15   7]
 [  4   2 981   1   0   6   0   4   1   1]
 [ 10   9   2 883  16  20  10  28  20   2]
 [  0   2   1  28 916  36   2   1   8   6]
 [  0   3   1   9 117 852

In [None]:
# Perform PCA: keep 15 principal components.
# The projection is fitted on the training set only and then re-used to
# transform the test set.
# random_state fixes the seed used by PCA's randomized SVD solver (which the
# default 'auto' policy may select for large inputs), so the components - and
# every metric computed from them - are reproducible between runs.
pca = PCA(n_components=15, random_state=42)
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)

In [None]:
# Train each classifier on the PCA-reduced training set and report its
# performance on the PCA-reduced test set.
# NOTE(review): this evaluation cell is repeated elsewhere in the notebook;
# a shared helper function would remove the copy-paste.
macro_avg = {'average': 'macro'}
for clf_name, clf in classifiers.items():
    clf.fit(train_data_pca, train_labels)

    # Hard label predictions drive every metric except ROC-AUC.
    y_pred = clf.predict(test_data_pca)

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, **macro_avg)
    recall = recall_score(test_labels, y_pred, **macro_avg)
    f1 = f1_score(test_labels, y_pred, **macro_avg)
    conf_matrix = confusion_matrix(test_labels, y_pred)

    # ROC-AUC is computed from predicted class probabilities, one-vs-one.
    y_proba = clf.predict_proba(test_data_pca)
    roc_auc = roc_auc_score(test_labels, y_proba, multi_class='ovo')

    # One print call for the whole report; sep="\n" puts each field on its
    # own line, followed by a separator rule.
    print(
        f"Classifier: {clf_name}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        "Confusion Matrix:",
        conf_matrix,
        f"ROC-AUC Score: {roc_auc}",
        "=" * 30,
        sep="\n",
    )

Classifier: Decision Trees
Accuracy: 0.8105
Precision: 0.8104735258682592
Recall: 0.8105
F1 Score: 0.8095633348606948
Confusion Matrix:
[[730 153   9  32   5   6   7   4  32  22]
 [ 76 817   5  25   7  17   7   5  26  15]
 [  4   3 938  11   3  21   6   4   4   6]
 [ 15  10  11 798  21  30  24  70  12   9]
 [  3   0   2  41 838  52  13   8  20  23]
 [  2   6   1  17 104 823   9  11  14  13]
 [  8   3   4  40  25  30 790  76   8  16]
 [ 13  14   6  46  33   9 139 663  13  64]
 [ 40  19  15   6   4  25   2   2 875  12]
 [ 13  39   3   6  13  17   5   7  64 833]]
ROC-AUC Score: 0.8947222222222223
Classifier: Random Forest
Accuracy: 0.8934
Precision: 0.8968001087511304
Recall: 0.8934000000000001
F1 Score: 0.8927820907283724
Confusion Matrix:
[[796 149   2  18   6   0   2   3  20   4]
 [ 21 926   0  23   4   2   0   2  13   9]
 [  4   1 983   2   0   6   1   1   2   0]
 [  6   8   1 916  18  19   4  23   5   0]
 [  0   1   0  20 939  21   3   3   7   6]
 [  0   2   0  11 109 867   2   2   5

In [None]:
# Perform PCA: keep 20 principal components.
# The projection is fitted on the training set only and then re-used to
# transform the test set.
# random_state fixes the seed used by PCA's randomized SVD solver (which the
# default 'auto' policy may select for large inputs), so the components - and
# every metric computed from them - are reproducible between runs.
pca = PCA(n_components=20, random_state=42)
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)

In [None]:
# Train each classifier on the PCA-reduced training set and report its
# performance on the PCA-reduced test set.
# NOTE(review): this evaluation cell is repeated elsewhere in the notebook;
# a shared helper function would remove the copy-paste.
macro_avg = {'average': 'macro'}
for clf_name, clf in classifiers.items():
    clf.fit(train_data_pca, train_labels)

    # Hard label predictions drive every metric except ROC-AUC.
    y_pred = clf.predict(test_data_pca)

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, **macro_avg)
    recall = recall_score(test_labels, y_pred, **macro_avg)
    f1 = f1_score(test_labels, y_pred, **macro_avg)
    conf_matrix = confusion_matrix(test_labels, y_pred)

    # ROC-AUC is computed from predicted class probabilities, one-vs-one.
    y_proba = clf.predict_proba(test_data_pca)
    roc_auc = roc_auc_score(test_labels, y_proba, multi_class='ovo')

    # One print call for the whole report; sep="\n" puts each field on its
    # own line, followed by a separator rule.
    print(
        f"Classifier: {clf_name}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        "Confusion Matrix:",
        conf_matrix,
        f"ROC-AUC Score: {roc_auc}",
        "=" * 30,
        sep="\n",
    )

Classifier: Decision Trees
Accuracy: 0.8031
Precision: 0.8024346289998506
Recall: 0.8030999999999999
F1 Score: 0.8020514696073532
Confusion Matrix:
[[726 138  14  31  10   3   8  14  32  24]
 [ 90 801  11  27   5   9   7   4  26  20]
 [  4   4 933  11   3  25   7   8   3   2]
 [ 20  16  19 769  22  40  24  71   9  10]
 [  2   3   2  38 834  49   7  18  17  30]
 [  8   6   9  16 106 803   9   8  22  13]
 [  6   3   6  49  15  26 789  75   3  28]
 [  9  10   7  63  24   6 135 663  14  69]
 [ 38  18  12   4   8  23   2   0 872  23]
 [ 10  38   5   4  25   9   5   9  54 841]]
ROC-AUC Score: 0.890611111111111
Classifier: Random Forest
Accuracy: 0.9012
Precision: 0.9044122152877474
Recall: 0.9012000000000002
F1 Score: 0.9006346746385953
Confusion Matrix:
[[802 143   1  23   3   0   2   4  17   5]
 [ 11 934   0  26   4   5   0   2  12   6]
 [  5   0 985   2   0   5   1   0   1   1]
 [  7  12   1 922  12  17   6  19   4   0]
 [  0   0   0  18 939  22   4   2   8   7]
 [  0   1   0  10 103 872 

In [None]:
# Perform PCA: keep 25 principal components.
# The projection is fitted on the training set only and then re-used to
# transform the test set.
# random_state fixes the seed used by PCA's randomized SVD solver (which the
# default 'auto' policy may select for large inputs), so the components - and
# every metric computed from them - are reproducible between runs.
pca = PCA(n_components=25, random_state=42)
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)

In [None]:
# Train each classifier on the PCA-reduced training set and report its
# performance on the PCA-reduced test set.
# NOTE(review): this evaluation cell is repeated elsewhere in the notebook;
# a shared helper function would remove the copy-paste.
macro_avg = {'average': 'macro'}
for clf_name, clf in classifiers.items():
    clf.fit(train_data_pca, train_labels)

    # Hard label predictions drive every metric except ROC-AUC.
    y_pred = clf.predict(test_data_pca)

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, **macro_avg)
    recall = recall_score(test_labels, y_pred, **macro_avg)
    f1 = f1_score(test_labels, y_pred, **macro_avg)
    conf_matrix = confusion_matrix(test_labels, y_pred)

    # ROC-AUC is computed from predicted class probabilities, one-vs-one.
    y_proba = clf.predict_proba(test_data_pca)
    roc_auc = roc_auc_score(test_labels, y_proba, multi_class='ovo')

    # One print call for the whole report; sep="\n" puts each field on its
    # own line, followed by a separator rule.
    print(
        f"Classifier: {clf_name}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        "Confusion Matrix:",
        conf_matrix,
        f"ROC-AUC Score: {roc_auc}",
        "=" * 30,
        sep="\n",
    )

Classifier: Decision Trees
Accuracy: 0.8039
Precision: 0.8034706956853785
Recall: 0.8039
F1 Score: 0.802862432836468
Confusion Matrix:
[[733 142   8  33  10   4   8   8  40  14]
 [ 79 808   6  26   9   9   7   8  31  17]
 [  4   2 933  10   1  20   7  15   2   6]
 [ 16  14  14 781  25  33  32  68  10   7]
 [  1   1   3  37 835  44  10  21  21  27]
 [  4   5   5  30  99 805   8   9  21  14]
 [  9   7   6  48  20  32 795  65   4  14]
 [ 20  13   5  57  23   9 135 648  18  72]
 [ 37  19  15   5   8  19   1   3 876  17]
 [ 26  34   8   7  20   8   6   5  61 825]]
ROC-AUC Score: 0.8910555555555553
Classifier: Random Forest
Accuracy: 0.9025
Precision: 0.9052364447550231
Recall: 0.9024999999999999
F1 Score: 0.9018871789032017
Confusion Matrix:
[[809 141   2  19   5   0   2   2  14   6]
 [ 17 932   0  23   6   3   0   1  12   6]
 [  4   1 985   2   0   4   1   1   1   1]
 [ 10   8   0 931  13  17   5  14   2   0]
 [  0   1   0  21 938  21   3   3   7   6]
 [  0   1   1  10  95 881   3   4   3 

In [None]:
# Perform PCA: keep 30 principal components.
# The projection is fitted on the training set only and then re-used to
# transform the test set.
# random_state fixes the seed used by PCA's randomized SVD solver (which the
# default 'auto' policy may select for large inputs), so the components - and
# every metric computed from them - are reproducible between runs.
pca = PCA(n_components=30, random_state=42)
train_data_pca = pca.fit_transform(train_data_flat)
test_data_pca = pca.transform(test_data_flat)

In [None]:
# Train each classifier on the PCA-reduced training set and report its
# performance on the PCA-reduced test set.
# NOTE(review): this evaluation cell is repeated elsewhere in the notebook;
# a shared helper function would remove the copy-paste.
macro_avg = {'average': 'macro'}
for clf_name, clf in classifiers.items():
    clf.fit(train_data_pca, train_labels)

    # Hard label predictions drive every metric except ROC-AUC.
    y_pred = clf.predict(test_data_pca)

    # Precision, recall and F1 are macro-averaged.
    accuracy = accuracy_score(test_labels, y_pred)
    precision = precision_score(test_labels, y_pred, **macro_avg)
    recall = recall_score(test_labels, y_pred, **macro_avg)
    f1 = f1_score(test_labels, y_pred, **macro_avg)
    conf_matrix = confusion_matrix(test_labels, y_pred)

    # ROC-AUC is computed from predicted class probabilities, one-vs-one.
    y_proba = clf.predict_proba(test_data_pca)
    roc_auc = roc_auc_score(test_labels, y_proba, multi_class='ovo')

    # One print call for the whole report; sep="\n" puts each field on its
    # own line, followed by a separator rule.
    print(
        f"Classifier: {clf_name}",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        "Confusion Matrix:",
        conf_matrix,
        f"ROC-AUC Score: {roc_auc}",
        "=" * 30,
        sep="\n",
    )

Classifier: Decision Trees
Accuracy: 0.8058
Precision: 0.80526823210456
Recall: 0.8058
F1 Score: 0.8047868859563618
Confusion Matrix:
[[734 140  12  26   5   3   4  11  46  19]
 [100 782  12  24   6  11  10   9  34  12]
 [  4   2 934  10   0  21   8  10   5   6]
 [ 16  13  11 798  32  25  21  66  11   7]
 [  5   4   1  34 832  60  10  15  16  23]
 [  7   5   8  30  97 798   8   8  29  10]
 [  6   3   5  31  16  30 805  74  11  19]
 [ 16  12  10  43  27   7 129 664  15  77]
 [ 31  18  13   5   7  17   1   5 888  15]
 [ 24  30   1   7  19  14   8   6  68 823]]
ROC-AUC Score: 0.8921111111111113
Classifier: Random Forest
Accuracy: 0.9053
Precision: 0.9079174729535877
Recall: 0.9053000000000001
F1 Score: 0.9046353714060894
Confusion Matrix:
[[808 144   2  19   4   0   2   3  15   3]
 [ 11 940   1  19   4   4   0   2  12   7]
 [  2   1 985   2   0   7   1   0   2   0]
 [  8   9   1 930  14  18   3  15   1   1]
 [  0   1   0  15 945  16   5   2   8   8]
 [  0   1   0  10  92 880   3   5   5  