In [27]:
import numpy as np
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn import metrics
from sklearn.metrics import accuracy_score

## Taking input

In [30]:
train_data=pd.read_csv(r"/content/mnist_train.csv")
test_data=pd.read_csv(r"/content/mnist_test.csv")

train_data=train_data.to_numpy()    # train_data shape: (60000, 785)
m, n = train_data.shape
test_data=test_data.to_numpy()      # test_data shape: (10000, 785)

X_train=train_data[:,1:]            # X_train shape: (60000, 784)
y_train=train_data[:,0]             # y_train shape: (60000,)
X_test=test_data[:,1:]              # X_test shape: (10000, 784)
y_test=test_data[:,0]               # y_test shape: (10000,)

X_train = X_train / 255.0           # Normalizing the data
X_test = X_test / 255.0

## Task-1: Classifier for 10 classes, 1 for each digit.

In [None]:

classifiers = {}
for digit in range(10):
    # Creating a new dataset where the target is 1 if it's the current digit and 0 otherwise
    y_binary_train = np.where(y_train == digit, 1, 0)
    y_binary_test = np.where(y_test == digit, 1, 0)

    # Training the SVM classifier
    classifier = SVC(kernel='linear', C=1.0, random_state=42)
    classifier.fit(X_train, y_binary_train)
    classifiers[digit] = classifier


## Task-2: classifier performance using precision, recall, and F1-score

In [29]:
accuracy_scores = {}
for digit in range(10):
    # Predict on the test set
    y_pred = classifiers[digit].predict(X_test)

    # Calculate accuracy for the current digit
    accuracy = accuracy_score(np.where(y_test == digit, 1, 0), y_pred)
    accuracy_scores[digit] = accuracy
    f1_score = metrics.f1_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
    precision = metrics.precision_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
    recall = metrics.recall_score(y_test, y_pred, average='weighted', labels=np.unique(y_pred))
    print(f"Accuracy for digit {digit}: {accuracy}")
    print(f"F1 score for digit {digit}: {f1_score}")
    print(f"Precision for digit {digit}: {precision}")
    print(f"Recall for digit {digit}: {recall}")
    print()



Accuracy for digit 0: 1.0
F1 score for digit 0: 0.0
Precision for digit 0: 0.0
Recall for digit 0: 0.0

Accuracy for digit 1: 0.99
F1 score for digit 1: 0.6769811575150841
Precision for digit 1: 0.6281639928698752
Recall for digit 1: 1.0

Accuracy for digit 2: 0.98
F1 score for digit 2: 0.0570409982174688
Precision for digit 2: 0.030947775628626693
Recall for digit 2: 0.36363636363636365

Accuracy for digit 3: 0.98
F1 score for digit 3: 0.05876951331496787
Precision for digit 3: 0.03196803196803197
Recall for digit 3: 0.36363636363636365

Accuracy for digit 4: 0.98
F1 score for digit 4: 0.061895551257253385
Precision for digit 4: 0.03382663847780127
Recall for digit 4: 0.36363636363636365

Accuracy for digit 5: 0.98
F1 score for digit 5: 0.056487202118270075
Precision for digit 5: 0.03062200956937799
Recall for digit 5: 0.36363636363636365

Accuracy for digit 6: 0.97
F1 score for digit 6: 0.05876951331496787
Precision for digit 6: 0.03196803196803197
Recall for digit 6: 0.3636363636363

## Predicting new input.

In [26]:
# 4. Prediction (Example)
def predict_digit(image_data, classifiers):
    # Use all classifiers to predict
    predictions = []
    for digit in range(10):
        predictions.append(classifiers[digit].predict(image_data.reshape(1, -1))[0])

    # Return the digit with the highest confidence
    return np.argmax(predictions)

# Example prediction
image_data = train_data[20000][1:]/255.0
predicted_digit = predict_digit(image_data, classifiers)
print(f"Predicted digit: {predicted_digit}")

Predicted digit: 5
