In [None]:
# SVM Classification on MNIST Dataset

# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.svm import SVC

import utils  # This should contain necessary utility functions for loading data and saving results


In [None]:

# Locations of the training and test CSV files
train_path = 'data/mnist_train_small.csv'
test_path = 'data/mnist_test.csv'

# utils.load_data hands back four objects: features and labels for each split
(X_train, y_train,
 X_test, y_test) = utils.load_data(train_path, test_path)

# Report the dimensions of the loaded feature sets
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

In [None]:

# Hyperparameters of the support vector classifier; swap the kernel for
# 'linear', 'poly', etc. to experiment with other decision boundaries.
svm_params = {'kernel': 'rbf', 'C': 1.0, 'gamma': 'scale'}

# Fit on the training split (fit() returns the fitted estimator itself)
svm_model = SVC(**svm_params).fit(X_train, y_train)

# Predicted labels for the test features
y_pred = svm_model.predict(X_test)


In [None]:
# Compare the true test labels with the model's predictions:
# a confusion matrix (counts per true/predicted pair) ...
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
# ... and the textual per-class metrics report.
class_report = classification_report(y_true=y_test, y_pred=y_pred)

# Show the report in the cell output
print(
    "Classification Report:\n",
    class_report,
)


In [None]:
# Make sure the output folder exists before anything is written into it;
# plt.savefig raises FileNotFoundError when the target directory is missing.
os.makedirs('results', exist_ok=True)

# Save classification report in the results folder
utils.save_classification_report(class_report, 'results/classification_report.txt')

# confusion_matrix orders its rows/columns by the sorted union of the true and
# predicted labels, so the tick labels must be built from both arrays.
# Using only np.unique(y_test) would mislabel the axes whenever a predicted
# class never occurs among the true test labels.
class_labels = np.union1d(y_test, y_pred)

# Save confusion matrix as a heatmap image
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    conf_matrix,
    annot=True,   # write the count into every cell
    fmt='d',      # counts are integers
    cmap='Blues',
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - SVM')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
# Write the file before plt.show(), which may release the figure afterwards.
fig.savefig('results/confusion_matrix.png')
plt.show()


In [None]:
# Optional: keep the true and predicted labels side by side in a CSV for later inspection
test_predictions_df = pd.DataFrame({'True Label': y_test, 'Predicted Label': y_pred})
test_predictions_df.to_csv('results/test_predictions.csv', index=False)

# Optional: fraction of test samples whose prediction matches the true label
# NOTE(review): this is computed on the test split although the message calls
# it "Validation Accuracy" — confirm the wording is intended.
accuracy = np.mean(y_test == y_pred)
accuracy_line = f"Validation Accuracy: {accuracy:.4f}"
print(accuracy_line)

# Store the same line in a text file
with open('results/validation_accuracy.txt', 'w') as accuracy_file:
    accuracy_file.write(accuracy_line)

In [None]:
!pip install shap

In [None]:
import shap 

In [None]:
# Background sample for the explainer: only the first 100 training rows,
# kept small for efficiency.
background_rows = X_train[:100]

# Kernel SHAP explainer wrapped around the SVM's predict function.
# NOTE(review): the explained function is svm_model.predict, so the
# attributions refer to whatever predict returns (presumably the class label
# itself rather than a per-class score) — confirm this is intended.
explainer = shap.KernelExplainer(svm_model.predict, background_rows)

# SHAP values for the first 100 rows of the test data
shap_values = explainer.shap_values(X_test[:100])


In [None]:
# Names shown on the SHAP plots. `feature_names` was referenced by the SHAP
# cells but never defined, so a fresh-kernel run stopped here with a
# NameError. Reuse the column labels when the test features carry them
# (e.g. a DataFrame); otherwise fall back to positional pixel names.
explained_rows = X_test[:100]  # the same rows the SHAP values were computed for
if hasattr(explained_rows, 'columns'):
    feature_names = [str(col) for col in explained_rows.columns]
else:
    feature_names = [f"pixel_{i}" for i in range(explained_rows.shape[1])]

# SHAP dependence plot for a specific feature
# NOTE(review): feature index 0 is presumably a corner pixel that hardly
# varies in MNIST — pick a more informative index if the plot looks flat.
shap.dependence_plot(0, shap_values, explained_rows, feature_names=feature_names)  # Feature index 0 (change as needed)


In [None]:
import matplotlib.pyplot as plt

# Both plots describe the first 100 test rows
plotted_rows = X_test[:100]

# Summary plot: draw without showing (show=False) so the figure can be written to disk
shap.summary_plot(
    shap_values,
    plotted_rows,
    feature_names=feature_names,
    show=False,
)
plt.savefig('./results/shap_summary_plot.png')

# Dependence plot for feature index 0, saved the same way
shap.dependence_plot(
    0,
    shap_values,
    plotted_rows,
    feature_names=feature_names,
    show=False,
)
plt.savefig('./results/shap_dependence_plot_feature_0.png')


In [None]:
# Load SHAP's JavaScript visualisation code into the notebook; without this
# call the interactive force plot below is not rendered in the cell output.
shap.initjs()

# Pick the first test row through a slice, like the other SHAP cells do.
# Plain `X_test[0]` is a row only for NumPy arrays; on a DataFrame it would be
# a column lookup. For an array input the result is identical to X_test[0].
first_test_row = np.asarray(X_test[:1])[0]

# SHAP force plot for a single prediction (last expression, so it is displayed)
shap.force_plot(explainer.expected_value, shap_values[0], first_test_row, feature_names=feature_names)
