# Import Required Libraries
Import the necessary libraries, including pandas, numpy, matplotlib, and sklearn.

In [None]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load TinyMNIST Data
Load the TinyMNIST dataset from the CSV files using pandas.

In [None]:
# Load TinyMNIST Data

# The four CSV files are read in a fixed order (train data, train labels,
# test data, test labels). header=None makes pandas treat the first row of
# each file as data rather than as column names.
train_data, train_labels, test_data, test_labels = (
    pd.read_csv(csv_name, header=None)
    for csv_name in (
        'trainData.csv',
        'trainLabels.csv',
        'testData.csv',
        'testLabels.csv',
    )
)

# Report (rows, columns) of every loaded frame as a quick sanity check
print(f'Training data shape: {train_data.shape}')
print(f'Training labels shape: {train_labels.shape}')
print(f'Test data shape: {test_data.shape}')
print(f'Test labels shape: {test_labels.shape}')

# Data Preprocessing
Preprocess the data: normalize the feature values and split the training data into training and validation sets (the test set is loaded separately above).

In [None]:
# Data preprocessing: rescale the features, then hold out a validation set
from sklearn.model_selection import train_test_split

# Divide every feature value by 255.0 in both the training and the test frame
train_data, test_data = train_data / 255.0, test_data / 255.0

# Hold out 20% of the training rows for validation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_data,
    train_labels,
    test_size=0.2,
    random_state=42,
)

# Report the shapes of the resulting partitions
print(f'Training data shape after split: {X_train.shape}')
print(f'Validation data shape: {X_val.shape}')
print(f'Training labels shape after split: {y_train.shape}')
print(f'Validation labels shape: {y_val.shape}')

# Backward Elimination Algorithm
Implement the Backward Elimination algorithm to select features.

In [None]:
# Backward Elimination Algorithm

# RFE stays importable for any other cell that references it, but it cannot
# drive this step: RFE ranks features through the fitted estimator's `coef_`
# or `feature_importances_`, and GaussianNB exposes neither, so
# `RFE(estimator=GaussianNB(), ...).fit(...)` raises a ValueError.
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SequentialFeatureSelector

# The labels are single-column frames; flatten them once into 1-D arrays
y_train_flat = y_train.values.ravel()
y_val_flat = y_val.values.ravel()

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Wrapper-style backward elimination: start from all features and repeatedly
# drop the feature whose removal leaves the best cross-validated score, until
# only `n_features_to_select` features remain. The selector works on clones
# of `nb_classifier`, so the classifier itself is still unfitted afterwards.
backward_selector = SequentialFeatureSelector(
    nb_classifier,
    n_features_to_select=1,
    direction='backward',
)
backward_selector.fit(X_train, y_train_flat)

# Boolean mask over the original columns (True = feature kept).
# SequentialFeatureSelector records no elimination order, so there is no
# per-feature ranking to print; the kept column indices are shown instead.
support = backward_selector.get_support()

# Print the selected features
print(f'Selected features: {support}')
print(f'Selected feature indices: {backward_selector.get_support(indices=True)}')

# Transform the training and validation data to select the features
X_train_selected = backward_selector.transform(X_train)
X_val_selected = backward_selector.transform(X_val)

# Train the Naive Bayes classifier on the selected features
nb_classifier.fit(X_train_selected, y_train_flat)

# Predict on the validation set
y_val_pred = nb_classifier.predict(X_val_selected)

# Calculate the accuracy on the validation set
accuracy = accuracy_score(y_val_flat, y_val_pred)
print(f'Validation accuracy with selected features: {accuracy}')

# Forward Selection Algorithm
Implement the Forward Selection algorithm to select features.

In [None]:
# Forward Selection Algorithm

from sklearn.feature_selection import SequentialFeatureSelector

# Base estimator; the selector evaluates clones of it, so it is still
# unfitted when it is trained on the chosen features further down.
nb_classifier = GaussianNB()

# Greedy forward search that stops after a single feature has been added.
# fit() returns the selector itself, so construction and fitting are chained.
forward_selector = SequentialFeatureSelector(
    estimator=nb_classifier,
    n_features_to_select=1,
    direction='forward',
).fit(X_train, y_train.values.ravel())

# Boolean mask over the original columns (True = feature chosen)
support_forward = forward_selector.get_support()
print(f'Selected features (Forward Selection): {support_forward}')

# Keep only the chosen columns in both partitions
X_train_selected_forward = forward_selector.transform(X_train)
X_val_selected_forward = forward_selector.transform(X_val)

# Fit Naive Bayes on the reduced training set and score it on validation
nb_classifier.fit(X_train_selected_forward, y_train.values.ravel())
y_val_pred_forward = nb_classifier.predict(X_val_selected_forward)
accuracy_forward = accuracy_score(y_val, y_val_pred_forward)
print(f'Validation accuracy with selected features (Forward Selection): {accuracy_forward}')

# Naive Bayes Optimal Classifier
Use the Naive Bayes classifier to classify the data and evaluate performance.

In [None]:
# Naive Bayes Optimal Classifier

# Baseline without feature selection: fit Gaussian Naive Bayes on every
# column of the training partition (fit() returns the fitted estimator).
nb_classifier = GaussianNB().fit(X_train, y_train.values.ravel())

# Score the baseline on the held-out validation partition
y_val_pred = nb_classifier.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f'Validation accuracy: {accuracy}')

# Score the same fitted model on the separately loaded test set
y_test_pred = nb_classifier.predict(test_data)
test_accuracy = accuracy_score(y_true=test_labels, y_pred=y_test_pred)
print(f'Test accuracy: {test_accuracy}')

# Plot CCR vs Number of Selected Features
Plot the Correct Classification Rate (CCR) against the number of selected features for both algorithms.

In [None]:
# Plot CCR vs Number of Selected Features

from sklearn.model_selection import cross_val_score


def _greedy_ccr_curve(direction, X_fit, y_fit, X_eval, y_eval, cv=5):
    """Trace the evaluation CCR along one greedy feature-selection pass.

    At every step each candidate subset is scored with the mean `cv`-fold
    cross-validated score of a fresh GaussianNB on (X_fit, y_fit), and the
    best candidate is kept. After each step a fresh GaussianNB is fitted on
    the current subset of X_fit and its accuracy on (X_eval, y_eval) is
    recorded, so one pass yields the CCR for every subset size.

    Args:
        direction: 'forward' (grow from the empty set) or 'backward'
            (shrink from the full set).
        X_fit: 2-D array of training features, indexed as X_fit[:, columns].
        y_fit: 1-D array of training labels.
        X_eval: 2-D array of evaluation features with the same columns.
        y_eval: 1-D array of evaluation labels.
        cv: value passed as `cv` to cross_val_score.

    Returns:
        (sizes, ccrs): two lists in ascending subset size, where ccrs[i] is
        the evaluation accuracy obtained with sizes[i] selected features.

    Raises:
        ValueError: if `direction` is neither 'forward' nor 'backward'.
    """
    if direction not in ('forward', 'backward'):
        raise ValueError(
            f"direction must be 'forward' or 'backward', got {direction!r}"
        )

    n_total = X_fit.shape[1]

    def cv_score(columns):
        # Selection criterion: mean cross-validated score on the training data.
        return cross_val_score(
            GaussianNB(), X_fit[:, columns], y_fit, cv=cv
        ).mean()

    def eval_ccr(columns):
        # The model must be refitted on exactly the selected columns; a
        # classifier fitted on a different number of features cannot predict.
        model = GaussianNB().fit(X_fit[:, columns], y_fit)
        return accuracy_score(y_eval, model.predict(X_eval[:, columns]))

    curve = []  # (subset size, evaluation CCR) in the order visited
    if direction == 'forward':
        subset, remaining = [], list(range(n_total))
        while remaining:
            best = max(remaining, key=lambda col: cv_score(subset + [col]))
            remaining.remove(best)
            subset.append(best)
            curve.append((len(subset), eval_ccr(subset)))
    else:
        subset = list(range(n_total))
        curve.append((len(subset), eval_ccr(subset)))
        while len(subset) > 1:
            # Drop the feature whose removal leaves the best-scoring subset.
            dropped = max(
                subset,
                key=lambda col: cv_score([c for c in subset if c != col]),
            )
            subset.remove(dropped)
            curve.append((len(subset), eval_ccr(subset)))
        curve.reverse()  # visited from large to small; report ascending

    sizes = [size for size, _ in curve]
    ccrs = [ccr for _, ccr in curve]
    return sizes, ccrs


# Plain arrays allow positional column indexing; labels are flattened to 1-D
X_train_values = X_train.values
X_val_values = X_val.values
y_train_flat = y_train.values.ravel()
y_val_flat = y_val.values.ravel()

# One greedy pass per algorithm gives the CCR for every subset size. This
# replaces re-running a selector from scratch for each size, which repeated
# the same greedy steps over and over.

# Backward Elimination
num_features_backward, ccr_backward = _greedy_ccr_curve(
    'backward', X_train_values, y_train_flat, X_val_values, y_val_flat
)

# Forward Selection
num_features_forward, ccr_forward = _greedy_ccr_curve(
    'forward', X_train_values, y_train_flat, X_val_values, y_val_flat
)

# Plot the CCR vs Number of Selected Features
plt.figure(figsize=(10, 6))
plt.plot(num_features_backward, ccr_backward, label='Backward Elimination', marker='o')
plt.plot(num_features_forward, ccr_forward, label='Forward Selection', marker='x')
plt.xlabel('Number of Selected Features')
plt.ylabel('Correct Classification Rate (CCR)')
plt.title('CCR vs Number of Selected Features')
plt.legend()
plt.grid(True)
plt.show()