In [10]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import numpy as np
# Load MNIST dataset
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data
y = mnist.target.astype(int)

# Partition the data into multiple subsets
num_partitions = 5
X_partitions = np.array_split(X, num_partitions)
y_partitions = np.array_split(y, num_partitions)

# Initialize list to store centroids
centroids = []

# Loop through each partition
for X_partition, y_partition in zip(X_partitions, y_partitions):
    # Perform KNN on each partition to find the nearest neighbors
    n_neighbors = 5
    knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn_model.fit(X_partition, y_partition)
    distances, indices = knn_model.kneighbors(X_partition)

    # Calculate the centroid from each set of nearest neighbors
    centroid = np.mean(X_partition[indices], axis=1)

    # Append the centroid to the list of centroids
    centroids.append(centroid)

# Stack the centroids to form dataset R
R = np.vstack(centroids)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(R, y, test_size=0.2, random_state=42)

# Initialize Decision Tree classifier
dt_model = DecisionTreeClassifier()

# Define hyperparameters to tune
param_grid = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform Grid Search with Cross-Validation
grid_search = GridSearchCV(estimator=dt_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best model
best_dt_model = grid_search.best_estimator_

# Evaluate the best model
accuracy = best_dt_model.score(X_test, y_test)
print("Model Accuracy:", accuracy)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# Print the results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)


Model Accuracy: 0.9156428571428571
Accuracy: 0.8865714285714286
Precision: 0.890930248515603
Recall: 0.8865714285714286


In [11]:
# prompt: Accuracy, Precision, Recall for each partision and get the best model for 0.90 and up for last and best one

# Initialize lists to store metrics for each partition
accuracy_scores = []
precision_scores = []
recall_scores = []

# Loop through each partition
for X_partition, y_partition in zip(X_partitions, y_partitions):
    # Split the partition into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_partition, y_partition, test_size=0.2, random_state=42)

    # Perform Grid Search with Cross-Validation
    grid_search = GridSearchCV(estimator=dt_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Get the best model for the partition
    best_dt_model = grid_search.best_estimator_

    # Evaluate the best model
    y_pred = best_dt_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    # Store the metrics
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)

# Print the metrics for each partition
for i in range(len(accuracy_scores)):
    print("Partition", i + 1)
    print("Accuracy:", accuracy_scores[i])
    print("Precision:", precision_scores[i])
    print("Recall:", recall_scores[i])

# Find the best model based on accuracy
best_model_index = np.argmax(accuracy_scores)

# Check if the best model has an accuracy of at least 0.90
if accuracy_scores[best_model_index] >= 0.90:
    print("Best model found with accuracy:", accuracy_scores[best_model_index])
    print("Partition:", best_model_index + 1)
else:
    print("No model found with accuracy of at least 0.90")


Partition 1
Accuracy: 0.8132142857142857
Precision: 0.8120236235013203
Recall: 0.8132142857142857
Partition 2
Accuracy: 0.8135714285714286
Precision: 0.8139403859570821
Recall: 0.8135714285714286
Partition 3
Accuracy: 0.8146428571428571
Precision: 0.8143006676738636
Recall: 0.8146428571428571
Partition 4
Accuracy: 0.8014285714285714
Precision: 0.8025781444426834
Recall: 0.8014285714285714
Partition 5
Accuracy: 0.84
Precision: 0.8397264951549795
Recall: 0.84
No model found with accuracy of at least 0.90


In [12]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Load MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to be between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0

# Define RNN model
model = models.Sequential([
    layers.SimpleRNN(128, input_shape=(x_train.shape[1:]), activation='relu', return_sequences=True),
    layers.Dropout(0.2),
    layers.SimpleRNN(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.9717000126838684
