In [5]:
import numpy as np
data = np.load('../Dataset/pathmnist.npz')
data_train = data['train_images']
data_test = data['test_images']
data_val = data['val_images']
label_train = data['train_labels']
label_test = data['test_labels']
label_val = data['val_labels']
print(len(data_train))
print(len(data_test))

89996
7180


In [5]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt


# Flatten the images and normalize pixel values

n_samples = len(data_train)
data_temp = data_train.reshape((n_samples, -1))
data_tr = StandardScaler().fit_transform(data_temp)

n_samples = len(data_test)
data_temp = data_test.reshape((n_samples, -1))
data_te = StandardScaler().fit_transform(data_temp)
n_samples = len(data_val)
data_temp = data_val.reshape((n_samples, -1))
data_va = StandardScaler().fit_transform(data_temp)

In [1]:
# Tried kernals and regularization parameters
kernels = {'rbf'}
C = {1}
# Initialize an empty dictionary to store results
accuracy_results = {'kernel': [], 'C': [], 'accu': []}

In [6]:
# trying different svm kernals with different regularization parameters
for i in kernels:
    for c in C:
        # Create an SVM classifier
        clf_svm = SVC(kernel= i , C = c)

        # Train the classifier
        clf_svm.fit(data_tr, label_train)

        # Make predictions on the test set
        y_pred = clf_svm.predict(data_te)

        # Evaluate the performance
        accu_svm = accuracy_score(label_test, y_pred)

        # storing performance values for plotting

        accuracy_results['kernel'].append(i)
        accuracy_results['C'].append(c)
        accuracy_results['accu'].append(accu_svm)

        # Display classification report
        print(f'Report for SVM with {i} kernal and regularization parameter = {c}:')
        print(classification_report(label_test, y_pred))

  y = column_or_1d(y, warn=True)


Report for SVM with rbf kernal and regularization parameter = 1:
              precision    recall  f1-score   support

           0       0.94      0.91      0.93      1338
           1       0.89      1.00      0.94       847
           2       0.36      0.65      0.46       339
           3       0.58      0.29      0.38       634
           4       0.94      0.66      0.77      1035
           5       0.45      0.48      0.47       592
           6       0.32      0.18      0.23       741
           7       0.68      0.40      0.50       421
           8       0.55      0.88      0.68      1233

    accuracy                           0.67      7180
   macro avg       0.63      0.61      0.60      7180
weighted avg       0.69      0.67      0.66      7180



In [49]:
# Apply random sample and mini-batch for training, as the training data set is massive

# Parameters
num_batch = 200
batch_size = 10000
batch_index = []

import warnings

# To ignore all warnings
warnings.filterwarnings("ignore")

In [50]:
# Perform mini-batch updates
if len(batch_index) != 0:
    print(f'{len(batch_index)} batches already exists, please check')
else:
    for i in range(num_batch):
        # Randomly sample a mini-batch from the training set
        batch_index.append(np.random.choice(len(data_tr), size=batch_size, replace=False))


In [51]:
from sklearn.ensemble import RandomForestClassifier

# Create an empty Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators = 100, random_state=42, warm_start= True)

# Perform mini-batch updates
for i in range(num_batch):
    # use the randomly sampled indices for mini-batches
    data_batch = [data_tr[j] for j in batch_index[i]]
    label_batch = [label_train[j] for j in batch_index[i]]

    # Train the Random Forest classifier on the current mini-batch
    rf_classifier.fit(data_batch, label_batch)

# Make predictions on the test set
rf_predictions = rf_classifier.predict(data_te)

# Evaluate accuracy on the test set
accuracy = accuracy_score(label_test, rf_predictions)
print(f"Random Forest Accuracy: {accuracy}")


Random Forest Accuracy: 0.5811977715877438


In [33]:
from sklearn.ensemble import RandomForestClassifier

# Create an empty Random Forest classifier
rf_classifier_2 = RandomForestClassifier(n_estimators = 100, random_state=42)
rf_classifier_2.fit(data_tr, label_train)


  return fit_method(estimator, *args, **kwargs)


Random Forest Accuracy: 0.5139275766016713


In [34]:

# Make predictions on the test set
rf_predictions2 = rf_classifier_2.predict(data_te)

# Evaluate accuracy on the test set
accuracy = accuracy_score(label_test, rf_predictions2)
print(f"Random Forest Accuracy: {accuracy}")


Random Forest Accuracy: 0.6497214484679665


In [9]:
from tensorflow.keras.utils import to_categorical

# Convert integer labels to one-hot encoded format
label_train_one_hot = to_categorical(label_train, num_classes=9)
label_valid_one_hot = to_categorical(label_val, num_classes=9)



In [11]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# # Data augmentation for the training set
# train_datagen = ImageDataGenerator(
#     rescale=1./255,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True
# )

# # No augmentation for the validation set
# valid_datagen = ImageDataGenerator(rescale=1./255)

# Pad all images to the maximum dimensions
data_train_padded = np.array([np.pad(img, ((2, 2), (2, 2), (0, 0)), mode='constant') for img in data_train])
data_valid_padded = np.array([np.pad(img, ((2, 2), (2, 2), (0, 0)), mode='constant') for img in data_val])


# Load ResNet-50 pre-trained on ImageNet
resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

# Freeze the convolutional layers
for layer in resnet_model.layers:
    layer.trainable = False

# Add classification head
temp = resnet_model.output
temp = GlobalAveragePooling2D()(temp)
temp = Dense(256, activation='relu')(temp)
predictions = Dense(9, activation='softmax')(temp)  # Adjust 'num_classes' based on your problem

# Create the full model
model_B = Model(inputs=resnet_model.input, outputs=predictions)

# Compile the model
model_B.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])




In [13]:

# Train the model
model_B.fit(
    data_train_padded, label_train_one_hot,
    epochs= 10,  # Adjust the number of epochs
    validation_data=(data_valid_padded,label_valid_one_hot)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2c50304b460>