# Caltech101

In [None]:
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import tensorflow_datasets as tfds  # Import tensorflow_datasets

# Report which device TensorFlow will run on (the GPU list is queried once and reused)
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
device_message = (
    "GPU is available. TensorFlow will use GPU for computations."
    if gpu_devices
    else "GPU is not available. TensorFlow will use CPU."
)
print(device_message)

# Spatial size every image is resized to before being fed to a model
IMG_WIDTH = IMG_HEIGHT = 128

# Fetch the Caltech 101 train/test splits through tensorflow_datasets.
# as_supervised=True yields (image, label) tuples; with_info=True also
# returns the dataset metadata object (ds_info).
(ds_train, ds_test), ds_info = tfds.load(
    'caltech101',
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
    split=['train', 'test'],
)

# Preprocess images and labels
def preprocess_image(image, label):
    """Resize one image to the configured size and divide its pixels by 255.

    Args:
        image: Image tensor taken from the dataset.
        label: Label paired with the image; returned unchanged.

    Returns:
        An ``(image, label)`` tuple where the image is a float32 tensor.
    """
    # NOTE(review): tf.image.resize expects (height, width); the order used
    # here is harmless only while IMG_WIDTH == IMG_HEIGHT, and build_cnn's
    # input shape uses the same order - change both together if ever needed.
    target_size = (IMG_WIDTH, IMG_HEIGHT)
    resized = tf.image.resize(image, target_size)
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# Number of examples grouped into each batch
BATCH_SIZE = 32

# Training split: preprocess each example, batch, then prefetch
ds_train = ds_train.map(preprocess_image)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(tf.data.AUTOTUNE)

# Test split: identical pipeline
ds_test = ds_test.map(preprocess_image)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.prefetch(tf.data.AUTOTUNE)

# Number of label classes, read from the dataset metadata
num_classes = ds_info.features['label'].num_classes

# Function to evaluate models (3 trials per model by default)
def _dataset_to_arrays(dataset):
    """Collect a batched (images, labels) dataset into two NumPy arrays.

    Images and labels are gathered in the SAME pass over the dataset so that
    row i of the feature matrix always belongs to label i, even if the
    dataset yields its elements in a different order on each iteration.

    Returns:
        ``(X, y)`` where X has one flattened image per row and y holds the
        corresponding labels.
    """
    feature_batches, label_batches = [], []
    for images, labels in dataset:
        batch = images.numpy()
        feature_batches.append(batch.reshape(batch.shape[0], -1))
        label_batches.append(labels.numpy())
    return (
        np.concatenate(feature_batches, axis=0),
        np.concatenate(label_batches, axis=0),
    )


def evaluate_model(model_builder, ds_train, ds_test, is_cnn=False, model_name="",
                   num_trials=3, epochs=5):
    """Train and score a freshly built model several times and average the results.

    Args:
        model_builder: Zero-argument callable returning a new, untrained model.
        ds_train: Iterable of ``(images, labels)`` training batches.
        ds_test: Iterable of ``(images, labels)`` test batches.
        is_cnn: If True the model is driven through the Keras-style
            ``fit(dataset, epochs=...)`` / ``evaluate(dataset)`` interface;
            otherwise through ``fit(X, y)`` / ``predict(X)`` on flattened arrays.
        model_name: Label used in progress messages.
        num_trials: Number of independent train/evaluate repetitions.
        epochs: Epochs per trial (only used when ``is_cnn`` is True).

    Returns:
        ``(mean_accuracy, mean_time, accuracies, times)`` where the last two
        are per-trial lists.

    Raises:
        ValueError: If ``num_trials`` is smaller than 1.
    """
    import gc  # local import: gc is not among the file-level imports

    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    if not is_cnn:
        # Materialise the data once, outside both the trial loop and the
        # timed region, so the reported time reflects fitting/prediction
        # rather than image decoding and resizing.
        X_train, y_train = _dataset_to_arrays(ds_train)
        X_test, y_test = _dataset_to_arrays(ds_test)

    times, accuracies = [], []

    for trial in range(1, num_trials + 1):
        print(f"Training {model_name} - Trial {trial}/{num_trials}...")

        # The model is built before the clock starts, so construction is not timed
        model = model_builder()

        start_time = time.time()
        if is_cnn:
            model.fit(
                ds_train,
                epochs=epochs,
                verbose=1,
                validation_data=ds_test,
            )
            _, accuracy = model.evaluate(ds_test, verbose=1)
        else:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
        time_taken = time.time() - start_time

        times.append(time_taken)
        accuracies.append(accuracy)

        print(f"{model_name} - Trial {trial} - Accuracy: {accuracy:.4f}, Time: {time_taken:.2f} sec")

        # Drop the trained model before the next trial to keep peak memory down
        del model
        gc.collect()

    mean_accuracy = np.mean(accuracies)
    mean_time = np.mean(times)

    print(f"{model_name} - Mean Accuracy: {mean_accuracy:.4f}, Mean Time: {mean_time:.2f} sec")

    return mean_accuracy, mean_time, accuracies, times

# Define model builders
def build_cnn():
    """Build and compile a fresh, untrained small CNN classifier.

    The network is two Conv2D + MaxPooling2D stages followed by a 128-unit
    dense layer, dropout and a softmax over ``num_classes`` outputs. It is
    compiled with Adam (learning rate 0.001) and sparse categorical
    cross-entropy, so labels are expected as integer class ids.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input explicitly: Keras warns when `input_shape` is
        # passed to a layer inside a Sequential model.
        tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',  # labels are integer ids, not one-hot
                  metrics=['accuracy'])
    return model

def build_knn():
    """Return a new, unfitted k-nearest-neighbours classifier with k = 3."""
    knn = KNeighborsClassifier(n_neighbors=3)
    return knn

def build_svm():
    """Return a new, unfitted linear SVM (primal formulation, at most 1000 iterations)."""
    svm = LinearSVC(dual=False, max_iter=1000)
    return svm

def build_rf():
    """Return a new, unfitted random forest (100 trees, depth capped at 15).

    The seed is fixed at 42, so every call builds an identically seeded forest.
    """
    forest = RandomForestClassifier(
        random_state=42,
        max_depth=15,
        n_estimators=100,
    )
    return forest

# Run evaluations and save results
# Each entry: (display name, builder, whether the builder returns a Keras model)
model_specs = [
    ("CNN", build_cnn, True),
    ("KNN", build_knn, False),
    ("Linear SVM", build_svm, False),
    ("Random Forest", build_rf, False),
]

# results maps model name -> (mean accuracy, mean time, per-trial accuracies, per-trial times)
results = {}
for model_name, model_builder, is_cnn in model_specs:
    results[model_name] = evaluate_model(
        model_builder, ds_train, ds_test, is_cnn=is_cnn, model_name=model_name
    )

# Write results to the file. The file is opened only after every evaluation
# has finished, so a failure during training does not leave behind a
# truncated results.txt containing just the header.
with open("results.txt", "w", encoding="utf-8") as result_text:
    result_text.write("# Model Evaluation Results (3 trials per model)\n\n")

    for model_name, (mean_acc, mean_time, acc_trials, time_trials) in results.items():
        accuracy_list = ", ".join(f"{acc:.4f}" for acc in acc_trials)
        # `seconds` rather than `time`, to avoid shadowing the imported time module
        time_list = ", ".join(f"{seconds:.4f}" for seconds in time_trials)
        result_text.write(
            f"Model: {model_name}\n"
            f"  Mean Accuracy: {mean_acc:.4f}\n"
            f"  Mean Time (seconds): {mean_time:.4f}\n"
            f"  Accuracy Trials: {accuracy_list}\n"
            f"  Time Trials (seconds): {time_list}\n\n"
        )

# Plot results: one bar chart for mean accuracy, one for mean time
names = list(results)
mean_accuracies = [results[name][0] for name in names]
mean_times = [results[name][1] for name in names]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
ax1, ax2 = axes

# Per panel: axis, values, bar colour, y-label, title, label offset, label format
panel_specs = [
    (ax1, mean_accuracies, 'green', 'Mean Accuracy',
     'Model Accuracy Comparison (3 Trials)', 0.02, '{:.2f}'),
    (ax2, mean_times, 'blue', 'Mean Training Time (s)',
     'Model Training Time Comparison (3 Trials)', 1, '{:.1f}s'),
]

for axis, values, bar_color, y_label, title, label_offset, label_format in panel_specs:
    axis.bar(names, values, color=bar_color, alpha=0.7)
    axis.set_ylabel(y_label)
    axis.set_title(title)
    if axis is ax1:
        # Accuracy is a fraction, so pin its axis to the full 0..1 range
        axis.set_ylim(0, 1.0)
    # Print each bar's value just above it
    for position, value in enumerate(values):
        axis.text(position, value + label_offset, label_format.format(value), ha='center')

plt.tight_layout()
plt.savefig('model_comparison_all_images.png')
plt.show()

print("Evaluation complete! Results saved to 'results.txt' and 'model_comparison_all_images.png'")

Num GPUs Available:  1
GPU is available. TensorFlow will use GPU for computations.
Downloading and preparing dataset 131.05 MiB (download: 131.05 MiB, generated: 132.86 MiB, total: 263.91 MiB) to /root/tensorflow_datasets/caltech101/3.0.2...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/3060 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/caltech101/incomplete.QK57OU_3.0.2/caltech101-train.tfrecord*...:   0%|   …

Generating test examples...:   0%|          | 0/6084 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/caltech101/incomplete.QK57OU_3.0.2/caltech101-test.tfrecord*...:   0%|    …

Dataset caltech101 downloaded and prepared to /root/tensorflow_datasets/caltech101/3.0.2. Subsequent calls will reuse this data.
Training CNN - Trial 1/3...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
96/96 ━━━━━━━━━━━━━━━━━━━━ 22s 133ms/step - accuracy: 0.0117 - loss: 5.0571 - val_accuracy: 0.0186 - val_loss: 4.5472
Epoch 2/5
96/96 ━━━━━━━━━━━━━━━━━━━━ 4s 41ms/step - accuracy: 0.0454 - loss: 4.4907 - val_accuracy: 0.1532 - val_loss: 4.3038
Epoch 3/5
96/96 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.0920 - loss: 4.1983 - val_accuracy: 0.2179 - val_loss: 3.7251
Epoch 4/5
96/96 ━━━━━━━━━━━━━━━━━━━━ 4s 41ms/step - accuracy: 0.1924 - loss: 3.5841 - val_accuracy: 0.3437 - val_loss: 3.3453
Epoch 5/5
96/96 ━━━━━━━━━━━━━━━━━━━━ 4s 41ms/step - accuracy: 0.2992 - loss: 2.9674 - val_accuracy: 0.3703 - val_loss: 3.1660
191/191 ━━━━━━━━━━━━━━━━━━━━ 2s 12ms/step - accuracy: 0.3640 - loss: 3.1730
CNN - Trial 1 - Accuracy: 0.3703, Time: 39.40 sec
Training CNN - Trial 2/3...
Epoch 1/5
