In [1]:
import tensorflow as tf
from tensorflow.keras import datasets
import numpy as np
import cv2
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Load CIFAR-10 dataset

2024-02-15 20:28:00.016145: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-15 20:28:00.207217: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-15 20:28:00.207303: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-15 20:28:00.234920: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-15 20:28:00.293584: I tensorflow/core/platform/cpu_feature_guar

In [2]:
def preprocess(images):
    images = images.astype('float32') / 255.0
    return images

In [3]:
def extract_sift_features(images):
    sift = cv2.SIFT_create()
    keypoints_list = []
    descriptors_list = []
    for image in images:
        image = (image * 255).astype(np.uint8)  # Convert image to uint8
        keypoints, descriptors = sift.detectAndCompute(image, None)
        if descriptors is not None and len(keypoints) > 0:  # Ensure valid descriptors
            descriptors_list.append(descriptors)
            keypoints_list.append(keypoints)
    return keypoints_list, descriptors_list

In [4]:
def bow_representation(keypoints, descriptors, kmeans):
    bow_representation = np.zeros((len(keypoints), kmeans.n_clusters), dtype=np.float32)
    for i in range(len(keypoints)):
        if descriptors[i] is not None:
            words = kmeans.predict(descriptors[i])
            for word in words:
                bow_representation[i, word] += 1
    return bow_representation

In [5]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Preprocessing

train_images = preprocess(train_images)
test_images = preprocess(test_images)

# Feature extraction using SIFT


train_keypoints, train_descriptors = extract_sift_features(train_images)
test_keypoints, test_descriptors = extract_sift_features(test_images)

# Flatten descriptors
train_descriptors_flat = np.vstack(train_descriptors)
test_descriptors_flat = np.vstack(test_descriptors)


In [6]:
kmeans = KMeans(n_clusters=12, random_state=42)
kmeans.fit(train_descriptors_flat)

# Generate Bag-of-Visual Words representation


train_bow = bow_representation(train_keypoints, train_descriptors, kmeans)
test_bow = bow_representation(test_keypoints, test_descriptors, kmeans)

# Scale features
scaler = StandardScaler()
train_bow_scaled = scaler.fit_transform(train_bow)
test_bow_scaled = scaler.transform(test_bow)

Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f3998566d40>
Traceback (most recent call last):
  File "/home/siddharth/anaconda3/lib/python3.10/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/siddharth/anaconda3/lib/python3.10/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/siddharth/anaconda3/lib/python3.10/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/siddharth/anaconda3/lib/python3.10/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_p

In [None]:
# import optuna


# def objective(trial):
#     C = trial.suggest_loguniform('C', 1e-10, 1e10)
#     # kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
#     svm = SVC(C=C, kernel='linear')
    
#     # Fit the classifier to the training data
#     svm.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

#     # Make predictions on the training and test data
#     train_predictions = svm.predict(train_bow_scaled)
#     test_predictions = svm.predict(test_bow_scaled)

#     # Calculate accuracy
#     train_accuracy = accuracy_score(train_labels[:len(train_bow)], train_predictions)
#     test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

#     # Return the test accuracy as the objective value to maximize
#     return test_accuracy

# # Create a study object and optimize the objective function
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=100)

# Get the best hyperparameters
# best_params = study.best_params
best_svm = SVC()

# Fit the classifier with the best hyperparameters
best_svm.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

# Make predictions on the test data using the best classifier
test_predictions = best_svm.predict(test_bow_scaled)

# silhouette = silhouette_score(test_bow_scaled, test_labels)
# print("Silhouette Score:", silhouette)
# Calculate accuracy
test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

# print("Best hyperparameters:", best_params)
# print("Test Accuracy with best model:", test_accuracy)/


  y = column_or_1d(y, warn=True)


ValueError: Found input variables with inconsistent numbers of samples: [9976, 10000]

In [None]:
# def objective(trial):
#     C = trial.suggest_loguniform('C', 1e-10, 1e10)
#     max_iter = trial.suggest_int('max_iter', 100, 1000)
    
#     # Initialize the classifier with the suggested hyperparameters
#     clf = LogisticRegression(C=C, max_iter=max_iter, random_state=42)
    
#     # Fit the classifier to the training data
#     clf.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

#     # Make predictions on the training and test data
#     train_predictions = clf.predict(train_bow_scaled)
#     test_predictions = clf.predict(test_bow_scaled)

#     # Calculate accuracy
#     train_accuracy = accuracy_score(train_labels[:len(train_bow)], train_predictions)
#     test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

#     # Return the test accuracy as the objective value to maximize
#     return test_accuracy

# # Create a study object and optimize the objective function
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=100)

# # Get the best hyperparameters
# best_params = study.best_params
best_lr = LogisticRegression(random_state=42)

# Fit the classifier with the best hyperparameters
best_lr.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

# Make predictions on the test data using the best classifier
test_predictions = best_lr.predict(test_bow_scaled)

# Calculate accuracy
test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

print("Best hyperparameters:", best_params)
print("Test Accuracy with best model:", test_accuracy)


[I 2024-02-15 11:41:20,048] A new study created in memory with name: no-name-82265fbf-dddb-4f39-8531-314af29237a8
  C = trial.suggest_loguniform('C', 1e-10, 1e10)
[I 2024-02-15 11:41:20,313] Trial 0 finished with value: 0.09873696872493985 and parameters: {'C': 0.0729990374975001, 'max_iter': 244}. Best is trial 0 with value: 0.09873696872493985.
  C = trial.suggest_loguniform('C', 1e-10, 1e10)
[I 2024-02-15 11:41:20,431] Trial 1 finished with value: 0.10174418604651163 and parameters: {'C': 1.207702686091211e-06, 'max_iter': 134}. Best is trial 1 with value: 0.10174418604651163.
  C = trial.suggest_loguniform('C', 1e-10, 1e10)
[I 2024-02-15 11:41:20,646] Trial 2 finished with value: 0.0985364875701684 and parameters: {'C': 324217.90020133666, 'max_iter': 369}. Best is trial 1 with value: 0.10174418604651163.
  C = trial.suggest_loguniform('C', 1e-10, 1e10)
[I 2024-02-15 11:41:20,823] Trial 3 finished with value: 0.0985364875701684 and parameters: {'C': 1123760559.5992298, 'max_iter': 

Best hyperparameters: {'C': 8.620385894156097e-05, 'max_iter': 522}
Test Accuracy with best model: 0.10184442662389735


In [None]:
# from sklearn.metrics import silhouette_score


# def objective(trial):
#     n_neighbors = trial.suggest_int('n_neighbors', 1, 100)
#     knn_clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    
#     # Fit the classifier to the training data
#     knn_clf.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

#     # Make predictions on the training and test data
#     train_predictions = knn_clf.predict(train_bow_scaled)
#     test_predictions = knn_clf.predict(test_bow_scaled)

#     # Calculate accuracy
#     train_accuracy = accuracy_score(train_labels[:len(train_bow)], train_predictions)
#     test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

#     # Return the test accuracy as the objective value to maximize
#     return test_accuracy

# # Create a study object and optimize the objective function
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=10)

# # Get the best hyperparameters
# best_params = study.best_params
best_knn = KNeighborsClassifier()

# Fit the classifier with the best hyperparameters
best_knn.fit(train_bow_scaled, train_labels[:len(train_bow)].ravel())

# Make predictions on the test data using the best classifier
test_predictions = best_knn.predict(test_bow_scaled)

# Calculate accuracy
test_accuracy = accuracy_score(test_labels[:len(test_bow)], test_predictions)

print("Best hyperparameters:", best_params)
print("Test Accuracy with best model:", test_accuracy)

[I 2024-02-15 12:38:49,619] A new study created in memory with name: no-name-8e25e500-3811-489b-84c7-da78885ab2bf
[I 2024-02-15 12:39:59,282] Trial 0 finished with value: 0.09522854851643946 and parameters: {'n_neighbors': 1}. Best is trial 0 with value: 0.09522854851643946.
[I 2024-02-15 12:41:05,170] Trial 1 finished with value: 0.09943865276663993 and parameters: {'n_neighbors': 13}. Best is trial 1 with value: 0.09943865276663993.
[I 2024-02-15 12:41:48,312] Trial 2 finished with value: 0.10044105854049719 and parameters: {'n_neighbors': 63}. Best is trial 2 with value: 0.10044105854049719.
[I 2024-02-15 12:42:26,346] Trial 3 finished with value: 0.09402566158781074 and parameters: {'n_neighbors': 86}. Best is trial 2 with value: 0.10044105854049719.
[I 2024-02-15 12:43:04,457] Trial 4 finished with value: 0.09482758620689655 and parameters: {'n_neighbors': 33}. Best is trial 2 with value: 0.10044105854049719.
[I 2024-02-15 12:43:41,867] Trial 5 finished with value: 0.0990376904570

ValueError: Found input variables with inconsistent numbers of samples: [9976, 10000]