In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from numpy import load
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import time
from tqdm import tqdm
from sklearn.utils import parallel_backend

In [3]:
from google.colab import drive
import numpy as np

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to the files on Google Drive
drive_path = '/content/drive/My Drive/'
y_filename = 'yaleExtB_target.npy'
X_filename = 'yaleExtB_data.npy'

# Copy files from Google Drive to Colab
!cp "{drive_path}{y_filename}" .
!cp "{drive_path}{X_filename}" .

# Load data
y = np.load(y_filename)
X = np.load(X_filename)

# Display confirmation message
print(f'Data loaded successfully.\nTarget shape: {y.shape}, Data shape: {X.shape}')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Data loaded successfully.
Target shape: (1500,), Data shape: (1500, 5236)


In [4]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
#Check if GPU is available
if tf.test.gpu_device_name():
  print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
  print("No GPU available. Please ensure you have configured GPU acceleration in your Colab runtime.")

Tensorflow version 2.15.0
Default GPU Device: /device:GPU:0


In [5]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Define the parameter grid for grid search
param_dist = {
    'pca__n_components': [50, 100, 200, 400, 600, 800, None],
    'mlp__hidden_layer_sizes': [(100,), (200,), (400,), (600,), (800,), (1000,)],
    'mlp__activation': ['tanh', 'relu', 'logistic'],
    'mlp__solver': ['sgd', 'adam'],
    'mlp__batch_size': [256],
    'mlp__max_iter': [1000],
    'mlp__learning_rate_init': [0.01, 0.05],
    'mlp__momentum': [0.9, 0.5],
    'mlp__early_stopping': [True],
    'mlp__validation_fraction': [0.1, 0.01],
}


In [7]:
#Start Measuring Time
start_time = time.time()

In [8]:
def gpu():
  with tf.device('/device:GPU:0'):
    # Create the PCA object
    pca = PCA(whiten=True)

    # Create the MLP classifier
    mlp = MLPClassifier()

    # Create the pipeline
    pipeline = Pipeline([
        ('pca', pca),
        ('mlp', mlp)
    ])

    # Create the random search object
    random_search = RandomizedSearchCV(pipeline, param_distributions=param_dist, n_iter=10, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)

    # Fit the random search to the data
    random_search.fit(X_train, y_train)

    # Print the best hyperparameters
    print("Best Hyperparameters:", random_search.best_params_)

    # Save the best model to disk
    best_model = random_search.best_estimator_
    joblib.dump(best_model, 'best_model_with_pca_random_search.joblib')

    # Evaluate the model with the best hyperparameters on the test set
    y_pred = best_model.predict(X_test)

    # Print classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
gpu()

Fitting 5 folds for each of 10 candidates, totalling 50 fits


  pid = os.fork()
  pid = os.fork()


Best Hyperparameters: {'pca__n_components': 200, 'mlp__validation_fraction': 0.1, 'mlp__solver': 'adam', 'mlp__momentum': 0.9, 'mlp__max_iter': 1000, 'mlp__learning_rate_init': 0.05, 'mlp__hidden_layer_sizes': (800,), 'mlp__early_stopping': True, 'mlp__batch_size': 256, 'mlp__activation': 'logistic'}
Classification Report:
              precision    recall  f1-score   support

         2.0       1.00      0.88      0.93         8
         3.0       0.92      1.00      0.96        12
         4.0       1.00      1.00      1.00         8
         5.0       0.88      1.00      0.93         7
         6.0       1.00      0.92      0.96        12
         7.0       1.00      1.00      1.00         9
         8.0       1.00      0.67      0.80         9
         9.0       0.67      0.91      0.77        11
        11.0       1.00      0.71      0.83        14
        12.0       1.00      1.00      1.00        14
        13.0       0.67      1.00      0.80         6
        15.0       0.85   

In [9]:
#End Measuring Time
end_time = time.time()
runtime = end_time - start_time
print(f"  Total Runtime: {runtime} seconds")

  Total Runtime: 77.60971784591675 seconds
