## Kaleb Ray - Big Data HW2

# DNN MNIST and CIFAR10

### Import Packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import umap
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, log_loss
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tensorflow.keras.datasets import cifar10
import warnings
warnings.filterwarnings('ignore')

### Load MNIST

CIFAR10 ships with a fixed 50,000 / 10,000 train-test split (roughly 83-17). Since the assignment does not specify a split for MNIST, we use a comparable 80-20 split on the MNIST data.

In [4]:
# Fetch MNIST from OpenML as plain NumPy arrays (no DataFrame)
mnist = fetch_openml('mnist_784', version = 1, as_frame = False)
# Features as delivered; targets cast to integers
X = mnist.data
y = mnist.target.astype(int)
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
x_train_minist, x_test_minist, y_train_minist, y_test_minist = train_test_split(
    X,
    y,
    random_state = 0,
    test_size = 0.2
)

Shape of X: (70000, 784)
Shape of y: (70000,)


### Load CIFAR10

In [5]:
# Load CIFAR-10 with its predefined train / test partition
cifar_train, cifar_test = cifar10.load_data()
x_train_cifar, y_train_cifar = cifar_train
x_test_cifar, y_test_cifar = cifar_test
# Report the array shapes; note that the labels arrive as (N, 1) column vectors
print("Training data shape:", x_train_cifar.shape)  # (50000, 32, 32, 3)
print("Training labels shape:", y_train_cifar.shape)  # (50000, 1)
print("Testing data shape:", x_test_cifar.shape)  # (10000, 32, 32, 3)
print("Testing labels shape:", y_test_cifar.shape)  # (10000, 1)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 0us/step
Training data shape: (50000, 32, 32, 3)
Training labels shape: (50000, 1)
Testing data shape: (10000, 32, 32, 3)
Testing labels shape: (10000, 1)


## DNN for MNIST and CIFAR10

In [6]:
# MNIST: min-max style normalisation, dividing every pixel by 255
# (maps values into [0, 1], assuming raw intensities lie in 0..255)
# NOTE(review): this cell overwrites its own inputs, so running it twice in the
# same kernel rescales already-normalised data; run it exactly once per session
x_train_minist = x_train_minist / 255.0
x_test_minist = x_test_minist / 255.0

# CIFAR: z-score standardisation per colour channel
# Mean and standard deviation are taken over samples, rows and columns of the
# TRAINING images only, leaving one value per channel (the last axis)
mean_cifar = np.mean(x_train_cifar, axis = (0, 1, 2))
std_cifar = np.std(x_train_cifar, axis = (0, 1, 2))
# Data, mean and std are all divided by 255 before standardising; algebraically
# (x/255 - mean/255) / (std/255) equals (x - mean) / std, so the rescaling cancels
x_train_cifar_norm = x_train_cifar / 255.0
x_test_cifar_norm = x_test_cifar / 255.0
mean_cifar_norm = mean_cifar / 255.0
std_cifar_norm = std_cifar / 255.0
# The test set is standardised with the training statistics (no test-set leakage)
x_train_cifar = (x_train_cifar_norm - mean_cifar_norm) / std_cifar_norm
x_test_cifar = (x_test_cifar_norm - mean_cifar_norm) / std_cifar_norm

### Model Metrics

In [7]:
def compute_measure(true_label, predicted_label):
  """Compute summary classification metrics for integer class labels.

  Sensitivity and specificity are obtained by collapsing the problem to
  binary: class 0 is "negative" and every label > 0 is "positive". A sample
  only counts as a true positive / true negative when its predicted label
  matches the true label exactly, so a positive sample predicted as a
  different positive class is counted as a false negative.

  Both inputs are flattened to 1-D first. Without this, labels stored as an
  (N, 1) column (as CIFAR-10 labels are) broadcast against (N,) predictions
  into an (N, N) comparison matrix and every count below is wrong.

  Parameters:
    true_label: array-like of ground-truth labels, shape (N,) or (N, 1).
    predicted_label: array-like of predicted labels, shape (N,) or (N, 1).

  Returns:
    list: [accuracy, sensitivity, specificity, f1_micro, f1_macro, d], where
    d = log2(1 + accuracy) + log2(1 + (sensitivity + specificity) / 2).
    Ratios with a zero denominator come out as nan or inf rather than raising.

  Raises:
    ValueError: if the two inputs do not hold the same number of labels.
  """
  true_label = np.asarray(true_label).ravel()
  predicted_label = np.asarray(predicted_label).ravel()
  if true_label.shape != predicted_label.shape:
    raise ValueError(
      "true_label and predicted_label must have the same number of elements"
    )

  correct = (true_label == predicted_label)
  positive = (true_label > 0)
  negative = np.logical_not(positive)

  tp = np.sum(np.logical_and(correct, positive))
  tn = np.sum(np.logical_and(correct, negative))
  # Every misclassified negative is a false positive, every misclassified
  # positive a false negative
  fp = np.sum(negative) - tn
  fn = np.sum(positive) - tp

  # A class group may be absent (e.g. no label-0 samples); silence both the
  # x/0 and the 0/0 case instead of emitting runtime warnings
  with np.errstate(divide = 'ignore', invalid = 'ignore'):
    sen = (1.0 * tp) / (tp + fn)
    spec = (1.0 * tn) / (tn + fp)
    acc = (tp + tn) * 1.0 / (tp + fp + tn + fn)

  d = np.log2(1 + acc) + np.log2(1 + (sen + spec) / 2)
  f1_micro = f1_score(true_label, predicted_label, average = 'micro')
  f1_macro = f1_score(true_label, predicted_label, average = 'macro')

  return [acc, sen, spec, f1_micro, f1_macro, d]

In [8]:
def model_metrics(true, pred):
  """Print the metrics computed by compute_measure, one per line.

  Values are printed with four decimal places. The previous format spec
  `4f` set a minimum field width of 4, not a precision, so six decimals
  were printed.

  Parameters:
    true: ground-truth labels, forwarded to compute_measure.
    pred: predicted labels, forwarded to compute_measure.

  Returns:
    None. The metrics are written to stdout only.
  """
  # Order matches the list returned by compute_measure
  captions = (
    "Accuracy",
    "Sensitivity",
    "Specificity",
    "F1-Score Micro",
    "F1-Score Macro",
    "Diagnostic Index",
  )
  for caption, value in zip(captions, compute_measure(true, pred)):
    print("{0} is {1:.4f}".format(caption, value))

### MNIST

**SGD**

In [None]:
# MLP with three ReLU hidden layers (200-100-50), optimised with SGD and an
# adaptive learning-rate schedule
clf_minist_sgd = MLPClassifier(
    solver = 'sgd',
    activation = 'relu',
    hidden_layer_sizes = (200, 100, 50),
    learning_rate = 'adaptive',
    learning_rate_init = 0.01,
    max_iter = 100,
    warm_start = True,
    random_state = 0
)

# Log loss on both splits, recorded after every partial_fit call
n_epoch = 100
train_losses, test_losses = [], []

for epoch in range(n_epoch):
  # Incremental update on the full training set, then score both splits
  clf_minist_sgd.partial_fit(x_train_minist, y_train_minist, classes = np.unique(y))
  train_proba = clf_minist_sgd.predict_proba(x_train_minist)
  test_proba = clf_minist_sgd.predict_proba(x_test_minist)
  train_losses.append(log_loss(y_train_minist, train_proba))
  test_losses.append(log_loss(y_test_minist, test_proba))

# Hard class predictions on the test set, consumed by the evaluation cells below
y_pred_minist = clf_minist_sgd.predict(x_test_minist)

In [None]:
# Plot losses for MNIST: log loss recorded by the preceding training cell,
# drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# Same 200-100-50 ReLU architecture as the SGD model, optimised with Adam
# NOTE(review): per the scikit-learn docs `learning_rate` is only used when
# solver = 'sgd' — confirm that keeping 'adaptive' here is intentional
clf_minist_adam = MLPClassifier(
    solver = 'adam',
    activation = 'relu',
    hidden_layer_sizes = (200, 100, 50),
    learning_rate = 'adaptive',
    learning_rate_init = 0.01,
    max_iter = 100,
    warm_start = True,
    random_state = 0
)

# Log loss on both splits, recorded after every partial_fit call
n_epoch = 100
train_losses, test_losses = [], []

for epoch in range(n_epoch):
  # Incremental update on the full training set, then score both splits
  clf_minist_adam.partial_fit(x_train_minist, y_train_minist, classes = np.unique(y))
  train_proba = clf_minist_adam.predict_proba(x_train_minist)
  test_proba = clf_minist_adam.predict_proba(x_test_minist)
  train_losses.append(log_loss(y_train_minist, train_proba))
  test_losses.append(log_loss(y_test_minist, test_proba))

# Hard class predictions on the test set, consumed by the evaluation cells below
y_pred_minist = clf_minist_adam.predict(x_test_minist)

In [None]:
# Plot losses for MNIST (Adam run): log loss recorded by the preceding training
# cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

### CIFAR10

In [None]:
# Collapse every image into a single feature row, as required before the MLP
x_train_cifar = x_train_cifar.reshape(len(x_train_cifar), -1)
x_test_cifar = x_test_cifar.reshape(len(x_test_cifar), -1)

**SGD**

In [None]:
# MLP with three ReLU hidden layers (200-100-50), optimised with SGD and an
# adaptive learning-rate schedule, for the flattened CIFAR images
clf_cifar_sgd = MLPClassifier(
    solver = 'sgd',
    activation = 'relu',
    hidden_layer_sizes = (200, 100, 50),
    learning_rate = 'adaptive',
    learning_rate_init = 0.01,
    max_iter = 100,
    warm_start = True,
    random_state = 0
)

# Log loss on both splits, recorded after every partial_fit call
n_epoch = 100
train_losses, test_losses = [], []

for epoch in range(n_epoch):
  # Incremental update on the full training set, then score both splits
  clf_cifar_sgd.partial_fit(x_train_cifar, y_train_cifar, classes = np.unique(y_train_cifar))
  train_proba = clf_cifar_sgd.predict_proba(x_train_cifar)
  test_proba = clf_cifar_sgd.predict_proba(x_test_cifar)
  train_losses.append(log_loss(y_train_cifar, train_proba))
  test_losses.append(log_loss(y_test_cifar, test_proba))

# Hard class predictions on the test set, consumed by the evaluation cells below
y_pred_cifar = clf_cifar_sgd.predict(x_test_cifar)

In [None]:
# Plot losses for CIFAR: log loss recorded by the preceding training cell,
# drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# Same 200-100-50 ReLU architecture as the SGD model, optimised with Adam
# NOTE(review): per the scikit-learn docs `learning_rate` is only used when
# solver = 'sgd' — confirm that keeping 'adaptive' here is intentional
clf_cifar_adam = MLPClassifier(
    solver = 'adam',
    activation = 'relu',
    hidden_layer_sizes = (200, 100, 50),
    learning_rate = 'adaptive',
    learning_rate_init = 0.01,
    max_iter = 100,
    warm_start = True,
    random_state = 0
)

# Log loss on both splits, recorded after every partial_fit call
n_epoch = 100
train_losses, test_losses = [], []

for epoch in range(n_epoch):
  # Incremental update on the full training set, then score both splits
  clf_cifar_adam.partial_fit(x_train_cifar, y_train_cifar, classes = np.unique(y_train_cifar))
  train_proba = clf_cifar_adam.predict_proba(x_train_cifar)
  test_proba = clf_cifar_adam.predict_proba(x_test_cifar)
  train_losses.append(log_loss(y_train_cifar, train_proba))
  test_losses.append(log_loss(y_test_cifar, test_proba))

# Hard class predictions on the test set, consumed by the evaluation cells below
y_pred_cifar = clf_cifar_adam.predict(x_test_cifar)

In [None]:
# Plot losses for CIFAR (Adam run): log loss recorded by the preceding training
# cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

### PCA MNIST

In [None]:
def doPCA_DNN(train_data, test_data, need_exp_var = 0.95):
  """Reduce train / test features with PCA fitted on the training data only.

  A full PCA is fitted on `train_data`; the smallest number of leading
  components whose cumulative explained-variance ratio reaches
  `need_exp_var` is kept, and the same projection is applied to `test_data`.

  Parameters:
    train_data: 2-D array (samples x features) used to fit the PCA.
    test_data: 2-D array with the same number of features as `train_data`.
    need_exp_var: required cumulative explained-variance ratio in (0, 1].
      Defaults to 0.95 so the existing two-argument calls in this notebook
      work. NOTE(review): 0.95 is an assumed default — confirm against the
      assignment.

  Returns:
    tuple: (train_pca, test_pca), both restricted to the selected components.
  """
  # PCA cannot return more components than min(n_samples, n_features)
  pca = PCA(n_components = min(train_data.shape))
  train_projected = pca.fit_transform(train_data)
  cumulative_var = np.cumsum(pca.explained_variance_ratio_)

  # First position where the threshold is met. If rounding keeps the cumulative
  # sum just below the threshold (e.g. need_exp_var = 1.0), keep every component
  # rather than silently falling back to a single one, which is what argmax
  # over an all-False mask would do
  reached = np.flatnonzero(cumulative_var >= need_exp_var)
  n_comp = int(reached[0]) + 1 if reached.size else cumulative_var.size

  train_pca = train_projected[:, :n_comp]
  test_pca = pca.transform(test_data)[:, :n_comp]
  return train_pca, test_pca

**SGD**

In [None]:
# Project MNIST onto its leading principal components
# NOTE(review): doPCA_DNN needs an explained-variance threshold that the original
# call omitted; 0.95 is an assumed value — adjust to the assignment's requirement
train_minist_pca, test_minist_pca = doPCA_DNN(x_train_minist, x_test_minist, 0.95)

# clf_minist_sgd has already been fitted on the 784 raw pixel features, so
# partial_fit would reject inputs with a different number of columns; train an
# unfitted copy that shares its hyperparameters instead
clf_minist_sgd_pca = MLPClassifier(**clf_minist_sgd.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_minist_sgd_pca.partial_fit(train_minist_pca, y_train_minist, classes = np.unique(y))
  train_losses.append(log_loss(y_train_minist, clf_minist_sgd_pca.predict_proba(train_minist_pca)))
  test_losses.append(log_loss(y_test_minist, clf_minist_sgd_pca.predict_proba(test_minist_pca)))

# Predictions on the test set
y_pred_minist = clf_minist_sgd_pca.predict(test_minist_pca)

In [None]:
# Plot losses for MNIST (PCA features): log loss recorded by the preceding
# training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# clf_minist_adam has already been fitted on the 784 raw pixel features, so
# partial_fit would reject the PCA-reduced inputs; train an unfitted copy that
# shares its hyperparameters instead
clf_minist_adam_pca = MLPClassifier(**clf_minist_adam.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_minist_adam_pca.partial_fit(train_minist_pca, y_train_minist, classes = np.unique(y))
  train_losses.append(log_loss(y_train_minist, clf_minist_adam_pca.predict_proba(train_minist_pca)))
  test_losses.append(log_loss(y_test_minist, clf_minist_adam_pca.predict_proba(test_minist_pca)))

# Predictions on the test set
y_pred_minist = clf_minist_adam_pca.predict(test_minist_pca)

In [None]:
# Plot losses for MNIST (PCA features, Adam run): log loss recorded by the
# preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

### PCA CIFAR

**SGD**

In [None]:
# Project the flattened CIFAR images onto their leading principal components
# NOTE(review): doPCA_DNN needs an explained-variance threshold that the original
# call omitted; 0.95 is an assumed value — adjust to the assignment's requirement
train_cifar_pca, test_cifar_pca = doPCA_DNN(x_train_cifar, x_test_cifar, 0.95)

# clf_cifar_sgd has already been fitted on the flattened raw images, so
# partial_fit would reject inputs with a different number of columns; train an
# unfitted copy that shares its hyperparameters instead
clf_cifar_sgd_pca = MLPClassifier(**clf_cifar_sgd.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_cifar_sgd_pca.partial_fit(train_cifar_pca, y_train_cifar, classes = np.unique(y_train_cifar))
  train_losses.append(log_loss(y_train_cifar, clf_cifar_sgd_pca.predict_proba(train_cifar_pca)))
  test_losses.append(log_loss(y_test_cifar, clf_cifar_sgd_pca.predict_proba(test_cifar_pca)))

# Predictions on the test set
y_pred_cifar = clf_cifar_sgd_pca.predict(test_cifar_pca)

In [None]:
# Plot losses for CIFAR (PCA features): log loss recorded by the preceding
# training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# clf_cifar_adam has already been fitted on the flattened raw images, so
# partial_fit would reject the PCA-reduced inputs; train an unfitted copy that
# shares its hyperparameters instead
clf_cifar_adam_pca = MLPClassifier(**clf_cifar_adam.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_cifar_adam_pca.partial_fit(train_cifar_pca, y_train_cifar, classes = np.unique(y_train_cifar))
  train_losses.append(log_loss(y_train_cifar, clf_cifar_adam_pca.predict_proba(train_cifar_pca)))
  test_losses.append(log_loss(y_test_cifar, clf_cifar_adam_pca.predict_proba(test_cifar_pca)))

# Predictions on the test set
y_pred_cifar = clf_cifar_adam_pca.predict(test_cifar_pca)

In [None]:
# Plot losses for CIFAR (PCA features, Adam run): log loss recorded by the
# preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

### PCA + T-SNE MNIST

In [None]:
def doTSNE_DNN(train_data, test_data):
  """Embed train and test features into two dimensions with t-SNE.

  scikit-learn's TSNE offers fit_transform only; it has no transform method
  for unseen samples, so the previous `tsne.transform(test_data)` call raised
  AttributeError. The train and test rows are therefore stacked, embedded in
  a single run, and split back apart.

  NOTE(review): because the embedding is computed jointly, the test points
  influence the train coordinates (no labels are involved). If a strict
  out-of-sample projection is required, a method with a real transform step
  is needed instead.

  Parameters:
    train_data: 2-D array (samples x features) of training features.
    test_data: 2-D array with the same number of features as `train_data`.

  Returns:
    tuple: (train_tsne, test_tsne), each with two columns, row-aligned with
    the corresponding input.
  """
  n_train = train_data.shape[0]
  stacked = np.vstack([train_data, test_data])
  embedded = TSNE(n_components = 2, random_state = 0).fit_transform(stacked)
  # Rows keep their stacking order: training samples first, then test samples
  train_tsne = embedded[:n_train]
  test_tsne = embedded[n_train:]
  return train_tsne, test_tsne

In [None]:
# Embed the PCA-reduced MNIST train / test features into 2-D with t-SNE
train_minist_tsne, test_minist_tsne = doTSNE_DNN(train_minist_pca, test_minist_pca)

**SGD**

In [None]:
# clf_minist_sgd has already been fitted on inputs with a different number of
# features, so partial_fit would reject the 2-D t-SNE embedding; train an
# unfitted copy that shares its hyperparameters instead
clf_minist_sgd_tsne = MLPClassifier(**clf_minist_sgd.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_minist_sgd_tsne.partial_fit(train_minist_tsne, y_train_minist, classes = np.unique(y))
  train_losses.append(log_loss(y_train_minist, clf_minist_sgd_tsne.predict_proba(train_minist_tsne)))
  test_losses.append(log_loss(y_test_minist, clf_minist_sgd_tsne.predict_proba(test_minist_tsne)))

# Predictions on the test set
y_pred_minist = clf_minist_sgd_tsne.predict(test_minist_tsne)

In [None]:
# Plot losses for MNIST (PCA + t-SNE features): log loss recorded by the
# preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# clf_minist_adam has already been fitted on inputs with a different number of
# features, so partial_fit would reject the 2-D t-SNE embedding; train an
# unfitted copy that shares its hyperparameters instead
clf_minist_adam_tsne = MLPClassifier(**clf_minist_adam.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_minist_adam_tsne.partial_fit(train_minist_tsne, y_train_minist, classes = np.unique(y))
  train_losses.append(log_loss(y_train_minist, clf_minist_adam_tsne.predict_proba(train_minist_tsne)))
  test_losses.append(log_loss(y_test_minist, clf_minist_adam_tsne.predict_proba(test_minist_tsne)))

# Predictions on the test set
y_pred_minist = clf_minist_adam_tsne.predict(test_minist_tsne)

In [None]:
# Plot losses for MNIST (PCA + t-SNE features, Adam run): log loss recorded by
# the preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for MNIST: true test labels vs. the predictions of the
# most recently trained model (y_pred_minist)
cm = confusion_matrix(y_test_minist, y_pred_minist)
print(cm)

In [None]:
# Metrics for MNIST: prints accuracy, sensitivity, specificity, micro / macro F1
# and the diagnostic index for the current test-set predictions
model_metrics(y_test_minist, y_pred_minist)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

### PCA + T-SNE CIFAR

In [None]:
# Embed the PCA-reduced CIFAR train / test features into 2-D with t-SNE
train_cifar_tsne, test_cifar_tsne = doTSNE_DNN(train_cifar_pca, test_cifar_pca)

**SGD**

In [None]:
# clf_cifar_sgd has already been fitted on inputs with a different number of
# features, so partial_fit would reject the 2-D t-SNE embedding; train an
# unfitted copy that shares its hyperparameters instead
clf_cifar_sgd_tsne = MLPClassifier(**clf_cifar_sgd.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_cifar_sgd_tsne.partial_fit(train_cifar_tsne, y_train_cifar, classes = np.unique(y_train_cifar))
  train_losses.append(log_loss(y_train_cifar, clf_cifar_sgd_tsne.predict_proba(train_cifar_tsne)))
  test_losses.append(log_loss(y_test_cifar, clf_cifar_sgd_tsne.predict_proba(test_cifar_tsne)))

# Predictions on the test set
y_pred_cifar = clf_cifar_sgd_tsne.predict(test_cifar_tsne)

In [None]:
# Plot losses for CIFAR (PCA + t-SNE features): log loss recorded by the
# preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)

**ADAM**

In [None]:
# clf_cifar_adam has already been fitted on inputs with a different number of
# features, so partial_fit would reject the 2-D t-SNE embedding; train an
# unfitted copy that shares its hyperparameters instead
clf_cifar_adam_tsne = MLPClassifier(**clf_cifar_adam.get_params())

train_losses = []
test_losses = []

n_epoch = 100

for _ in range(n_epoch):
  clf_cifar_adam_tsne.partial_fit(train_cifar_tsne, y_train_cifar, classes = np.unique(y_train_cifar))
  train_losses.append(log_loss(y_train_cifar, clf_cifar_adam_tsne.predict_proba(train_cifar_tsne)))
  test_losses.append(log_loss(y_test_cifar, clf_cifar_adam_tsne.predict_proba(test_cifar_tsne)))

# Predictions on the test set
y_pred_cifar = clf_cifar_adam_tsne.predict(test_cifar_tsne)

In [None]:
# Plot losses for CIFAR (PCA + t-SNE features, Adam run): log loss recorded by
# the preceding training cell, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize = (7, 3))
ax.plot(train_losses, label = 'Training Loss')
ax.plot(test_losses, 'b-', label = 'Test Loss')
ax.set_title('Training and Test Loss Curves')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.grid('on')
ax.legend()
plt.show()

In [None]:
# Loss-based heuristic for vanishing / exploding gradients
final_train_loss = train_losses[-1]
if not np.isnan(final_train_loss) and final_train_loss <= 1e3:
  # Finite, moderate loss: compare the loss recorded at index 10 with the final
  # one; practically no decrease is reported as possible vanishing gradients
  if train_losses[10] - final_train_loss < 1e-5:
    print("Warning: Potential vanishing gradients detected!")
  else:
    print("No vanishing or exploding gradients detected!")
else:
  # NaN or a very large final loss is treated as divergence
  print("Warning: Exploding gradients detected!")

In [None]:
# Confusion matrix for CIFAR: true test labels vs. the predictions of the
# most recently trained model (y_pred_cifar)
cm = confusion_matrix(y_test_cifar, y_pred_cifar)
print(cm)

In [None]:
# Metrics for CIFAR
# The CIFAR labels are loaded as an (N, 1) column vector; flatten them so the
# element-wise comparison inside compute_measure lines up with the predictions
# instead of broadcasting into an (N, N) matrix
model_metrics(y_test_cifar.ravel(), y_pred_cifar)

In [None]:
# Calculate eta for train and test loss curves:
# eta = 1 - (std / mean)^2 of the recorded losses, i.e. one minus the squared
# coefficient of variation; the 1e-10 term guards against a zero mean
eta_train = 1 - (np.std(train_losses)/(np.mean(train_losses) + (1e-10)))**2
print("Eta of Train Curve: ", eta_train)
eta_test = 1 - (np.std(test_losses)/(np.mean(test_losses) + (1e-10)))**2
print("Eta of Test Curve: ", eta_test)
# Ratio of the test-curve eta to the train-curve eta
eta_train_test = eta_test / eta_train
print("Eta Test over Train: ", eta_train_test)