In [1]:
import wandb

from utils.neural_network import NeuralNetwork
from utils.wandb_classes import WandbTrainer, WandbCallback
from utils.helper_functions import get_optimizer, load_data

In [2]:
# Authenticate this kernel session with Weights & Biases before any run or sweep is started.
wandb.login()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: bullseye2608 (bullseye2608-indian-institute-of-technology-madras) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

In [3]:
# Load the dataset through the project helper; the result is unpacked as
# train / validation / test features and labels, in that order.
X_train, y_train, X_val, y_val, X_test, y_test = load_data('fashion_mnist')

### Trials

In [4]:
# Trial network: 784 inputs, three hidden layers of 128 ReLU units, 10-way softmax output.
nn = NeuralNetwork(layer_sizes=[784, 128, 128, 128, 10], 
                   activation_functions=['relu', 'relu', 'relu', 'softmax'], 
                   weight_init='xavier',
                   weight_decay=0.0, LOG_EACH=True)

# One forward pass over the training data; the last entry of H is passed to
# compute_loss as the network output. NOTE(review): `loss` is computed but never
# displayed or used in the visible cells.
H, A = nn.forward_propagation(X_train)
loss = nn.compute_loss(H[-1], y_train)
# Validation accuracy before any training has been performed.
print(f'{nn.compute_accuracy(X_val, y_val) :>.6f}')

# Attach the optimizer that the following training cell will use.
nn.set_optimizer({'name':'nadam', 'learning_rate':0.001, 'epsilon':1e-7, 'beta1':0.95})

0.125000


In [5]:
# Number of leading training samples used for this trial run.
num_trial_datapoints = 54000

# NOTE(review): this notebook-level flag is not read by any visible cell (the
# network above was already constructed with LOG_EACH=True) — confirm whether
# anything in utils depends on it before removing.
LOG_EACH = True

# Train the trial network on the selected slice, validating against X_val / y_val.
# NOTE(review): the recorded output shows 844 iterations per epoch and a single
# log line, so log_every=5000 effectively silences progress logging — presumably
# intentional; confirm.
nn.train(X_train[:num_trial_datapoints], 
         y_train[:num_trial_datapoints], 
         X_val, y_val, 
         batch_size=64, 
         num_epochs=5, 
         loss_type='cross_entropy', 
         log_every=5000)

LOG_EACH = False

# Visual separator, then the accuracy of the trained network on the test split.
print('--'*20,'DONE','--'*20)
print(nn.compute_accuracy(X_test, y_test))

Running NadamOptimizer self.learning_rate = 0.001 self.beta1 = 0.95 self.beta2 = 0.999 self.epsilon = 1e-07
Epoch 1/5, Iteration   0/844 --> Train Loss: 2.26874, Val Loss: 2.17015
---------------------------------------- DONE ----------------------------------------
0.8787


### WANDB SWEEP

In [6]:
# # Create New sweep
# import yaml

# with open("sweep_config.yaml", "r") as file:
#         sweep_config = yaml.safe_load(file)

# sweep_id = wandb.sweep(sweep_config, 
#                        entity="bullseye2608-indian-institute-of-technology-madras",
#                        project="fashion_mnist_hp_search")

# # Run the sweep
# wandb.agent(sweep_id, wandb_sweep_helper_function, count=60)

In [7]:
# # Continue the sweep

# sweep_id_cont = "bullseye2608-indian-institute-of-technology-madras/fashion_mnist_hp_search/vhbqpquu"
# trainer = WandbTrainer()

# wandb.agent(sweep_id_cont, trainer.train, count=10)

### WANDB RUNS

In [8]:
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from datetime import datetime

from utils.helper_functions import OptimalConfig

In [9]:


# # Set random seeds for reproducibility
# np.random.seed(42)

# # Fashion MNIST class names
# class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
#                'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']



# # Create and log a confusion matrix visualization
# def plot_confusion_matrix(y_true, y_pred, run_id):
#     cm = confusion_matrix(y_true, y_pred)
#     cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
#     # Create a DataFrame for better visualization
#     cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
#     cm_norm_df = pd.DataFrame(cm_normalized, index=class_names, columns=class_names)
    
#     # Create figure with normalized confusion matrix
#     plt.figure(figsize=(10, 8))
#     cmap = sns.diverging_palette(220, 20, as_cmap=True)
#     sns.heatmap(cm_norm_df, annot=True, fmt='.2f', cmap=cmap, 
#                 linewidths=.5, cbar_kws={"shrink": .8})
    
#     plt.title(f'Normalized Confusion Matrix - Run {run_id+1}', fontsize=16)
#     plt.ylabel('True Label', fontsize=12)
#     plt.xlabel('Predicted Label', fontsize=12)
    
#     # Calculate metrics
#     precision = np.diag(cm) / np.sum(cm, axis=0)
#     recall = np.diag(cm) / np.sum(cm, axis=1)
#     f1 = 2 * precision * recall / (precision + recall)
    
#     # Save and return the figure
#     cm_filename = f"confusion_matrix_run_{run_id+1}.png"
#     plt.savefig(cm_filename, dpi=300, bbox_inches='tight')
#     plt.close()
    
#     return cm_filename, cm, precision, recall, f1

# # Run the optimal model configuration multiple times
# def run_multiple_experiments(num_runs=17):
    
#     # Define optimal hyperparameter configuration
#     optimal_config = OptimalConfig()
    
#     # Lists to store results from all runs
#     all_metrics = []
#     all_cms = []
#     all_y_preds = []
#     run_ids = []
    
#     # Create a group ID for all runs
#     group_id = f"optimal-config-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    
#     # Run the model multiple times
#     for run_id in range(num_runs):
#         # Set a different random seed for each run
#         np.random.seed(42 + run_id)
        
#         # Initialize a new wandb run
#         run = wandb.init(
#             project="fashion_mnist_hp_search",
#             name=f"optimal-run-{run_id+1}",
#             tags=["optimal-config"],
#             group=group_id,
#             config=optimal_config,
#         )
#         wandb.config.update(optimal_config)
        
#         config = optimal_config
        
#         layer_sizes = [784] + [config.hidden_size]*config.hidden_layers + [10]
#         activation_functions = [config.activation]*config.hidden_layers + ['softmax']
        
#         nn = NeuralNetwork(layer_sizes=layer_sizes, 
#                         activation_functions=activation_functions,
#                         weight_init=config.weight_init, 
#                         weight_decay=config.weight_decay)
        
#         wandb_callback = WandbCallback()
        
#         optimizer = get_optimizer(config.optimizer, config.learning_rate)
#         nn.set_optimizer(optimizer)
        
#         nn.train(
#             X_train,
#             y_train,
#             X_val,
#             y_val,
#             batch_size=config.batch_size,
#             num_epochs=config.epochs,
#             loss_type=config.loss,
#             log_every=1000,
#             callback=wandb_callback
#         )
        
#         test_accuracy = nn.compute_accuracy(X_test, y_test)
#         wandb.log({"test_accuracy": test_accuracy})
        
#         # Log the configuration
        
        
#         # Train the model
        
#         # Evaluate the model
#         test_loss, test_acc = model.evaluate(x_test, y_test_cat, verbose=0)
        
#         # Get predictions
#         y_pred_probs = model.predict(x_test)
#         y_pred = np.argmax(y_pred_probs, axis=1)
        
#         # Store predictions
#         all_y_preds.append(y_pred)
        
#         # Create and log confusion matrix
#         cm_file, cm, precision, recall, f1 = plot_confusion_matrix(y_test_labels, y_pred, run_id)
#         all_cms.append(cm)
        
#         # Log confusion matrix image
#         wandb.log({"confusion_matrix": wandb.Image(cm_file)})
        
#         # Log interactive confusion matrix
#         wandb.log({"confusion_matrix_plot": wandb.plot.confusion_matrix(
#             probs=None,
#             y_true=y_test_labels,
#             preds=y_pred,
#             class_names=class_names
#         )})
        
#         # Log metrics
#         run_metrics = {
#             'test_loss': test_loss,
#             'test_accuracy': test_acc,
#             'run_id': run_id + 1
#         }
        
#         # Log per-class metrics
#         for i, class_name in enumerate(class_names):
#             run_metrics[f"precision_{class_name}"] = precision[i]
#             run_metrics[f"recall_{class_name}"] = recall[i]
#             run_metrics[f"f1_{class_name}"] = f1[i]
        
#         # Log all metrics
#         wandb.log(run_metrics)
        
#         # Store metrics for aggregate analysis
#         all_metrics.append(run_metrics)
#         run_ids.append(run.id)
        
#         # Finish the run
#         wandb.finish()
        
#         print(f"Completed run {run_id+1}/{num_runs} with accuracy: {test_acc:.4f}")
    
#     # Create an aggregate analysis run
#     run = wandb.init(
#         project="fashion-mnist-optimal",
#         name=f"aggregate-analysis-{num_runs}-runs",
#         tags=["aggregate", "analysis", group_id],
#         group=group_id
#     )
    
#     # Calculate average confusion matrix
#     avg_cm = np.mean(all_cms, axis=0)
#     std_cm = np.std(all_cms, axis=0)
    
#     # Create and log aggregate confusion matrix
#     plt.figure(figsize=(12, 10))
#     avg_cm_normalized = avg_cm.astype('float') / avg_cm.sum(axis=1)[:, np.newaxis]
#     avg_cm_df = pd.DataFrame(avg_cm_normalized, index=class_names, columns=class_names)
    
#     sns.heatmap(avg_cm_df, annot=True, fmt='.2f', cmap='viridis', 
#                 linewidths=.5, cbar_kws={"shrink": .8})
    
#     # Calculate aggregate metrics
#     accuracies = [m['test_accuracy'] for m in all_metrics]
#     mean_acc = np.mean(accuracies)
#     std_acc = np.std(accuracies)
    
#     plt.title(f'Aggregate Confusion Matrix (17 Runs)\nMean Accuracy: {mean_acc:.4f} ± {std_acc:.4f}', fontsize=16)
#     plt.ylabel('True Label', fontsize=12)
#     plt.xlabel('Predicted Label', fontsize=12)
    
#     # Save and log the aggregate confusion matrix
#     agg_cm_file = "aggregate_confusion_matrix.png"
#     plt.savefig(agg_cm_file, dpi=300, bbox_inches='tight')
#     plt.close()
    
#     wandb.log({"aggregate_confusion_matrix": wandb.Image(agg_cm_file)})
    
#     # Log aggregate metrics
#     agg_metrics = {
#         'mean_accuracy': mean_acc,
#         'std_accuracy': std_acc,
#         'min_accuracy': min(accuracies),
#         'max_accuracy': max(accuracies),
#         'num_runs': num_runs
#     }
    
#     # Create a summary table with links to all runs
#     run_table = wandb.Table(columns=["Run ID", "Accuracy", "Link"])
#     for i, (run_id, metrics) in enumerate(zip(run_ids, all_metrics)):
#         run_link = f"https://wandb.ai/[your-username]/fashion-mnist-optimal/runs/{run_id}"
#         run_table.add_data(i+1, metrics['test_accuracy'], run_link)
    
#     wandb.log({"runs_summary": run_table})
#     wandb.log(agg_metrics)
    
#     # Finish the aggregate run
#     wandb.finish()
    
#     return all_metrics, all_cms, all_y_preds

In [None]:
# Human-readable Fashion MNIST labels. Order matters: the list is used
# positionally as the row/column labels of the confusion matrices below.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]



# Element-wise division helper used by plot_confusion_matrix
def _safe_divide(numerator, denominator):
    """Return ``numerator / denominator`` element-wise, with 0.0 wherever the denominator is 0.

    Both arguments are converted to float arrays and broadcast against each
    other, so no NaN/inf values (or divide-by-zero warnings) are produced.
    """
    numerator, denominator = np.broadcast_arrays(
        np.asarray(numerator, dtype=float),
        np.asarray(denominator, dtype=float),
    )
    return np.divide(
        numerator,
        denominator,
        out=np.zeros(numerator.shape, dtype=float),
        where=denominator != 0,
    )


# Create and log a confusion matrix visualization
def plot_confusion_matrix(y_true, y_pred, run_id):
    """Plot the row-normalised confusion matrix for one run and save it as a PNG.

    Args:
        y_true: Ground-truth class labels. Assumed to be integer class indices
            in ``range(len(class_names))`` — TODO confirm against ``load_data``.
        y_pred: Predicted class indices, same length as ``y_true``.
        run_id: Zero-based run index; ``run_id + 1`` appears in the plot title
            and in the output file name.

    Returns:
        Tuple ``(cm_filename, cm, precision, recall, f1)`` where ``cm`` is the
        raw count matrix (one row/column per entry of ``class_names``) and
        ``precision`` / ``recall`` / ``f1`` are per-class float arrays. A class
        with no predictions or no true samples gets 0.0 instead of NaN.
    """
    import os  # function-scope import: keeps this cell independent of other cells' imports

    # Pin the label set so the matrix always has one row/column per class name,
    # even when a class is absent from both y_true and y_pred.
    labels = np.arange(len(class_names))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    row_totals = cm.sum(axis=1)   # number of true samples per class
    col_totals = cm.sum(axis=0)   # number of predictions per class
    true_positives = np.diag(cm)

    # Row-normalise so each row shows the distribution of predictions for a true class.
    cm_normalized = _safe_divide(cm, row_totals[:, np.newaxis])
    cm_norm_df = pd.DataFrame(cm_normalized, index=class_names, columns=class_names)

    # Explicit figure/axes instead of the pyplot state machine, so this function
    # cannot draw onto (or close) a figure owned by another cell.
    fig, ax = plt.subplots(figsize=(10, 8))
    cmap = sns.diverging_palette(220, 20, as_cmap=True)
    sns.heatmap(cm_norm_df, annot=True, fmt='.2f', cmap=cmap,
                linewidths=.5, cbar_kws={"shrink": .8}, ax=ax)
    ax.set_title(f'Normalized Confusion Matrix - Run {run_id+1}', fontsize=16)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

    # Per-class metrics derived from the raw counts.
    precision = _safe_divide(true_positives, col_totals)
    recall = _safe_divide(true_positives, row_totals)
    f1 = _safe_divide(2 * precision * recall, precision + recall)

    # Save the figure; create the target directory first so savefig cannot
    # fail on a fresh checkout, and always release the figure afterwards.
    cm_filename = f"./confusion_matrices/confusion_matrix_run_{run_id+1}.png"
    os.makedirs(os.path.dirname(cm_filename), exist_ok=True)
    try:
        fig.savefig(cm_filename, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    return cm_filename, cm, precision, recall, f1

# Train and evaluate a single configuration inside its own W&B run
def _run_single_experiment(run_id, overrides, group_id):
    """Train one ``OptimalConfig`` variation, log its metrics, and return the results.

    Args:
        run_id: Zero-based index of the run (also offsets the NumPy seed).
        overrides: Keyword overrides passed to ``OptimalConfig``.
        group_id: W&B group name shared by all runs of this experiment.

    Returns:
        Tuple ``(run_metrics, cm, y_pred)``: the logged metrics dict, the raw
        confusion matrix, and the predicted class indices for ``X_val``.
    """
    # Set a different random seed for each run
    np.random.seed(42 + run_id)
    config = OptimalConfig(**overrides)

    # Using the run as a context manager guarantees it is finished even when
    # training or logging raises, instead of being left open in the kernel.
    with wandb.init(
        project="confusion_matrix_trials",
        name=f"optimal-run-{run_id+1}",
        tags=["optimal-config"],
        group=group_id,
        config=config,
    ):
        wandb.config.update(config)
        config.print_config()

        layer_sizes = [784] + [config.hidden_size]*config.num_layers + [10]
        activation_functions = [config.activation]*config.num_layers + ['softmax']

        model = NeuralNetwork(layer_sizes=layer_sizes,
                              activation_functions=activation_functions,
                              weight_init=config.weight_init,
                              weight_decay=config.weight_decay)

        wandb_callback = WandbCallback()

        optimizer = get_optimizer(config.optimizer, config.learning_rate)
        model.set_optimizer(optimizer)

        model.train(
            X_train,
            y_train,
            X_val,
            y_val,
            batch_size=config.batch_size,
            num_epochs=config.epochs,
            loss_type=config.loss,
            log_every=1000,
            callback=wandb_callback
        )

        # NOTE(review): the metrics below are named "test_*" but are computed on
        # the validation split (X_val / y_val); names are kept so existing W&B
        # dashboards keep working — confirm whether X_test / y_test was intended.
        # Predict once and reuse the output for loss, labels and the W&B plot.
        y_pred_probs = model.predict(X_val)
        y_pred = np.argmax(y_pred_probs, axis=1)
        test_loss = model.compute_loss(y_pred_probs, y_val)
        test_acc = model.compute_accuracy(X_val, y_val)

        # Create and log confusion matrix
        cm_file, cm, precision, recall, f1 = plot_confusion_matrix(y_val, y_pred, run_id)

        # Log confusion matrix image
        wandb.log({"confusion_matrix": wandb.Image(cm_file)})

        # Log interactive confusion matrix
        wandb.log({"confusion_matrix_plot": wandb.plot.confusion_matrix(
            probs=y_pred_probs,
            y_true=y_val,
            class_names=class_names
        )})

        # Overall metrics plus per-class precision / recall / F1, logged in one call.
        run_metrics = {
            'test_loss': test_loss,
            'test_accuracy': test_acc,
            'run_id': run_id + 1
        }
        for i, class_name in enumerate(class_names):
            run_metrics[f"precision_{class_name}"] = precision[i]
            run_metrics[f"recall_{class_name}"] = recall[i]
            run_metrics[f"f1_{class_name}"] = f1[i]
        wandb.log(run_metrics)

    return run_metrics, cm, y_pred


# Summarise all runs in one additional W&B run
def _log_aggregate_analysis(all_metrics, all_cms, group_id):
    """Log the averaged confusion matrix and accuracy statistics of all runs."""
    num_runs = len(all_metrics)

    # Calculate aggregate metrics
    accuracies = [m['test_accuracy'] for m in all_metrics]
    mean_acc = np.mean(accuracies)
    std_acc = np.std(accuracies)

    # Average the raw count matrices, then row-normalise; rows without any
    # samples stay at 0 instead of becoming NaN.
    avg_cm = np.mean(all_cms, axis=0)
    row_totals = avg_cm.sum(axis=1, keepdims=True)
    avg_cm_normalized = np.divide(avg_cm, row_totals,
                                  out=np.zeros_like(avg_cm, dtype=float),
                                  where=row_totals != 0)
    avg_cm_df = pd.DataFrame(avg_cm_normalized, index=class_names, columns=class_names)

    with wandb.init(
        project="confusion_matrix_trials",
        name=f"aggregate-analysis-{num_runs}-runs",
        tags=["aggregate", "analysis", group_id],
        group=group_id
    ):
        # Create and log aggregate confusion matrix
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(avg_cm_df, annot=True, fmt='.2f', cmap='viridis',
                    linewidths=.5, cbar_kws={"shrink": .8}, ax=ax)
        # The run count comes from the data rather than a hard-coded number.
        ax.set_title(f'Aggregate Confusion Matrix ({num_runs} Runs)\n'
                     f'Mean Accuracy: {mean_acc:.4f} ± {std_acc:.4f}', fontsize=16)
        ax.set_ylabel('True Label', fontsize=12)
        ax.set_xlabel('Predicted Label', fontsize=12)

        # Save and log the aggregate confusion matrix
        agg_cm_file = "./confusion_matrices/"+"aggregate_confusion_matrix.png"
        try:
            fig.savefig(agg_cm_file, dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)

        wandb.log({"aggregate_confusion_matrix": wandb.Image(agg_cm_file)})

        # Log aggregate metrics
        agg_metrics = {
            'mean_accuracy': mean_acc,
            'std_accuracy': std_acc,
            'min_accuracy': min(accuracies),
            'max_accuracy': max(accuracies),
            'num_runs': num_runs
        }

        # Summary table: one row per run (1-based run number and its accuracy).
        run_table = wandb.Table(columns=["Run ID", "Accuracy"])
        for metrics in all_metrics:
            run_table.add_data(metrics['run_id'], metrics['test_accuracy'])

        wandb.log({"runs_summary": run_table})
        wandb.log(agg_metrics)


# Run the optimal model configuration multiple times
def run_multiple_experiments(variations=None):
    """Train several variations of the optimal configuration and log them to W&B.

    Each variation is trained in its own W&B run (grouped under one
    timestamped group id), followed by one aggregate-analysis run.

    Args:
        variations: Optional iterable of dicts with keyword overrides for
            ``OptimalConfig``, one per run. Defaults to the four variations
            this notebook was originally written for.

    Returns:
        Tuple ``(all_metrics, all_cms, all_y_preds)``: per-run metric dicts,
        raw confusion matrices, and predicted class indices.

    Raises:
        ValueError: If ``variations`` is empty.
    """
    import os  # function-scope import: keeps this cell independent of other cells' imports

    # Set changes in runs
    if variations is None:
        variations = [
            {},
            {'num_layers': 4},
            {'batch_size': 32, 'num_layers': 4},
            {'epochs':15, 'batch_size': 128, 'num_layers': 5},
        ]
    variations = list(variations)
    num_runs = len(variations)
    if num_runs == 0:
        raise ValueError("variations must contain at least one configuration")

    # The per-run and aggregate figures are written here; create it up front so
    # saving cannot fail on a fresh checkout.
    os.makedirs("./confusion_matrices/", exist_ok=True)

    # Lists to store results from all runs
    all_metrics = []
    all_cms = []
    all_y_preds = []

    # Create a group ID for all runs
    group_id = f"optimal-config-{datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # Run the model once per variation
    for run_id, overrides in enumerate(variations):
        run_metrics, cm, y_pred = _run_single_experiment(run_id, overrides, group_id)

        # Store results for aggregate analysis
        all_metrics.append(run_metrics)
        all_cms.append(cm)
        all_y_preds.append(y_pred)

        print(f"Completed run {run_id+1}/{num_runs} with accuracy: {run_metrics['test_accuracy']:.4f}")

    # Create an aggregate analysis run
    _log_aggregate_analysis(all_metrics, all_cms, group_id)

    return all_metrics, all_cms, all_y_preds

In [11]:
# Run every configured variation; returns the per-run metric dicts, confusion
# matrices and predictions collected by run_multiple_experiments.
metrics, cms, y_preds = run_multiple_experiments()
    
print("All runs completed!")
# Mean of the per-run 'test_accuracy' values across all completed runs.
print(f"Average accuracy: {np.mean([m['test_accuracy'] for m in metrics]):.4f}")

epochs: 10
batch_size: 64
loss: cross_entropy
optimizer: adam
learning_rate: 0.001
momentum: 0.9
beta: 0.9
beta1: 0.9
beta2: 0.999
epsilon: 1e-08
weight_decay: 0
weight_init: xavier
num_layers: 3
hidden_size: 128
activation: relu


0,1
epoch,▁▂▃▃▄▅▆▆▇█
f1_Ankle boot,▁
f1_Bag,▁
f1_Coat,▁
f1_Dress,▁
f1_Pullover,▁
f1_Sandal,▁
f1_Shirt,▁
f1_Sneaker,▁
f1_T-shirt/top,▁

0,1
epoch,9.0
f1_Ankle boot,0.94571
f1_Bag,0.96909
f1_Coat,0.79325
f1_Dress,0.8986
f1_Pullover,0.80586
f1_Sandal,0.96364
f1_Shirt,0.70081
f1_Sneaker,0.92403
f1_T-shirt/top,0.84517


Completed run 1/4 with accuracy: 0.8848


epochs: 10
batch_size: 64
loss: cross_entropy
optimizer: adam
learning_rate: 0.001
momentum: 0.9
beta: 0.9
beta1: 0.9
beta2: 0.999
epsilon: 1e-08
weight_decay: 0
weight_init: xavier
num_layers: 4
hidden_size: 128
activation: relu


0,1
epoch,▁▂▃▃▄▅▆▆▇█
f1_Ankle boot,▁
f1_Bag,▁
f1_Coat,▁
f1_Dress,▁
f1_Pullover,▁
f1_Sandal,▁
f1_Shirt,▁
f1_Sneaker,▁
f1_T-shirt/top,▁

0,1
epoch,9.0
f1_Ankle boot,0.95588
f1_Bag,0.96315
f1_Coat,0.81519
f1_Dress,0.8853
f1_Pullover,0.817
f1_Sandal,0.96265
f1_Shirt,0.71942
f1_Sneaker,0.93663
f1_T-shirt/top,0.81217


Completed run 2/4 with accuracy: 0.8852


epochs: 10
batch_size: 32
loss: cross_entropy
optimizer: adam
learning_rate: 0.001
momentum: 0.9
beta: 0.9
beta1: 0.9
beta2: 0.999
epsilon: 1e-08
weight_decay: 0
weight_init: xavier
num_layers: 4
hidden_size: 128
activation: relu


0,1
epoch,▁▂▃▃▄▅▆▆▇█
f1_Ankle boot,▁
f1_Bag,▁
f1_Coat,▁
f1_Dress,▁
f1_Pullover,▁
f1_Sandal,▁
f1_Shirt,▁
f1_Sneaker,▁
f1_T-shirt/top,▁

0,1
epoch,9.0
f1_Ankle boot,0.95844
f1_Bag,0.96302
f1_Coat,0.81376
f1_Dress,0.8953
f1_Pullover,0.82506
f1_Sandal,0.96919
f1_Shirt,0.71064
f1_Sneaker,0.94835
f1_T-shirt/top,0.84237


Completed run 3/4 with accuracy: 0.8925


epochs: 15
batch_size: 128
loss: cross_entropy
optimizer: adam
learning_rate: 0.001
momentum: 0.9
beta: 0.9
beta1: 0.9
beta2: 0.999
epsilon: 1e-08
weight_decay: 0
weight_init: xavier
num_layers: 5
hidden_size: 128
activation: relu


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
f1_Ankle boot,▁
f1_Bag,▁
f1_Coat,▁
f1_Dress,▁
f1_Pullover,▁
f1_Sandal,▁
f1_Shirt,▁
f1_Sneaker,▁
f1_T-shirt/top,▁

0,1
epoch,14.0
f1_Ankle boot,0.94702
f1_Bag,0.96259
f1_Coat,0.81182
f1_Dress,0.90213
f1_Pullover,0.82072
f1_Sandal,0.96186
f1_Shirt,0.67055
f1_Sneaker,0.93952
f1_T-shirt/top,0.82931


Completed run 4/4 with accuracy: 0.8850


ValueError: This table expects 3 columns: ['Run ID', 'Accuracy', 'Link'], found 2