In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.callbacks import Callback
import pandas as pd
import os
import json
from datetime import datetime
import numpy as np


class ExperimentTracker(Callback):
    """Keras callback that checkpoints the model and logs metrics every epoch.

    After each epoch the callback optionally saves the model under
    ``models/experiment_<n>/``, records the training/validation metrics found
    in ``logs``, optionally evaluates the model on a held-out test set, and
    rewrites the CSV logs under ``experiment_logs/``. When training ends a
    JSON summary of the run is written next to the CSV files.

    Args:
        experiment_no: Identifier used in every output path and log row.
        x_test: Optional test inputs; the test set is evaluated only when
            both ``x_test`` and ``y_test`` are given.
        y_test: Optional test targets.
        save_model: When true, save the model after every epoch.
    """

    _TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, experiment_no, x_test=None, y_test=None, save_model=True):
        super().__init__()
        self.experiment_no = experiment_no
        self.x_test = x_test
        self.y_test = y_test
        self.save_model = save_model

        # One dict per completed epoch; these become the CSV rows.
        self.train_data = []
        self.test_data = []

        # Create output directories up front so later writes cannot fail on
        # a missing folder.
        os.makedirs(f'models/experiment_{experiment_no}', exist_ok=True)
        os.makedirs('experiment_logs', exist_ok=True)

    @classmethod
    def _now(cls):
        """Return the current local time formatted for the logs."""
        return datetime.now().strftime(cls._TIMESTAMP_FORMAT)

    @staticmethod
    def _metric(source, key):
        """Read metric ``key`` from ``source`` as a plain float (0 if absent).

        Casting to ``float`` keeps numpy scalar values from breaking
        ``json.dump`` in ``on_train_end``.
        """
        return float(source.get(key, 0))

    def on_epoch_end(self, epoch, logs=None):
        """Save the model and record this epoch's metrics.

        Args:
            epoch: Zero-based epoch index; it is stored and used in file
                names as ``epoch + 1``.
            logs: Metric dict for the epoch. Missing metrics are logged as 0.
        """
        logs = logs or {}

        # Save model after each epoch
        if self.save_model:
            model_path = f'models/experiment_{self.experiment_no}/model_epoch_{epoch+1}.h5'
            self.model.save(model_path)
            print(f"Model saved: {model_path}")

        # Training/validation metrics for the CSV log
        self.train_data.append({
            'experiment_no': self.experiment_no,
            'epoch': epoch + 1,
            'train_loss': self._metric(logs, 'loss'),
            'train_accuracy': self._metric(logs, 'accuracy'),
            'val_loss': self._metric(logs, 'val_loss'),
            'val_accuracy': self._metric(logs, 'val_accuracy'),
            'timestamp': self._now(),
        })

        # Evaluate on the test set if one was provided
        if self.x_test is not None and self.y_test is not None:
            # return_dict=True avoids depending on the number/order of the
            # metrics the model was compiled with.
            results = self.model.evaluate(
                self.x_test, self.y_test, verbose=0, return_dict=True
            )
            self.test_data.append({
                'experiment_no': self.experiment_no,
                'epoch': epoch + 1,
                'test_loss': self._metric(results, 'loss'),
                'test_accuracy': self._metric(results, 'accuracy'),
                'timestamp': self._now(),
            })

        # Rewrite the CSV files so they are current after every epoch
        self._save_csv_files()

    def _save_csv_files(self):
        """Write all rows collected so far to the train (and test) CSV files."""
        train_csv_path = f'experiment_logs/train_experiment_{self.experiment_no}.csv'
        pd.DataFrame(self.train_data).to_csv(train_csv_path, index=False)

        # The test CSV is only written once at least one test row exists.
        if self.test_data:
            test_csv_path = f'experiment_logs/test_experiment_{self.experiment_no}.csv'
            pd.DataFrame(self.test_data).to_csv(test_csv_path, index=False)

    def on_train_end(self, logs=None):
        """Write the JSON summary of the run.

        If no epoch completed, the summary is still written with
        ``total_epochs`` of 0 and ``None`` for the metric fields instead of
        raising on the empty history.
        """
        last = self.train_data[-1] if self.train_data else {}
        summary = {
            'experiment_no': self.experiment_no,
            'total_epochs': len(self.train_data),
            'final_train_loss': last.get('train_loss'),
            'final_train_accuracy': last.get('train_accuracy'),
            'final_val_loss': last.get('val_loss'),
            'final_val_accuracy': last.get('val_accuracy'),
            'best_val_accuracy': max(
                (row['val_accuracy'] for row in self.train_data), default=None
            ),
            'training_completed': self._now(),
        }

        if self.test_data:
            summary['final_test_loss'] = self.test_data[-1]['test_loss']
            summary['final_test_accuracy'] = self.test_data[-1]['test_accuracy']

        # Save summary as JSON
        with open(f'experiment_logs/experiment_{self.experiment_no}_summary.json', 'w') as f:
            json.dump(summary, f, indent=2)

# Example usage:
def run_experiment(experiment_no, x_train, y_train, x_val, y_val, x_test=None, y_test=None,
                   epochs=10, batch_size=32):
    """Build, train and track a small CNN classifier.

    The model is a single 3x3 convolution + max-pool followed by a stack of
    dense layers ending in a 10-way softmax, built for inputs of shape
    ``(28, 28, 1)``. Training is tracked by an ``ExperimentTracker`` callback,
    which saves the model and writes the CSV/JSON logs.

    Args:
        experiment_no: Identifier passed to the tracker for its output paths.
        x_train: Training inputs.
        y_train: Training labels; the loss is
            ``sparse_categorical_crossentropy``, so integer class labels are
            expected.
        x_val: Validation inputs.
        y_val: Validation labels.
        x_test: Optional test inputs, evaluated by the tracker every epoch.
        y_test: Optional test labels.
        epochs: Number of training epochs (default 10).
        batch_size: Mini-batch size (default 32).

    Returns:
        A ``(model, history)`` tuple: the trained model and the object
        returned by ``model.fit``.
    """

    # Create your model
    model = Sequential([
        Conv2D(filters=8, kernel_size=(3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(10, activation='softmax')
    ])

    # Show model summary
    model.summary(show_trainable=True)

    # Compile model
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Create experiment tracker callback
    tracker = ExperimentTracker(
        experiment_no=experiment_no,
        x_test=x_test,
        y_test=y_test,
        save_model=True
    )

    # Train model with tracking
    history = model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_val, y_val),
        callbacks=[tracker],
        verbose=1
    )

    return model, history

# Example with MNIST data:
if __name__ == "__main__":
    # Demo run on the MNIST digits dataset.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Convert to float32, divide by 255 and add a trailing channel axis.
    x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
    x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

    # Keep the first 80% for training; the remaining 20% becomes validation.
    split_idx = int(0.8 * len(x_train))
    x_train, x_val = x_train[:split_idx], x_train[split_idx:]
    y_train, y_val = y_train[:split_idx], y_train[split_idx:]

    # Train and track a single experiment.
    experiment_number = 1
    model, history = run_experiment(
        experiment_number,
        x_train,
        y_train,
        x_val,
        y_val,
        x_test=x_test,
        y_test=y_test,
    )

    # Report where the tracker wrote its artifacts.
    print(f"\nExperiment {experiment_number} completed!")
    print("Files saved:")
    for label, location in (
        ("Models", f"models/experiment_{experiment_number}/"),
        ("Training log", f"experiment_logs/train_experiment_{experiment_number}.csv"),
        ("Test log", f"experiment_logs/test_experiment_{experiment_number}.csv"),
        ("Summary", f"experiment_logs/experiment_{experiment_number}_summary.json"),
    ):
        print(f"- {label}: {location}")

# Additional utility functions:
def load_experiment_data(experiment_no):
    """Read the CSV logs written for one experiment.

    Returns a ``(train_df, test_df)`` pair. ``test_df`` is ``None`` when the
    experiment has no test CSV. A missing training CSV is not handled here,
    so the error raised by ``pd.read_csv`` propagates to the caller.
    """
    log_dir = 'experiment_logs'
    train_df = pd.read_csv(f'{log_dir}/train_experiment_{experiment_no}.csv')

    # The test log is optional: it only exists if a test set was tracked.
    try:
        test_df = pd.read_csv(f'{log_dir}/test_experiment_{experiment_no}.csv')
    except FileNotFoundError:
        test_df = None

    return train_df, test_df

def compare_experiments(experiment_numbers):
    """Collect the JSON summaries of several experiments into one DataFrame.

    Each experiment whose summary file exists contributes one row, in the
    order given. Experiments without a summary file are reported on stdout
    and skipped.
    """
    summaries = []

    for exp_no in experiment_numbers:
        summary_path = f'experiment_logs/experiment_{exp_no}_summary.json'
        try:
            handle = open(summary_path, 'r')
        except FileNotFoundError:
            print(f"Summary for experiment {exp_no} not found")
            continue
        with handle:
            summaries.append(json.load(handle))

    return pd.DataFrame(summaries)

# Example of comparing experiments:
# comparison_df = compare_experiments([1, 2, 3])
# print(comparison_df[['experiment_no', 'final_val_accuracy', 'best_val_accuracy']])

2025-09-22 13:31:37.370206: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-22 13:31:37.371096: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-22 13:31:37.376183: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-22 13:31:37.399922: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758526297.435276   16580 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758526297.44

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8036 - loss: 0.5872



Model saved: models/experiment_1/model_epoch_1.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.8037 - loss: 0.5870 - val_accuracy: 0.9682 - val_loss: 0.1128
Epoch 2/10
[1m1497/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.9721 - loss: 0.0899



Model saved: models/experiment_1/model_epoch_2.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 11ms/step - accuracy: 0.9721 - loss: 0.0899 - val_accuracy: 0.9741 - val_loss: 0.0875
Epoch 3/10
[1m1495/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.9826 - loss: 0.0577



Model saved: models/experiment_1/model_epoch_3.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.9826 - loss: 0.0577 - val_accuracy: 0.9800 - val_loss: 0.0685
Epoch 4/10
[1m1495/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.9879 - loss: 0.0391



Model saved: models/experiment_1/model_epoch_4.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 11ms/step - accuracy: 0.9879 - loss: 0.0391 - val_accuracy: 0.9800 - val_loss: 0.0722
Epoch 5/10
[1m1498/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9915 - loss: 0.0272



Model saved: models/experiment_1/model_epoch_5.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.9915 - loss: 0.0272 - val_accuracy: 0.9828 - val_loss: 0.0636
Epoch 6/10
[1m1497/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9930 - loss: 0.0220



Model saved: models/experiment_1/model_epoch_6.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.9930 - loss: 0.0220 - val_accuracy: 0.9831 - val_loss: 0.0625
Epoch 7/10
[1m1494/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9954 - loss: 0.0147



Model saved: models/experiment_1/model_epoch_7.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - accuracy: 0.9954 - loss: 0.0147 - val_accuracy: 0.9803 - val_loss: 0.0763
Epoch 8/10
[1m1498/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9952 - loss: 0.0149



Model saved: models/experiment_1/model_epoch_8.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.9952 - loss: 0.0149 - val_accuracy: 0.9842 - val_loss: 0.0685
Epoch 9/10
[1m1496/1500[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.9964 - loss: 0.0109



Model saved: models/experiment_1/model_epoch_9.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.9964 - loss: 0.0109 - val_accuracy: 0.9807 - val_loss: 0.0836
Epoch 10/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9970 - loss: 0.0093



Model saved: models/experiment_1/model_epoch_10.h5
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.9970 - loss: 0.0093 - val_accuracy: 0.9837 - val_loss: 0.0759

Experiment 1 completed!
Files saved:
- Models: models/experiment_1/
- Training log: experiment_logs/train_experiment_1.csv
- Test log: experiment_logs/test_experiment_1.csv
- Summary: experiment_logs/experiment_1_summary.json
