<a href="https://colab.research.google.com/github/CogNetSys/MetaStrata/blob/main/autoGrokML_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture cap_cell1
!pip uninstall -y torch torchvision torchaudio
!pip cache purge
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
%%capture cap_cell2
import os
import time
import logging
import json
import datetime
import numpy as np
import matplotlib.pyplot as plt
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

print("🔧 Logging is set up!") # Keep the logging setup

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from tqdm import tqdm

import pandas as pd  # For CSV handling
import copy

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
print("🔧 Logging is set up!")

In [None]:
%%capture cap_cell3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🚀 Using device: {device}")

# Choose dataset (must match exactly)
dataset_name = "PIMA"  # Change this value as needed

enable_multimodal = False  # (Currently, only single-domain tabular datasets are supported)

# Fallback chain used when a dataset cannot be loaded. Datasets without an
# entry here propagate their loading error unchanged.
_DATASET_FALLBACKS = {"Heart": "Titanic", "Titanic": "Iris"}


def _load_raw_dataset(name):
    """Load the unscaled features and labels of one supported dataset.

    Parameters:
        name (str): One of "Iris", "Wine", "Titanic", "Digits", "PIMA", "Heart".

    Returns:
        tuple: (features, labels, n_extra, label) where n_extra is the number
        of simulated noise columns to append to the features and label is the
        dataset name used in the status message.

    Raises:
        ValueError: If name is not one of the supported datasets.
    """
    if name in ("Iris", "Wine", "Digits"):
        from sklearn import datasets
        loaders = {
            "Iris": datasets.load_iris,
            "Wine": datasets.load_wine,
            "Digits": datasets.load_digits,
        }
        bunch = loaders[name]()
        # Iris gets three simulated columns, the other two get two.
        return bunch.data, bunch.target, (3 if name == "Iris" else 2), name
    if name == "Titanic":
        import seaborn as sns
        df = sns.load_dataset("titanic").dropna()
        features = df[['pclass', 'age', 'sibsp', 'parch', 'fare']].values
        return features, df["survived"].values, 2, "Titanic"
    if name == "PIMA":
        url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        column_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]
        df = pd.read_csv(url, header=None, names=column_names)
        return df.drop("Outcome", axis=1).values, df["Outcome"].values, 2, "PIMA Diabetes"
    if name == "Heart":
        url = "https://raw.githubusercontent.com/selva86/datasets/master/heart.csv"
        df = pd.read_csv(url)
        df["AHD"] = df["AHD"].apply(lambda x: 0 if x=="No" else 1)
        feature_columns = ["Age", "Sex", "ChestPain", "RestBP", "Chol", "Fbs", "RestECG", "MaxHR", "ExAng", "Oldpeak", "Slope", "Ca", "Thal"]
        # Only the numeric columns are kept; non-numeric ones are dropped.
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        feature_columns = [col for col in feature_columns if col in numeric_cols]
        return df[feature_columns].values, df["AHD"].values, 1, "Heart Disease"
    raise ValueError(f"Dataset '{name}' not recognized. Please choose from Iris, Wine, Titanic, Digits, PIMA, or Heart.")


# Load the requested dataset. When loading fails and a fallback is configured,
# the fallback is actually loaded (previously the Titanic -> Iris fallback only
# renamed dataset_name and left X / y undefined).
while True:
    try:
        raw_X, y, n_extra, dataset_label = _load_raw_dataset(dataset_name)
        scaler = StandardScaler()
        X = scaler.fit_transform(raw_X)
        np.random.seed(42)  # fixed seed so the simulated columns are reproducible
        extra_features = np.random.normal(0, 1, size=(X.shape[0], n_extra))
        X = np.concatenate([X, extra_features], axis=1)
        print(f"✅ Using {dataset_label} dataset with simulated extra features.")
        break
    except Exception as e:
        fallback_name = _DATASET_FALLBACKS.get(dataset_name)
        if fallback_name is None:
            # No fallback configured (includes the "not recognized" ValueError).
            raise
        logging.warning("Loading dataset %s failed: %s", dataset_name, e)
        print(f"⚠️ Could not load {dataset_name} dataset; falling back to {fallback_name}.")
        dataset_name = fallback_name

X = np.array(X)
y = np.array(y)
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)
# Stratified 80/20 split so both partitions keep the class proportions.
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42, stratify=y_tensor)
X_train = X_train.to(device)
X_val = X_val.to(device)
y_train = y_train.to(device)
y_val = y_val.to(device)

input_size = X_train.shape[1]
print(f"📊 Dataset: {dataset_name} | Training: {X_train.shape}, Validation: {X_val.shape}")
print(f"🔢 Unique classes: {torch.unique(y_tensor)}")

In [None]:
%%capture cap_cell4
def mixup(x, y, alpha=0.2, num_classes=None):
    """Blend each sample with a randomly chosen partner from the same batch.

    Parameters:
        x (torch.Tensor): Input batch; the first dimension indexes samples.
        y (torch.Tensor): Integer class labels, one per sample.
        alpha (float): Both shape parameters of the Beta distribution the
            mixing coefficient is drawn from.
        num_classes (int, optional): Width of the one-hot targets. When
            omitted it is inferred as ``max(y) + 1``, which is too narrow
            whenever the batch does not contain the highest class; pass the
            real class count to keep targets aligned with the model output.

    Returns:
        tuple: (mixed_x, mixed_y) where mixed_y holds soft one-hot targets of
        shape (batch, num_classes).
    """
    # Python float so the tensor dtype is preserved by the arithmetic below.
    lam = float(np.random.beta(alpha, alpha))
    index = torch.randperm(x.size(0)).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    if num_classes is None:
        num_classes = int(torch.max(y)) + 1
    y_onehot = torch.nn.functional.one_hot(y, num_classes=num_classes).float()
    mixed_y = lam * y_onehot + (1 - lam) * y_onehot[index, :]
    return mixed_x, mixed_y

def monte_carlo_dropout(model, x, epoch, num_samples=10):
    """Estimate predictive mean and overall variance with stochastic forward passes.

    The model is switched to training mode so dropout stays active, sampled
    ``num_samples`` times without building an autograd graph, and then put
    back into whatever mode it was in before the call.

    Parameters:
        model (torch.nn.Module): Model producing class logits of shape (batch, classes).
        x (torch.Tensor): Input batch.
        epoch: Unused; kept for signature compatibility.
        num_samples (int): Number of stochastic forward passes.

    Returns:
        tuple: (mean_preds, variance) where mean_preds is the softmax output
        averaged over the passes and variance is a float: the across-pass
        variance averaged over all samples and classes.
    """
    was_training = model.training
    model.train()  # Enable dropout during inference
    try:
        with torch.no_grad():  # inference only: no graph needed
            preds = torch.stack(
                [torch.softmax(model(x), dim=1) for _ in range(num_samples)],
                dim=0,
            )
    finally:
        # Do not leave the caller's model stuck in training mode.
        model.train(was_training)
    mean_preds = preds.mean(dim=0)
    variance = preds.var(dim=0).mean().item()
    return mean_preds, variance

def monte_carlo_dropout_per_sample(model, x, epoch, num_samples=10):
    """Return one dropout-based uncertainty score per input sample.

    The model is switched to training mode so dropout stays active, sampled
    ``num_samples`` times without building an autograd graph, and then put
    back into whatever mode it was in before the call.

    Parameters:
        model (torch.nn.Module): Model producing class logits of shape (batch, classes).
        x (torch.Tensor): Input batch.
        epoch: Unused; kept for signature compatibility.
        num_samples (int): Number of stochastic forward passes.

    Returns:
        torch.Tensor: Shape (batch,); the across-pass variance of the softmax
        output, averaged over classes.
    """
    was_training = model.training
    model.train()  # keep dropout active while sampling
    try:
        with torch.no_grad():  # inference only: no graph needed
            preds = torch.stack(
                [torch.softmax(model(x), dim=1) for _ in range(num_samples)],
                dim=0,
            )
    finally:
        # Do not leave the caller's model stuck in training mode.
        model.train(was_training)
    sample_variances = preds.var(dim=0).mean(dim=1)
    return sample_variances

In [None]:
%%capture cap_cell5
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Deep model candidate: a simple MLP defined using PyTorch.
class MLPModel(nn.Module):
    """Feed-forward classifier: repeated Linear -> ReLU -> Dropout, then a Linear head.

    Parameters:
        input_size (int): Number of input features.
        hidden_size (int): Width of every hidden layer.
        num_layers (int): Number of hidden Linear/ReLU/Dropout groups.
        dropout (float): Dropout probability used after each hidden layer.
        num_classes (int): Number of output logits.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.3, num_classes=2):
        super().__init__()
        stack = []
        for depth in range(num_layers):
            # Only the first hidden layer sees the raw feature width.
            fan_in = hidden_size if depth else input_size
            stack.extend((nn.Linear(fan_in, hidden_size), nn.ReLU(), nn.Dropout(dropout)))
        # With zero hidden layers the head maps the inputs straight to logits.
        head_in = hidden_size if num_layers > 0 else input_size
        stack.append(nn.Linear(head_in, num_classes))
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw class logits for the batch ``x``."""
        return self.network(x)

# Global SimpleMLPWrapper for Bayesian optimization and candidate evaluation.
class SimpleMLPWrapper(BaseEstimator):
    """scikit-learn style estimator wrapping ``MLPModel``.

    The constructor arguments are stored unchanged as attributes so that
    ``get_params`` / ``set_params`` / ``clone`` work. The network itself lives
    in ``self.model`` and is rebuilt from the current hyperparameters at the
    start of every ``fit`` call, so values applied through ``set_params``
    (which is what hyperparameter searches do after cloning) actually take
    effect instead of silently training the constructor-time architecture.

    Parameters:
        input_size (int): Number of input features.
        hidden_size (int): Width of each hidden layer.
        num_layers (int): Number of hidden layers.
        dropout (float): Dropout probability.
        num_classes (int): Number of output classes.
        epochs (int): Number of full-batch training steps run by ``fit``.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.3, num_classes=2, epochs=20):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.num_classes = num_classes
        self.epochs = epochs
        # Built eagerly as well so ``.model`` is available before ``fit``.
        self.model = self._build_model()

    def _build_model(self):
        """Create a fresh network on ``device`` from the current hyperparameters."""
        return MLPModel(self.input_size, self.hidden_size, self.num_layers,
                        self.dropout, self.num_classes).to(device)

    @staticmethod
    def _to_tensor(data, dtype):
        """Convert array-like or tensor input to a tensor of ``dtype`` on ``device``."""
        if torch.is_tensor(data):
            return data.to(device=device, dtype=dtype)
        return torch.as_tensor(np.asarray(data), dtype=dtype).to(device)

    def _logits(self, X):
        """Run the network in eval mode without gradients and return the logits."""
        self.model.eval()
        with torch.no_grad():
            return self.model(self._to_tensor(X, torch.float32))

    def fit(self, X, y, sample_weight=None):
        """Train a freshly initialised network with full-batch Adam steps.

        Parameters:
            X: Feature matrix (array-like or tensor).
            y: Integer class labels.
            sample_weight: Accepted for API compatibility; ignored.

        Returns:
            SimpleMLPWrapper: ``self``.
        """
        # Rebuild so hyperparameters changed via set_params are honoured.
        self.model = self._build_model()
        self.model.train()
        optimizer = optim.Adam(self.model.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        X_tensor = self._to_tensor(X, torch.float32)
        y_tensor = self._to_tensor(y, torch.long)
        for _ in range(self.epochs):
            optimizer.zero_grad()
            loss = criterion(self.model(X_tensor), y_tensor)
            loss.backward()
            optimizer.step()
        return self

    def predict(self, X):
        """Return the predicted class index for every row of ``X`` as a NumPy array."""
        return np.argmax(self._logits(X).cpu().numpy(), axis=1)

    def predict_proba(self, X):
        """Return softmax class probabilities for every row of ``X`` as a NumPy array."""
        return torch.softmax(self._logits(X), dim=1).cpu().numpy()


In [None]:
%%capture cap_cell6
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from skopt.space import Integer, Real

def select_model(X, y, input_size):
    """Score the candidate models and return the (forced) MLP selection.

    Logistic regression and a random forest are scored with 3-fold
    cross-validated accuracy. The MLP architecture is tuned with
    BayesSearchCV; a fresh MLP with the best parameters is then trained on a
    70/30 split and scored on the held-out 30%. Regardless of the scores, the
    MLP is always the model that is returned.

    Parameters:
        X: Feature matrix.
        y: Integer class labels.
        input_size (int): Number of input features for the MLP.

    Returns:
        tuple: (best_model_name, selected_model, candidate_scores,
        is_deep_model, best_mlp_params).
    """
    print("🔎 Evaluating candidate models for dataset:", dataset_name)
    candidate_scores = {}  # name -> accuracy, reported back to the caller

    # Candidates 1 and 2: classical baselines, scored in this order.
    baselines = (
        ("LogReg", LogisticRegression(max_iter=400)),
        ("RF", RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)),
    )
    for label, estimator in baselines:
        cv_accuracy = cross_val_score(estimator, X, y, cv=3, scoring='accuracy').mean()
        candidate_scores[label] = cv_accuracy
        print(f"📈 Candidate {label} CV Accuracy: {cv_accuracy*100:.2f}%")

    # Candidate 3: deep model (MLP) tuned with Bayesian optimization.
    print("🔎 Tuning MLP with BayesSearchCV...")
    n_classes = int(np.max(y)) + 1
    search_space_mlp = {
        'hidden_size': Integer(32, 128),
        'num_layers': Integer(1, 3),
        'dropout': Real(0.1, 0.5, prior='uniform'),
    }
    bayes_search_mlp = BayesSearchCV(
        SimpleMLPWrapper(input_size, num_classes=n_classes, epochs=20),
        search_space_mlp,
        n_iter=3,   # kept small for a fast turnaround
        cv=2,       # kept small for speed
        scoring='accuracy',
        random_state=42,
        n_jobs=-1,  # use all available CPUs
    )
    bayes_search_mlp.fit(X, y)
    best_mlp_params = bayes_search_mlp.best_params_
    print(f"🏆 Best MLP params found by BayesSearchCV: {best_mlp_params}")

    # Train a fresh MLP with the winning parameters and score it on a hold-out split.
    best_mlp = SimpleMLPWrapper(
        input_size,
        hidden_size=best_mlp_params['hidden_size'],
        num_layers=best_mlp_params['num_layers'],
        dropout=best_mlp_params['dropout'],
        num_classes=n_classes,
        epochs=20,
    )
    from sklearn.model_selection import train_test_split  # Import locally if needed.
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y, test_size=0.3, random_state=42)
    best_mlp.fit(X_fit, y_fit)
    score_mlp = (best_mlp.predict(X_holdout) == y_holdout).mean()
    candidate_scores["MLP"] = score_mlp
    print(f"📈 Candidate MLP CV Accuracy: {score_mlp*100:.2f}%")

    # The selection is hard-wired to the MLP for the RL experiments.
    best_model_name = "MLP"
    selected_model = best_mlp
    is_deep_model = True
    print("⚠️⚠️⚠️ FORCING MODEL SELECTION: OVERRIDING TO ALWAYS SELECT MLP for RL Testing! ⚠️⚠️⚠️")
    print(f"✅ Selected Model: {best_model_name} with {candidate_scores[best_model_name]*100:.2f}% accuracy (OVERRIDDEN for RL Testing)")
    print(f"DEBUG: Cell 6 - Best Model Name: {best_model_name}, Is Deep Model: {is_deep_model}")
    return best_model_name, selected_model, candidate_scores, is_deep_model, best_mlp_params


In [None]:
%%capture cap_cell7
# Cell 7: Reinforcement Learning Agent for Learning Rate Tuning (Q-Learning)
import numpy as np

class LearningRateAgent:
    """Tabular Q-learning agent that picks a learning-rate adjustment action.

    Parameters:
        action_space (list): Discrete actions, e.g. ["increase", "decrease", "maintain"].
        learning_rate (float): Step size of the Q-learning update.
        discount_factor (float): Discount applied to the best next-state value.
        exploration_rate (float): Probability of taking a random action.
    """

    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        self.action_space = action_space
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = exploration_rate
        self.q_table = {}  # state key -> array with one Q-value per action

    def get_state_key(self, state):
        """Turn a state (dict or other iterable) into a hashable tuple."""
        source = state.values() if isinstance(state, dict) else state
        return tuple(source)

    def choose_action(self, state):
        """Pick an action epsilon-greedily; unseen states always get a random action."""
        key = self.get_state_key(state)
        exploring = np.random.uniform(0, 1) < self.epsilon
        known_values = self.q_table.get(key)
        if exploring or known_values is None:
            return np.random.choice(self.action_space)
        return self.action_space[np.argmax(known_values)]

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition (state, action) -> next_state."""
        state_key = self.get_state_key(state)
        next_key = self.get_state_key(next_state)

        # Unseen states start with all-zero action values.
        for key in (state_key, next_key):
            if key not in self.q_table:
                self.q_table[key] = np.zeros(len(self.action_space))

        slot = self.action_space.index(action)
        row = self.q_table[state_key]
        old_value = row[slot]
        best_next = np.max(self.q_table[next_key])

        # Q(s, a) <- Q(s, a) + lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
        row[slot] = old_value + self.lr * (reward + self.gamma * best_next - old_value)

In [None]:
%%capture cap_compute_dynamic_weights
# Define compute_dynamic_weights function
def compute_dynamic_weights(model, X_val, y_val, criterion, device):
    """
    Compute a dynamic weight for the accuracy reward based on the gradient norm
    computed on the validation set. The weight is inversely related to the norm:
    a small gradient norm (model close to a stationary point on the validation
    loss) yields a large weight.

    The gradient is evaluated in eval mode so dropout does not make the weight
    stochastic, and it is obtained with ``torch.autograd.grad`` so the
    parameters' ``.grad`` buffers are neither cleared nor filled with validation
    gradients. The model's train/eval mode is restored before returning.

    Parameters:
        model (torch.nn.Module): The PyTorch model.
        X_val (torch.Tensor): Validation input data.
        y_val (torch.Tensor): Validation labels.
        criterion: Loss function (e.g., nn.CrossEntropyLoss()).
        device (torch.device): Device on which to run computations.

    Returns:
        float: ``100 / (gradient_norm + 1e-6)``.
    """
    was_training = model.training
    model.eval()
    try:
        # Make sure the validation data is on the correct device
        X_val = X_val.to(device)
        y_val = y_val.to(device)

        # Forward pass and loss calculation on validation data
        loss = criterion(model(X_val), y_val)

        trainable = [p for p in model.parameters() if p.requires_grad]
        # allow_unused: parameters that do not contribute to the loss yield None.
        grads = torch.autograd.grad(loss, trainable, allow_unused=True) if trainable else ()
    finally:
        model.train(was_training)

    # Global L2 norm over all parameter gradients
    total_norm = sum(g.norm(2).item() ** 2 for g in grads if g is not None) ** 0.5

    # Inverse relationship: a smaller gradient norm results in a larger weight.
    dynamic_weight = 1.0 / (total_norm + 1e-6)
    # Scale the weight by 100
    return dynamic_weight * 100.0

# Reminder: Ensure that your device (device), the mixup function (if used), and the LearningRateAgent class
# (from Cell 7) are defined in prior cells.
print("✅ compute_dynamic_weights function defined.")


In [None]:
%%capture cap_cell8
# Cell 8: Iterative Fine-Tuning (Grokking) - Robust Accuracy Priority & Direct Print Debugging
# (RL TUNING FOR DEEP MODELS + SKLEARN) - VERSION 11: REFINED REWARD FUNCTION V6
# - DYNAMIC GRADIENT WEIGHTING & ADAPTIVE NORMALIZATION - UNBOUNDLOCALERROR FIXED (DEFINITELY, REALLY FINAL FIX!)
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score, f1_score
from collections import deque
import torch.optim as optim  # Import optimizer
import numpy as np  # For normalization

# Import the LearningRateAgent class (ensure Cell 7 is run before Cell 8)
from __main__ import LearningRateAgent  # Assuming LearningRateAgent is defined in Cell 7

# --- Helper functions for adaptive normalization ---
def adaptive_normalize(value, moving_avg, epsilon=1e-9):
    """Normalize the reward component by its moving average magnitude.

    Parameters:
        value (float): Raw reward component.
        moving_avg (float): Moving average of that component. Its absolute
            value is used, so a negative average cannot flip the sign of
            ``value``.
        epsilon (float): Added to the denominator to avoid division by zero.

    Returns:
        float: ``value / (|moving_avg| + epsilon)``. When ``moving_avg`` is NaN
        (for example the mean of an empty history) ``value`` is returned
        unchanged, so one missing statistic cannot turn the reward into NaN.
    """
    if np.isnan(moving_avg):
        return value
    return value / (np.abs(moving_avg) + epsilon)

def update_moving_average(history, new_value, window=10):
    """Update the moving average (absolute values) over a fixed window.

    Parameters:
        history (list or collections.deque): Mutable sequence of past
            magnitudes; modified in place.
        new_value (float): New observation; its absolute value is stored.
        window (int): Maximum number of entries kept in ``history``.

    Returns:
        float: Mean of the entries left in ``history`` after the update.
    """
    history.append(np.abs(new_value))
    # deque.pop() takes no index argument, so drop the oldest entry with
    # popleft() when available and fall back to list.pop(0) otherwise.
    drop_oldest = history.popleft if hasattr(history, "popleft") else (lambda: history.pop(0))
    while len(history) > window:
        drop_oldest()
    return np.mean(list(history))
# --- End helper functions ---

# Define grok_deep_model_v11 function
def grok_deep_model_v11(model_wrapper, X_train, y_train, X_val, y_val, min_iterations=10, max_iterations=20, tolerance=0.001):
    """Fine-tune the wrapped network while a Q-learning agent adjusts the learning rate.

    Each iteration performs one full-batch Adam step on the training data,
    evaluates train/validation loss and accuracy, lets the RL agent scale the
    learning rate (x1.1, x0.9 or unchanged), and rewards the agent from three
    components: sustained validation-accuracy gains, a penalty on the averaged
    generalization gap, and a reward for shrinking the gap. Each component is
    normalized by the moving average of its own absolute raw values.

    Parameters:
        model_wrapper: Object exposing the PyTorch network as ``.model``; the
            network is trained in place and the wrapper is deep-copied for snapshots.
        X_train, y_train (torch.Tensor): Training inputs and integer labels.
        X_val, y_val (torch.Tensor): Validation inputs and integer labels.
        min_iterations (int): Early stopping is only considered after this many iterations.
        max_iterations (int): Upper bound on the number of iterations.
        tolerance (float): Minimum decrease of the composite metric between two
            consecutive iterations that still counts as an improvement.

    Returns:
        tuple: (best_accuracy_model, best_metrics_accuracy). When no iteration
        satisfied the accuracy criterion, ``model_wrapper`` itself and an empty
        dict are returned.
    """
    print("🔄 STARTING GROKKING ITERATIONS - RL LEARNING RATE TUNING ENABLED FOR DEEP MODELS (MLP) - VERSION 11: REFINED REWARD V6 - DYNAMIC GRADIENT WEIGHTING & ADAPTIVE NORMALIZATION")

    model = model_wrapper.model.to(device)  # Access the PyTorch model from the wrapper and move to device
    optimizer = optim.Adam(model.parameters(), lr=0.01)  # Initialize optimizer
    criterion = nn.CrossEntropyLoss()  # Loss criterion

    best_model_composite = None
    best_composite_metric = float('inf')
    best_metrics_composite = {}

    best_accuracy_model = None
    best_accuracy_val_acc = 0.0
    best_metrics_accuracy = {}

    # RL Agent Initialization
    action_space = ["increase", "decrease", "maintain"]
    rl_agent = LearningRateAgent(action_space=action_space, exploration_rate=0.3)
    current_learning_rate = 0.01
    print(f"   Initial Learning Rate (RL Agent Initialized - GROK_DEEP_V11): {current_learning_rate:.4f}")

    prev_val_acc = 0.0
    prev_metric = None  # composite metric of the previous iteration
    val_acc_history = deque(maxlen=10)
    prev_val_acc_moving_avg = 0.0
    val_gap_history = deque(maxlen=10)
    prev_gap = 0.0
    # Histories of the absolute RAW reward components (used for normalization).
    ma_history_acc_reward = deque(maxlen=10)
    ma_history_gap_penalty = deque(maxlen=10)
    ma_history_gap_reduction_reward = deque(maxlen=10)

    # The labels never change, so convert them for the sklearn metrics once.
    y_train_labels = y_train.cpu().numpy()
    y_val_labels = y_val.cpu().numpy()

    for it in range(max_iterations):
        model.train()  # Set to training mode

        # Mixup Data Augmentation (optional)
        use_mixup = False  # Set to True if you want to enable mixup
        if use_mixup:
            mixed_X_train, mixed_y_train = mixup(X_train, y_train)
        else:
            mixed_X_train, mixed_y_train = X_train, y_train

        # Forward pass and loss calculation
        optimizer.zero_grad()
        outputs = model(mixed_X_train)
        loss = criterion(outputs, mixed_y_train)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            train_preds_proba = torch.softmax(model(X_train), dim=1).cpu().numpy()
            val_preds_proba = torch.softmax(model(X_val), dim=1).cpu().numpy()
        train_loss = log_loss(y_train_labels, train_preds_proba)
        val_loss = log_loss(y_val_labels, val_preds_proba)
        train_preds = np.argmax(train_preds_proba, axis=1)
        val_preds = np.argmax(val_preds_proba, axis=1)
        train_acc = accuracy_score(y_train_labels, train_preds)
        val_acc = accuracy_score(y_val_labels, val_preds)
        gap = val_loss - train_loss
        composite_metric = val_loss + abs(gap)

        val_acc_history.append(val_acc)
        val_acc_moving_avg = np.mean(val_acc_history)
        # Update val_gap_history before computing moving average gap
        val_gap_history.append(gap)
        val_gap_moving_avg = np.mean(val_gap_history)

        try:
            auc = roc_auc_score(y_val_labels, val_preds_proba[:, 1])
        except (ValueError, IndexError):
            # AUC from column 1 is only defined for binary targets with both
            # classes present; report NaN otherwise.
            auc = np.nan
        f1 = f1_score(y_val_labels, val_preds, average='weighted')

        # Define the RL state
        state = {
            'val_acc': val_acc,
            'val_loss': val_loss,
            'gap': gap,
            'train_acc': train_acc,
            'train_loss': train_loss,
            'learning_rate': current_learning_rate,
            'val_acc_change': val_acc - prev_val_acc,
            'val_acc_moving_avg': val_acc_moving_avg,
            'val_gap_moving_avg': val_gap_moving_avg,
            'iteration': it + 1
        }

        # RL Agent selects an action
        action = rl_agent.choose_action(state)
        print(f"   RL Agent Action (GROK_DEEP_V11): {action}")

        # Apply action: adjust learning rate accordingly
        if action == "increase":
            current_learning_rate *= 1.1
        elif action == "decrease":
            current_learning_rate *= 0.9
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
        print(f"   Learning Rate Adjusted (GROK_DEEP_V11): {current_learning_rate:.4f}")

        update_composite = False
        update_accuracy = False
        reason = ""

        if val_acc > best_accuracy_val_acc and gap < 0.01:
            update_accuracy = True
            reason = "🏆 accuracy & small gap"
            print(f"DEBUG: Iteration {it+1} - ACCURACY UPDATE TRIGGERED - Val Acc: {val_acc*100:.2f}%, Gap: {gap:.4f} (GROK_DEEP_V11)")
        elif composite_metric < best_composite_metric:
            update_composite = True
            reason = "composite metric"
            print(f"DEBUG: Iteration {it+1} - COMPOSITE UPDATE TRIGGERED - Composite: {composite_metric:.4f} (GROK_DEEP_V11)")

        status_emoji = "😊" if update_accuracy or update_composite else "Processing..."
        selection_reason_log = f"({reason})" if update_accuracy or update_composite else "()"
        print(f"Iteration {it+1}: Val Acc={val_acc*100:.2f}%, Gap={gap:.4f}, Composite={composite_metric:.4f} {status_emoji} {selection_reason_log} (GROK_DEEP_V11)")

        if update_accuracy:
            best_accuracy_val_acc = val_acc
            best_metrics_accuracy = {"Val Acc": val_acc, "AUC": auc, "F1": f1, "Composite": composite_metric, "Iteration": it+1, "Gap": gap, "Learning Rate": current_learning_rate}
            best_accuracy_model = copy.deepcopy(model_wrapper)
            print(f"🎯 NEW BEST ACCURACY MODEL: Iteration {it+1} - {reason.upper()} - Val Acc: {val_acc*100:.2f}%, Gap: {gap:.4f}, Composite: {composite_metric:.4f}, LR: {current_learning_rate:.4f} (GROK_DEEP_V11)")
            print(f"   Best Accuracy Model (Iteration {it+1} - GROK_DEEP_V11): {best_accuracy_model}")

        if update_composite:
            best_composite_metric = composite_metric
            best_metrics_composite = {"Val Acc": val_acc, "AUC": auc, "F1": f1, "Composite": composite_metric, "Iteration": it+1, "Gap": gap, "Learning Rate": current_learning_rate}
            best_model_composite = copy.deepcopy(model_wrapper)
            print(f"👉 New best composite model at iteration {it+1} - {reason} - Val Acc {val_acc*100:.2f}%, Gap {gap:.4f}, Composite {composite_metric:.4f}, LR: {current_learning_rate:.4f}! (GROK_DEEP_V11)")
            print(f"   Best Composite Model (Iteration {it+1} - GROK_DEEP_V11): {best_model_composite}")

        accuracy_threshold = 0.70
        acc_avg_gain = val_acc_moving_avg - prev_val_acc_moving_avg
        reward_accuracy_sustained = acc_avg_gain * 100 if (val_acc_moving_avg > accuracy_threshold and acc_avg_gain > 0) else 0
        penalty_gap = -val_gap_moving_avg * 10
        penalty_gap = max(-0.05, penalty_gap)
        gap_reduction_reward = max(0, (prev_gap - gap) * 50)

        # Normalize each component by the moving average of its own absolute
        # raw values. The current value is added to the history first, so the
        # average is never taken over an empty history (which would be NaN and
        # would poison every later reward and the Q-table), and it is never
        # negative (which would flip the sign of a penalty).
        normalized_reward_accuracy_sustained = adaptive_normalize(
            reward_accuracy_sustained,
            update_moving_average(ma_history_acc_reward, reward_accuracy_sustained))
        normalized_penalty_gap = adaptive_normalize(
            penalty_gap,
            update_moving_average(ma_history_gap_penalty, penalty_gap))
        normalized_gap_reduction_reward = adaptive_normalize(
            gap_reduction_reward,
            update_moving_average(ma_history_gap_reduction_reward, gap_reduction_reward))

        # NEW - DYNAMIC WEIGHTING FOR ACCURACY REWARD (Dynamic gradient-based weighting)
        dynamic_weight_accuracy = compute_dynamic_weights(model, X_val, y_val, criterion, device) / 100.0
        reward = (normalized_reward_accuracy_sustained * dynamic_weight_accuracy) + normalized_penalty_gap + normalized_gap_reduction_reward
        print(f"   RL Reward (V11 - Refined V8): Sustained Accuracy={reward_accuracy_sustained:.2f} (Normalized={normalized_reward_accuracy_sustained:.4f}, Weight={dynamic_weight_accuracy:.4f}), Gap Penalty={penalty_gap:.2f} (Normalized={normalized_penalty_gap:.4f}), Gap Reduction Reward={gap_reduction_reward:.2f} (Normalized={normalized_gap_reduction_reward:.4f}), Total Reward={reward:.4f} (GROK_DEEP_V11)")

        next_state = {
            'val_acc': val_acc,
            'val_loss': val_loss,
            'gap': gap,
            'train_acc': train_acc,
            'train_loss': train_loss,
            'learning_rate': current_learning_rate,
            'val_acc_change': val_acc - prev_val_acc,
            'val_acc_moving_avg': val_acc_moving_avg,
            'val_gap_moving_avg': val_gap_moving_avg,
            'iteration': it + 2
        }

        rl_agent.update_q_table(state, action, reward, next_state)

        prev_val_acc = val_acc
        prev_val_acc_moving_avg = val_acc_moving_avg
        prev_gap = gap

        # Early stopping: compare against the PREVIOUS iteration's composite
        # metric. prev_metric is only overwritten after this check; otherwise
        # the improvement is always 0 and training stops unconditionally at
        # min_iterations + 1.
        if it + 1 > min_iterations and best_composite_metric != float('inf'):
            improvement = (prev_metric - composite_metric) if prev_metric is not None else 0
            if improvement < tolerance:
                print(f"🛑 Early stopping at iteration {it+1} (no significant composite metric improvement). (GROK_DEEP_V11)")
                break
        prev_metric = composite_metric

    print(f"Best Accuracy Model before return (GROK_DEEP_V11): {best_accuracy_model}")
    print(f"Best Composite Model before return (GROK_DEEP_V11): {best_model_composite}")
    print(f"Final Q-Table after grokking iterations (GROK_DEEP_V11):\n{rl_agent.q_table}")
    print("DEBUG: Returning best_accuracy_model with type:", type(best_accuracy_model))

    # Safeguard: if no best_accuracy_model was selected, default to the original model_wrapper
    if best_accuracy_model is None:
        print("⚠️ No best_accuracy_model was selected during training; defaulting to original model_wrapper.")
        best_accuracy_model = model_wrapper

    return best_accuracy_model, best_metrics_accuracy

# Import grok_deep_model_v11 function AFTER it's defined
from __main__ import grok_deep_model_v11

def grok_non_deep_model(model, X_train, y_train, X_val, y_val, min_iterations=10, max_iterations=20, tolerance=0.001):
    """Return a non-deep model unchanged; no grokking is applied to it.

    Parameters:
        model: The already selected estimator.
        X_train, y_train, X_val, y_val: Unused; accepted so the signature
            mirrors ``grok_deep_model_v11``.
        min_iterations, max_iterations, tolerance: Unused; see above.

    Returns:
        tuple: (model, {}) -- the model that was passed in and an empty
        metrics dict, since no fine-tuning metrics are produced.
    """
    print("👍 Using the selected non-deep model as is (no grokking for this model type - GROK_NON_DEEP).")
    # Use the argument rather than module-level names: the previous version
    # read an undefined global metrics dict and ignored `model`.
    final_model = model
    final_metrics = {}
    return final_model, final_metrics

# Dispatch the selected model to the matching refinement routine.
if selected_model_name in ["LogReg", "RF"]:
    # Convert the tensors here: no NumPy copies of the training split have
    # been created at this point in the notebook.
    final_model, final_metrics = grok_non_deep_model(selected_model,
                                                      X_train.cpu().numpy(), y_train.cpu().numpy(),
                                                      X_val.cpu().numpy(), y_val.cpu().numpy(),
                                                      min_iterations=10, max_iterations=20, tolerance=0.001)
elif selected_model_name == "MLP":
    print("--- RL TUNING EXPERIMENT: VERSION 11 (REFINED STATE & REFINED REWARD V6) ---")
    final_model, final_metrics = grok_deep_model_v11(selected_model, X_train, y_train, X_val, y_val,
                                                     min_iterations=10, max_iterations=20, tolerance=0.001)
else:
    print("👍 Using the selected model as is (no grokking for this model type).")
    final_model = selected_model
    final_metrics = {}  # keep final_metrics defined for the cells that read it

In [None]:
%%capture cap_cell9
from skopt import BayesSearchCV
from sklearn.model_selection import cross_val_score

# NumPy views of the training split for the scikit-learn / skopt APIs. They
# are created here because this cell uses them before any visible definition.
X_train_np = X_train.cpu().numpy()
y_train_np = y_train.cpu().numpy()

if selected_model_name == "MLP":
    print("🔎 Tuning hyperparameters for the deep model (MLP) via Bayesian Optimization on dataset:", dataset_name)
    n_classes = int(np.max(y_train_np)) + 1
    search_space = {
        'hidden_size': Integer(32, 200),
        'num_layers': Integer(1, 4),
        'dropout': Real(0.1, 0.7, prior='uniform')
    }
    opt = BayesSearchCV(SimpleMLPWrapper(input_size, num_classes=n_classes),
                        search_space, n_iter=5, cv=3, scoring='accuracy', random_state=42)
    opt.fit(X_train_np, y_train_np)
    best_params = opt.best_params_
    print(f"🏆 Best Hyperparameters for MLP: {best_params}")
    tuned_model = SimpleMLPWrapper(input_size,
                                   hidden_size=best_params['hidden_size'],
                                   num_layers=best_params['num_layers'],
                                   dropout=best_params['dropout'],
                                   num_classes=n_classes,
                                   epochs=50)
    tuned_model.fit(X_train_np, y_train_np)
    # 3-fold CV accuracy of the tuned configuration on the training split.
    tuned_score = np.mean(cross_val_score(tuned_model, X_train_np, y_train_np, cv=3, scoring='accuracy'))
    candidate_mlp_score = candidate_scores["MLP"]
    print(f"📊 Candidate MLP Score: {candidate_mlp_score*100:.2f}% vs Tuned MLP Score: {tuned_score*100:.2f}%")
    if tuned_score < candidate_mlp_score:
        print("⚖️ Tuned model performs worse than candidate. Rolling back to candidate deep model.")
        final_model = selected_model
    else:
        final_model = tuned_model
else:
    print("👍 Using the selected non-deep model (with grokking refinements) as is.")
    final_model = selected_model

In [None]:
%%capture cap_cell10
# Cell 10: Final Evaluation & Dynamic Strategy Switching - DEBUGGING VERSION
from sklearn.metrics import accuracy_score, log_loss

# Final evaluation: score `final_model` on the validation split, report the
# train/validation log-loss gap, and compare against a plain Logistic
# Regression baseline trained on the same split.
print("🔎 Evaluating final AutoGrokML model on dataset:", dataset_name)
X_val_np = X_val.cpu().numpy()
y_val_np = y_val.cpu().numpy()
X_train_np = X_train.cpu().numpy()
y_train_np = y_train.cpu().numpy()

print("DEBUG: Model being used for final evaluation (final_model):", final_model) # <---- DEBUG PRINT 1: Log final_model object

# Re-initialize and set parameters for non-deep models (like Logistic Regression)
if isinstance(final_model, LogisticRegression) and 'model_params' in final_metrics: # Check for model type and if params are available
    print("🛠️ Re-initializing and applying learned parameters to final Logistic Regression model...")
    learned_params = final_metrics['model_params']
    final_model_reinitialized = LogisticRegression(max_iter=400) # Create a *new* LogisticRegression instance
    final_model_reinitialized.coef_ = learned_params['coef_'] # Set learned coefficients
    final_model_reinitialized.intercept_ = learned_params['intercept_'] # Set learned intercept
    # An estimator assembled by hand (never passed through fit) has no
    # `classes_`; predict() and predict_proba() both read that attribute,
    # so without it they raise AttributeError. Reuse the labels of the model
    # being replaced when it has them, otherwise derive them from the
    # training labels.
    final_model_reinitialized.classes_ = getattr(final_model, "classes_", np.unique(y_train_np))
    final_model = final_model_reinitialized # *Replace* final_model with the re-initialized and parameterized model
    print("   ✅ Successfully applied learned parameters to final_model.") # Confirmation message
else:
    print("   ⚠️ Parameter application not implemented or not applicable for model type.") # Message if no parameter application

# Deep and non-deep models are called through the same predict /
# predict_proba interface, so no per-model branching is needed.
final_preds = final_model.predict(X_val_np)
final_preds_proba = final_model.predict_proba(X_val_np)

print("DEBUG: First 5 predictions of final_model:", final_preds[:5]) # <---- DEBUG PRINT 2: Log first few predictions
print("DEBUG: Shape of final_preds:", final_preds.shape) # <---- DEBUG PRINT 3: Log shape of predictions

final_acc = accuracy_score(y_val_np, final_preds)
final_logloss = log_loss(y_val_np, final_preds_proba)
train_preds_proba = final_model.predict_proba(X_train_np)
train_loss = log_loss(y_train_np, train_preds_proba)
# Positive gap => validation loss is higher than training loss.
gap = final_logloss - train_loss

print(f"🤖 Final AutoGrokML Model Accuracy: {final_acc*100:.2f}%")
print(f"📉 Final AutoGrokML Model Log Loss: {final_logloss:.4f}")
print(f"⚖️ Generalization Gap (Val Loss - Train Loss): {gap:.4f}")

print("🔎 Training baseline model (Logistic Regression) for dataset:", dataset_name)
baseline_model = LogisticRegression(max_iter=400)
baseline_model.fit(X_train_np, y_train_np)
baseline_preds = baseline_model.predict(X_val_np)
baseline_acc = accuracy_score(y_val_np, baseline_preds)
baseline_probs = baseline_model.predict_proba(X_val_np)
baseline_logloss = log_loss(y_val_np, baseline_probs)
print(f"💡 Baseline (Logistic Regression) Accuracy: {baseline_acc*100:.2f}%")
print(f"📉 Baseline (Logistic Regression) Log Loss: {baseline_logloss:.4f}")

if final_acc < baseline_acc:
    print("⚠️ AutoGrokML did not outperform the baseline.")
    print("🔍 Next Steps: Consider adjusting sample weighting, fine-tuning the learning rate, or employing ensemble techniques to improve generalization.")
else:
    print("🚀 AutoGrokML outperforms the baseline!")

In [None]:
%%capture cap_cell11
# Visualization cell (output captured as cap_cell11), deep models only:
# project the MLP's forward outputs on the training set to 2-D with PCA and
# scatter-plot them coloured by training label.
if selected_model_name != "MLP":
    print("🖼 Visualization not applicable for non-deep models.")
else:
    final_model.model.eval()
    with torch.no_grad():
        # Inputs must live on the same device as the model ...
        X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32).to(device)
        outputs = final_model.model(X_train_tensor)
        # ... and results must be back on the CPU before converting to NumPy.
        outputs_np = outputs.cpu().numpy()

    pca = PCA(n_components=2)
    latent_2d = pca.fit_transform(outputs_np)

    plt.figure(figsize=(8, 6))
    plt.scatter(
        latent_2d[:, 0],
        latent_2d[:, 1],
        c=y_train_np,
        cmap='viridis',
        alpha=0.7,
    )
    plt.title(f"MLP Latent Space Visualization for {dataset_name}")
    plt.xlabel("PCA Component 1")
    plt.ylabel("PCA Component 2")
    plt.colorbar()
    plt.show()

In [None]:
%%capture cap_cell12
def reset_vram():
    """Free unreachable Python objects, then release PyTorch's cached CUDA memory.

    Garbage collection runs first so that memory owned by unreachable
    tensors is handed back to PyTorch's caching allocator before the cache
    is emptied; in the opposite order those blocks would stay cached.

    Returns:
        None. Progress is reported on stdout.
    """
    import gc

    print("🔄 Resetting VRAM...")
    gc.collect()
    torch.cuda.empty_cache()
    print("✅ VRAM has been cleared.")

reset_vram()

# Combine the stdout captured by the `%%capture cap_cellN` cells, in cell
# order, and save it to a text file. cap_cell11 (the visualization cell) is
# intentionally not included, matching the original author's decision.
captured_cells = [
    cap_cell1,
    cap_cell2,
    cap_cell3,
    cap_cell4,
    cap_cell5,
    cap_cell6,
    cap_cell7,
    cap_cell8,
    cap_cell9,
    cap_cell10,
]
combined_output = "".join(
    f"Cell {cell_number} Output:\n{capture.stdout}\n"
    for cell_number, capture in enumerate(captured_cells, start=1)
)

# The captured text contains emoji, so write with an explicit UTF-8 encoding
# instead of depending on the platform's default locale encoding.
with open('full_output.txt', 'w', encoding='utf-8') as f:
    f.write(combined_output)

print("✅ Full notebook output saved to 'full_output.txt'")

In [None]:
# Cell 13: Display full_output.txt Content in Console
# The file holds emoji-bearing log text, so decode it explicitly as UTF-8
# rather than with the platform's default locale encoding.
with open('full_output.txt', 'r', encoding='utf-8') as f:
    full_output_text = f.read()

print("----- FULL NOTEBOOK OUTPUT (from full_output.txt) -----")
print(full_output_text)
print("----- END OF FULL NOTEBOOK OUTPUT -----")

----- FULL NOTEBOOK OUTPUT (from full_output.txt) -----
Cell 1 Output:
Found existing installation: torch 2.5.1+cu121
Uninstalling torch-2.5.1+cu121:
  Successfully uninstalled torch-2.5.1+cu121
Found existing installation: torchvision 0.20.1+cu121
Uninstalling torchvision-0.20.1+cu121:
  Successfully uninstalled torchvision-0.20.1+cu121
Found existing installation: torchaudio 2.5.1+cu121
Uninstalling torchaudio-2.5.1+cu121:
  Successfully uninstalled torchaudio-2.5.1+cu121
Files removed: 6
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch
  Downloading https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp311-cp311-linux_x86_64.whl (780.5 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/780.5 MB[0m [31m?[0m eta [36m-:--:--[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/780.5 MB[0m [31m132.3 MB/s[0m eta [36m0:00:06[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/780.5 MB[

In [None]:
%%capture cap_cell13
# scikit-learn toy datasets: name -> (loader function in sklearn.datasets,
# number of simulated noise columns to append).
_SKLEARN_LOADERS = {
    "Iris": ("load_iris", 3),
    "Wine": ("load_wine", 2),
    "Digits": ("load_digits", 2),
}


def _scale_and_add_noise(X, n_extra):
    """Standardize X and append n_extra columns of seeded N(0, 1) noise."""
    # NOTE(review): the scaler is fitted on the full dataset, before the
    # train/validation split performed by the caller.
    X = StandardScaler().fit_transform(X)
    np.random.seed(42)  # reseeds the global NumPy RNG on every call
    extra_features = np.random.normal(0, 1, size=(X.shape[0], n_extra))
    return np.concatenate([X, extra_features], axis=1)


def _load_sklearn_dataset(loader_name, n_extra):
    """Load a scikit-learn bundled dataset and return (features, labels)."""
    # Aliased so it cannot be confused with any notebook-level `datasets` name.
    from sklearn import datasets as sk_datasets
    bunch = getattr(sk_datasets, loader_name)()
    return _scale_and_add_noise(bunch.data, n_extra), bunch.target


def _load_titanic():
    """Load seaborn's Titanic dataset and return (features, labels)."""
    import seaborn as sns
    # NOTE(review): dropna() with no `subset` discards rows that have a
    # missing value in ANY column, not only in the five used below.
    df = sns.load_dataset("titanic").dropna()
    X = df[['pclass', 'age', 'sibsp', 'parch', 'fare']].values
    y = df["survived"].values
    return _scale_and_add_noise(X, 2), y


def _load_pima():
    """Download the PIMA diabetes CSV and return (features, labels)."""
    url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
    column_names = ["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"]
    df = pd.read_csv(url, header=None, names=column_names)
    X = df.drop("Outcome", axis=1).values
    y = df["Outcome"].values
    return _scale_and_add_noise(X, 2), y


def _load_heart():
    """Download the Heart disease CSV and return (features, labels)."""
    url = "https://raw.githubusercontent.com/selva86/datasets/master/heart.csv"
    df = pd.read_csv(url)
    df["AHD"] = df["AHD"].apply(lambda x: 0 if x=="No" else 1)
    feature_columns = ["Age", "Sex", "ChestPain", "RestBP", "Chol", "Fbs", "RestECG", "MaxHR", "ExAng", "Oldpeak", "Slope", "Ca", "Thal"]
    # Keep only the listed columns that pandas parsed as numeric.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    feature_columns = [col for col in feature_columns if col in numeric_cols]
    X = df[feature_columns].values
    y = df["AHD"].values
    return _scale_and_add_noise(X, 1), y


def run_rl_experiment(dataset_name):
    """Run model selection and, for deep models, RL tuning on one dataset.

    Loads the named dataset, standardizes it, appends simulated noise
    features, makes a stratified 80/20 train/validation split, and calls
    `select_model`. If a deep model is selected it is refined with
    `grok_deep_model_v11` and its validation accuracy, log loss and
    train/validation log-loss gap are printed.

    Args:
        dataset_name: One of "Iris", "Wine", "Titanic", "Digits", "PIMA",
            or "Heart". If "Heart" cannot be loaded, Titanic is used instead.

    Returns:
        None. All results are reported on stdout.

    Raises:
        ValueError: If `dataset_name` is not one of the supported names.
    """
    from sklearn.metrics import accuracy_score, log_loss

    # Load dataset based on dataset_name
    if dataset_name in _SKLEARN_LOADERS:
        loader_name, n_extra = _SKLEARN_LOADERS[dataset_name]
        X, y = _load_sklearn_dataset(loader_name, n_extra)
        print(f"✅ Using {dataset_name} dataset with simulated extra features.")
    elif dataset_name == "Titanic":
        X, y = _load_titanic()
        print("✅ Using Titanic dataset with simulated extra features.")
    elif dataset_name == "PIMA":
        X, y = _load_pima()
        print("✅ Using PIMA Diabetes dataset with simulated extra features.")
    elif dataset_name == "Heart":
        try:
            X, y = _load_heart()
        except Exception as e:
            # Deliberate best-effort fallback, but record why it happened
            # instead of silently discarding the error.
            logging.warning("Heart dataset could not be loaded: %r", e)
            print("⚠️ Could not load Heart dataset; falling back to Titanic.")
            dataset_name = "Titanic"
            X, y = _load_titanic()
        else:
            print("✅ Using Heart Disease dataset with simulated extra features.")
    else:
        raise ValueError(f"Dataset '{dataset_name}' not recognized. Please choose from Iris, Wine, Titanic, Digits, PIMA, or Heart.")

    # Split on CPU NumPy arrays; scikit-learn works with the NumPy copies.
    X = np.array(X)
    y = np.array(y)
    X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Tensor copies on the target device for the deep-model tuning routine.
    X_train = torch.tensor(X_train_np, dtype=torch.float32).to(device)
    X_val = torch.tensor(X_val_np, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train_np, dtype=torch.long).to(device)
    y_val = torch.tensor(y_val_np, dtype=torch.long).to(device)

    input_size = X_train.shape[1]
    print(f"📊 Dataset: {dataset_name} | Training: {X_train.shape}, Validation: {X_val.shape}")

    # Select and train the model using our deep model (MLP) candidate and RL tuning (Version 11)
    _name, selected_model, _scores, is_deep_model, _best_mlp_params = select_model(X_train_np, y_train_np, input_size)
    if not is_deep_model:
        print("Non-deep model selected; skipping RL tuning evaluation.")
        return

    # Run our RL tuning procedure using grok_deep_model_v11
    best_model, _best_metrics = grok_deep_model_v11(selected_model, X_train, y_train, X_val, y_val)

    # Final evaluation
    final_preds = best_model.predict(X_val_np)
    final_preds_proba = best_model.predict_proba(X_val_np)
    final_acc = accuracy_score(y_val_np, final_preds)
    final_logloss = log_loss(y_val_np, final_preds_proba)
    train_preds_proba = best_model.predict_proba(X_train_np)
    train_loss = log_loss(y_train_np, train_preds_proba)
    gap = final_logloss - train_loss
    print(f"Dataset: {dataset_name} | Final Accuracy: {final_acc*100:.2f}%, Log Loss: {final_logloss:.4f}, Gap: {gap:.4f}")

# Datasets to evaluate, in run order. The list is deliberately NOT called
# `datasets`: the dataset-selection cell binds that notebook-level name to
# the `sklearn.datasets` module, and rebinding it to a list here would
# clobber the module for any cell run afterwards.
rl_dataset_names = ["Titanic", "PIMA", "Heart", "Digits", "Iris", "Wine"]
for ds in rl_dataset_names:
    run_rl_experiment(ds)

print("✅ Cross-dataset experiments completed.")
