<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/self_referential_training_that_runs_on_TF_2_x_(Python_3_9%E2%80%933_11).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install --upgrade pip setuptools wheel

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import math
import numpy as np
import tensorflow as tf


class MetaIntelligence(tf.keras.Model):
    """Small two-layer MLP (inputs -> hidden ReLU -> logits) for self-modifying updates.

    Args:
        hidden: Number of units in the hidden ReLU layer.
        num_classes: Number of output logits. Defaults to 4, which matches
            the 4-feature / 4-class setup used by the training steps in
            this file.
    """

    def __init__(self, hidden=16, num_classes=4):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(hidden, activation="relu",
                                        kernel_initializer="he_normal")
        # No activation on the head: the model returns raw logits and the
        # caller decides how to normalize them (softmax / from_logits=True).
        self.d2 = tf.keras.layers.Dense(num_classes, activation=None,
                                        kernel_initializer="glorot_uniform")

    def call(self, inputs, training=False):
        """Return unnormalized class logits for ``inputs``.

        ``training`` is accepted for Keras API compatibility; it is not
        forwarded to the two Dense layers.
        """
        return self.d2(self.d1(inputs))


def l2_weight_decay(model, weight=1e-4):
    """Return ``weight`` times the summed L2 loss of the model's trainable variables.

    Uses ``tf.nn.l2_loss`` per variable, so every trainable variable
    (kernels and biases alike) contributes to the penalty.

    Args:
        model: Object exposing ``trainable_variables``.
        weight: Scale applied to the summed penalty.

    Returns:
        A scalar penalty. When the model has no trainable variables, a
        float32 zero scalar is returned instead of raising.
    """
    variables = model.trainable_variables
    if not variables:
        # tf.add_n rejects an empty list; with nothing to regularize the
        # penalty is simply zero.
        return tf.zeros([], dtype=tf.float32)
    penalty = tf.add_n([tf.nn.l2_loss(v) for v in variables])
    return weight * penalty


@tf.function
def self_modify_step(model,
                     optimizer,
                     batch_size=128,
                     wd=1e-4,
                     entropy_bonus=0.0,
                     label_sharpen=0.0):
    """Run one self-referential (pseudo-label) update on random inputs.

    Procedure:
      1) Draw ``batch_size`` standard-normal inputs with 4 features.
      2) Forward pass: logits -> softmax probabilities.
      3) Build fixed (stop-gradient) targets from the model's own output:
         hard one-hot argmax labels when ``label_sharpen`` is not positive,
         otherwise soft labels proportional to
         ``probs ** (1 + label_sharpen)``.
      4) Minimize CE(targets, probs) + weight decay
         - ``entropy_bonus`` * H(probs).

    Args:
        model: Model mapping a ``[batch, 4]`` float tensor to class logits.
        optimizer: Optimizer providing ``apply_gradients``.
        batch_size: Number of random inputs drawn for this step.
        wd: L2 weight-decay coefficient passed to ``l2_weight_decay``.
        entropy_bonus: Coefficient on the prediction entropy; positive
            values reward higher entropy and so counteract collapse.
        label_sharpen: Sharpening strength for soft pseudo-labels. Larger
            values move the targets closer to one-hot.

    Returns:
        Dict of scalar tensors: ``"loss"``, ``"ce"``, ``"entropy"`` and
        ``"max_conf"`` (mean of the per-example maximum probability).

    Note:
        Python scalars passed as arguments become part of the traced
        graph, so each distinct value causes ``tf.function`` to retrace.
    """
    # Random "experience" -- replace with real data if available.
    x = tf.random.normal([batch_size, 4], dtype=tf.float32)

    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        probs = tf.nn.softmax(logits, axis=-1)

        # Take the class count from the model output rather than assuming 4.
        num_classes = logits.shape[-1]
        if num_classes is None:
            num_classes = tf.shape(logits)[-1]

        if label_sharpen > 0.0:
            # Sharpening needs an exponent greater than 1: raising
            # probabilities to a power below 1 flattens the distribution
            # instead of sharpening it.
            sharpened = tf.pow(probs + 1e-8, 1.0 + label_sharpen)
            sharpened = sharpened / tf.reduce_sum(sharpened, axis=-1, keepdims=True)
            pseudo = tf.stop_gradient(sharpened)
        else:
            # Hard pseudo-labels; stop_gradient keeps the target fixed for
            # this step.
            hard_idx = tf.argmax(probs, axis=-1)
            pseudo = tf.stop_gradient(tf.one_hot(hard_idx, depth=num_classes))

        ce = tf.keras.losses.categorical_crossentropy(pseudo, probs, from_logits=False)
        ce = tf.reduce_mean(ce)

        # CE against self-generated targets pushes toward confident
        # predictions; the entropy term is subtracted so that a positive
        # bonus pulls the other way.
        entropy = -tf.reduce_mean(tf.reduce_sum(probs * tf.math.log(probs + 1e-8), axis=-1))
        reg = l2_weight_decay(model, weight=wd)

        loss = ce + reg - entropy_bonus * entropy

    variables = model.trainable_variables
    grads = tape.gradient(loss, variables)

    # Safety: replace None grads with zeros; clip to avoid explosions.
    safe_grads = [tf.zeros_like(v) if g is None else g
                  for g, v in zip(grads, variables)]
    safe_grads, _ = tf.clip_by_global_norm(safe_grads, 1.0)

    optimizer.apply_gradients(zip(safe_grads, variables))

    # Diagnostics
    max_conf = tf.reduce_mean(tf.reduce_max(probs, axis=-1))
    return {
        "loss": loss,
        "ce": ce,
        "entropy": entropy,
        "max_conf": max_conf
    }


@tf.function
def supervised_step(model, optimizer, x, y, wd=1e-4):
    """Apply one supervised gradient update and return the scalar loss.

    The loss is the mean sparse categorical cross-entropy (computed from
    logits) plus L2 weight decay scaled by ``wd``. Gradients are clipped
    to a global norm of 1.0 before the optimizer applies them.
    """
    with tf.GradientTape() as tape:
        per_example = tf.keras.losses.sparse_categorical_crossentropy(
            y, model(x, training=True), from_logits=True
        )
        loss = tf.reduce_mean(per_example) + l2_weight_decay(model, weight=wd)

    # Read the variable list after the forward pass, as the model may
    # create its variables on first call.
    params = model.trainable_variables
    clipped, _ = tf.clip_by_global_norm(tape.gradient(loss, params), 1.0)
    optimizer.apply_gradients(zip(clipped, params))
    return loss


def run_self_modification(steps=200, batch_size=256, lr=1e-3, wd=1e-4,
                          entropy_bonus=0.0, label_sharpen=0.0, seed=42):
    """Train a fresh ``MetaIntelligence`` model on its own pseudo-labels.

    Seeds TensorFlow and NumPy, runs ``steps`` calls of
    ``self_modify_step`` with an Adam optimizer, prints metrics on the
    first step and roughly every tenth of the run, then prints the
    predictions for 8 random probe inputs.

    Returns:
        The trained model.
    """
    tf.random.set_seed(seed)
    np.random.seed(seed)

    net = MetaIntelligence(hidden=16)
    adam = tf.keras.optimizers.Adam(lr)

    # One dummy forward pass so the layer variables exist before training.
    net(tf.zeros([1, 4], dtype=tf.float32))

    # Log about ten times per run (every step when steps < 10).
    log_every = max(1, steps // 10)

    print("Starting self-modification...")
    for step in range(1, steps + 1):
        stats = self_modify_step(
            net, adam,
            batch_size=batch_size,
            wd=wd,
            entropy_bonus=entropy_bonus,
            label_sharpen=label_sharpen
        )
        if step != 1 and step % log_every != 0:
            continue
        print(f"[{step:04d}] loss={stats['loss']:.4f} "
              f"ce={stats['ce']:.4f} "
              f"H={stats['entropy']:.4f} "
              f"max_conf={stats['max_conf']:.3f}")

    # Quick look at what the model predicts on a handful of random inputs.
    probe_logits = net(tf.random.normal([8, 4]), training=False)
    probe_probs = tf.nn.softmax(probe_logits, axis=-1)
    probe_preds = tf.argmax(probe_probs, axis=-1)
    print("Sample probs:\n", np.round(probe_probs.numpy(), 3))
    print("Preds:", probe_preds.numpy().tolist())
    return net


def run_supervised_demo(steps=200, batch_size=256, lr=1e-3, wd=1e-4, seed=7):
    """Train ``MetaIntelligence`` with ordinary supervision on a synthetic rule.

    Each batch is freshly sampled: inputs are standard-normal 4-vectors and
    the label is the index of the feature with the largest absolute value.
    Prints the loss on the first step and roughly every tenth of the run,
    then prints accuracy on 512 newly sampled examples.

    Returns:
        The trained model.
    """
    tf.random.set_seed(seed)
    np.random.seed(seed)

    net = MetaIntelligence(hidden=16)
    adam = tf.keras.optimizers.Adam(lr)
    # Dummy forward pass so the layer variables exist before training.
    net(tf.zeros([1, 4], dtype=tf.float32))

    def sample_batch(count):
        # Simple, learnable rule: label = argmax of |feature|.
        features = tf.random.normal([count, 4])
        labels = tf.argmax(tf.abs(features), axis=-1, output_type=tf.int32)
        return features, labels

    # Log about ten times per run (every step when steps < 10).
    log_every = max(1, steps // 10)

    print("Starting supervised demo...")
    for step in range(1, steps + 1):
        batch_x, batch_y = sample_batch(batch_size)
        loss = supervised_step(net, adam, batch_x, batch_y, wd=wd)
        if step == 1 or step % log_every == 0:
            print(f"[{step:04d}] sup_loss={loss:.4f}")

    # Evaluate on a fresh sample drawn from the same rule.
    eval_x, eval_y = sample_batch(512)
    eval_preds = tf.argmax(net(eval_x, training=False), axis=-1, output_type=tf.int32)
    hits = tf.cast(tf.equal(eval_preds, eval_y), tf.float32)
    acc = tf.reduce_mean(hits)
    print(f"Supervised accuracy: {acc.numpy():.3f}")
    return net


if __name__ == "__main__":
    # Default run: pure self-referential updating (pseudo-label bootstrapping).
    # Knobs worth trying:
    #   entropy_bonus=0.05  -> encourage exploration
    #   label_sharpen=0.2   -> softly sharpen pseudo-labels
    model = run_self_modification(
        steps=300, batch_size=256, lr=1e-3, wd=1e-4,
        entropy_bonus=0.0, label_sharpen=0.0, seed=42,
    )

    # Alternative: run the supervised sanity check instead of the call above.
    # model = run_supervised_demo(steps=300, batch_size=256, lr=1e-3, wd=1e-4, seed=7)