In [None]:

import os, time, itertools

import numpy as np
import matplotlib.pyplot as plt
import imageio.v2 as imageio

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# CIFAR-10 CNN with Keras — visuals & GIF for LinkedIn
# Requires: tensorflow >= 2.11 (keras.optimizers.AdamW), scikit-learn, matplotlib, imageio

import os, time, itertools

import numpy as np
import matplotlib.pyplot as plt
import imageio.v2 as imageio

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report

# ========== 0) Reproducibility & device ==========
SEED = 42
# Flip to True for fully deterministic ops (can be slower).
DETERMINISTIC_OPS = False

# One call seeds Python's `random`, NumPy and TensorFlow.
tf.keras.utils.set_random_seed(SEED)
if DETERMINISTIC_OPS:
    # enable_op_determinism is a function and has to be *called*. Assigning a
    # value to the attribute only replaces the API function with that value
    # and does not change TensorFlow's behaviour.
    tf.config.experimental.enable_op_determinism()
print("TF version:", tf.__version__)
print("GPU available:", len(tf.config.list_physical_devices('GPU')) > 0)

# ========== 1) Data ==========
(num_classes, input_shape) = (10, (32, 32, 3))
# Keep the raw uint8 training images: the augmentation layers below operate on
# [0, 255] pixel values, so they must run BEFORE standardization.
(x_train_raw, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
y_train = y_train.squeeze(); y_test = y_test.squeeze()

# Per-channel statistics on [0, 1]-scaled pixels. float32 on purpose: float64
# constants would silently promote the whole dataset to float64.
mean = np.array([0.4914, 0.4822, 0.4465], dtype="float32")
std  = np.array([0.2023, 0.1994, 0.2010], dtype="float32")

def normalize(x):
    """Standardize float pixels in [0, 255] per channel: (x/255 - mean) / std.

    Works for NumPy arrays as well as float32 TensorFlow tensors.
    """
    return (x/255.0 - mean) / std

# Standardized arrays, used for evaluation and for the visualisations.
x_train = normalize(x_train_raw.astype("float32"))
x_test  = normalize(x_test.astype("float32"))

batch_size = 128
AUTOTUNE = tf.data.AUTOTUNE

augment = keras.Sequential(
    [
        # Pad by 4 px so the 32x32 random crop actually shifts the image;
        # cropping 32x32 out of an unpadded 32x32 input is a no-op.
        layers.ZeroPadding2D(4),
        layers.RandomCrop(32, 32),
        layers.RandomFlip("horizontal"),
        # Both layers below assume / clip to the [0, 255] pixel range.
        layers.RandomContrast(0.1),
        layers.RandomBrightness(factor=0.1),
    ],
    name="augment",
)

def augment_and_normalize(images, labels):
    """Augment a batch of raw [0, 255] images, then standardize it."""
    images = tf.cast(images, tf.float32)
    images = augment(images, training=True)
    return normalize(images), labels

train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train_raw, y_train))
    .shuffle(10_000, seed=SEED)
    .batch(batch_size)
    .map(augment_and_normalize, num_parallel_calls=AUTOTUNE)
    .prefetch(AUTOTUNE)
)

# No shuffling here: downstream code relies on predictions being in y_test order.
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size).prefetch(AUTOTUNE)

# ========== 2) Model ==========
def make_model():
    """Build the CIFAR-10 classifier (uncompiled).

    Architecture: two [Conv-Conv-MaxPool-Dropout] stages (32 and 64 filters),
    then two 128-filter convolutions, global average pooling, a 128-unit dense
    layer with dropout and a softmax head over ``num_classes``.

    The final convolution is named ``"last_conv"`` so Grad-CAM can look it up
    by name. (Previously the name sat on the second-to-last convolution.)

    Returns:
        keras.Model mapping ``input_shape`` images to class probabilities.
    """
    inp = layers.Input(shape=input_shape)
    x = inp
    for filters, drop_rate in ((32, 0.1), (64, 0.2)):
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        x = layers.MaxPool2D()(x)
        x = layers.Dropout(drop_rate)(x)

    x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(128, 3, padding="same", activation="relu", name="last_conv")(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inp, out)

# Instantiate the network and attach optimizer, loss and metric.
model = make_model()
model.compile(
    # Labels are integer class ids, hence the sparse loss variant.
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.AdamW(weight_decay=1e-2, learning_rate=3e-4),
    metrics=["accuracy"],
)
model.summary()

# ========== 3) Train ==========
EPOCHS = 20
# The test split is passed as validation data, so per-epoch validation
# metrics are computed on it.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=test_ds,
    verbose=1,
)

# ========== 4) Evaluation ==========
test_loss, test_acc = model.evaluate(test_ds, verbose=0)
print(f"Test accuracy: {test_acc:.3f}")

# ========== 5) Helpers for the visualisations ==========
# Class names in label-id order (index i is the name of class i).
classes = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

def denorm(x):
    """Undo the per-channel standardization and clip to [0, 1] for plotting."""
    restored = x*std + mean
    return np.clip(restored, 0, 1)

def make_prediction_grid(x, y, probs, n=64, ncol=8, fp_only=False):
    """Draw a grid of images titled with predicted (P) and true (T) class.

    Titles are green for correct and red for wrong predictions. The figure is
    left as the current pyplot figure so the caller can ``plt.savefig`` it.

    Args:
        x: standardized images, indexable by an integer index array.
        y: integer labels aligned with ``x``.
        probs: class probabilities aligned with ``x`` (one row per image).
        n: maximum number of images to show.
        ncol: number of grid columns.
        fp_only: if True, show only misclassified images; falls back to all
            images when nothing is misclassified.

    Returns:
        The matplotlib Figure that was created.
    """
    all_preds = probs.argmax(1)
    idx = np.arange(len(x))
    if fp_only:
        wrong = idx[all_preds != y]
        idx = wrong if len(wrong) > 0 else idx
    idx = idx[:n]
    xg = denorm(x[idx])

    # Size the grid from the number of tiles actually drawn (ceil division),
    # so n that is not a multiple of ncol, or fewer than n available images,
    # no longer breaks or distorts the layout.
    nrow = max(1, -(-len(idx) // ncol))
    fig, axes = plt.subplots(nrow, ncol, figsize=(10,10), squeeze=False)
    for ax in axes.ravel():
        ax.axis("off")  # also blanks the unused trailing cells
    for ax, img, k in zip(axes.ravel(), xg, idx):
        ax.imshow(img)
        color = "g" if all_preds[k] == y[k] else "r"
        ax.set_title(f"P:{classes[all_preds[k]]}\nT:{classes[y[k]]}", fontsize=8, color=color)
    fig.tight_layout()
    return fig

def plot_confusion_matrix(y_true, y_pred):
    """Plot the confusion matrix as a heatmap and return the count matrix.

    Rows are true classes and columns are predicted classes, both in
    label-id order. The figure stays current so the caller can save it.
    """
    label_ids = np.arange(num_classes)
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    fig, ax = plt.subplots(figsize=(8,6))
    heat = ax.imshow(cm, cmap="Blues")
    fig.colorbar(heat, ax=ax)
    ax.set_xticks(label_ids)
    ax.set_xticklabels(classes, rotation=45, ha="right")
    ax.set_yticks(label_ids)
    ax.set_yticklabels(classes)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title("CIFAR-10 Confusion Matrix")
    fig.tight_layout()
    return cm

# ========== 6) Predictions for the visuals ==========
y_prob = model.predict(test_ds, verbose=0)
y_pred = np.argmax(y_prob, axis=1)

# ========== 7) Saving artifacts ==========
os.makedirs("artifacts", exist_ok=True)

# Confusion matrix
_ = plot_confusion_matrix(y_test, y_pred)
plt.savefig("artifacts/confusion_matrix.png", dpi=200)
plt.close()

# Prediction grid (mixed) & grid of wrong predictions only
x_test_vis = x_test  # standardized; the plotter de-normalizes
for grid_path, wrong_only in (
    ("artifacts/prediction_grid.png", False),
    ("artifacts/wrong_predictions_grid.png", True),
):
    make_prediction_grid(x_test_vis, y_test, y_prob, n=64, ncol=8, fp_only=wrong_only)
    plt.savefig(grid_path, dpi=200)
    plt.close()

# ========== 8) Grad-CAM (op laatste conv-laag 'last_conv') ==========
def gradcam(imgs, model, layer_name="last_conv", class_idx=None):
    """Compute Grad-CAM heatmaps for a batch of images.

    Args:
        imgs: batch of standardized images, N x H x W x 3.
        model: Keras model with a layer called ``layer_name``.
        layer_name: name of the convolutional layer to explain.
        class_idx: class id(s) to explain; defaults to each image's
            predicted class.

    Returns:
        NumPy array [N, h, w, 1] at the feature-map resolution of
        ``layer_name``, min-max scaled to [0, 1] per image.
    """
    # model.inputs is already a list; wrapping it again would nest the input
    # structure one level deeper than what the model is called with.
    grad_model = keras.Model(model.inputs, [model.get_layer(layer_name).output, model.output])
    imgs = tf.cast(imgs, tf.float32)
    with tf.GradientTape() as tape:
        conv_out, preds = grad_model(imgs, training=False)
        if class_idx is None:
            class_idx = tf.argmax(preds, axis=1)
        # Take the class count from the model output instead of a global.
        one_hot = tf.one_hot(class_idx, preds.shape[-1])
        loss = tf.reduce_sum(one_hot * preds)
    grads = tape.gradient(loss, conv_out)          # [N,H,W,C]
    # Channel weights = spatially averaged gradients.
    weights = tf.reduce_mean(grads, axis=(1,2), keepdims=True)
    cam = tf.nn.relu(tf.reduce_sum(weights * conv_out, axis=-1, keepdims=True))  # [N,H,W,1]
    # Normalize per image; the epsilon guards against an all-zero map.
    cam_min = tf.reduce_min(cam, axis=(1,2,3), keepdims=True)
    cam_max = tf.reduce_max(cam, axis=(1,2,3), keepdims=True)
    cam = (cam - cam_min) / (cam_max - cam_min + 1e-6)
    return cam.numpy()

# Visualize Grad-CAM on 8 samples
idx = np.random.RandomState(SEED).choice(len(x_test), size=8, replace=False)
imgs = x_test[idx]
cams = gradcam(imgs, model, "last_conv")
vis = denorm(imgs)

# The CAM has the conv layer's (coarser) resolution. Drawing it with the
# image's extent stretches it over the full picture; without this the second
# imshow rescales the axes to the CAM size and only a corner of the image
# stays visible under the heatmap.
img_h, img_w = vis.shape[1:3]
overlay_extent = (-0.5, img_w - 0.5, img_h - 0.5, -0.5)

fig, axs = plt.subplots(2, 8, figsize=(16,4))
for i in range(8):
    axs[0,i].imshow(vis[i])
    axs[1,i].imshow(vis[i])
    axs[1,i].imshow(cams[i,:,:,0], alpha=0.5, cmap="jet",
                    extent=overlay_extent, interpolation="bilinear")
    for ax in (axs[0,i], axs[1,i]):
        # Hide ticks only: axis("off") would also hide the row labels below.
        ax.set_xticks([]); ax.set_yticks([])
axs[0,0].set_ylabel("Original"); axs[1,0].set_ylabel("Grad-CAM")
fig.tight_layout(); fig.savefig("artifacts/gradcam_grid.png", dpi=200); plt.close(fig)

# ========== 9) (Optional) GIF with changing predictions ==========
os.makedirs("artifacts", exist_ok=True)
# One seeded generator for all frames: samples differ per frame but the GIF is
# reproducible across runs (RandomState(None) reseeded from the OS each time).
gif_rng = np.random.RandomState(SEED)
tmp_frame_path = "artifacts/_tmp.png"
frames = []
for _ in range(6):
    sel = gif_rng.choice(len(x_test), size=16, replace=False)
    imgs = x_test[sel]
    probs = model.predict(imgs, verbose=0)
    preds = probs.argmax(1)
    # Tile the 16 images into a 2 x 8 mosaic.
    block = 32
    grid = np.zeros((2*block, 8*block, 3))
    for i in range(16):
        r, c = divmod(i, 8)
        grid[r*block:(r+1)*block, c*block:(c+1)*block] = denorm(imgs[i])
    # One title line per mosaic row, so all 16 predictions are listed
    # (previously only the first 8 were shown).
    names = [classes[p] for p in preds]
    title = "\n".join(" | ".join(names[r*8:(r+1)*8]) for r in range(2))
    fig = plt.figure(figsize=(8,4))
    plt.imshow(grid); plt.axis("off")
    plt.title(f"Predictions:\n{title}", fontsize=8)
    plt.tight_layout()
    fig.savefig(tmp_frame_path, dpi=120); plt.close(fig)
    frames.append(imageio.imread(tmp_frame_path))
imageio.mimsave("artifacts/preds.gif", frames, duration=0.9)
# Remove the scratch frame so it does not linger among the artifacts.
if os.path.exists(tmp_frame_path):
    os.remove(tmp_frame_path)

# ========== 10) Report ==========
# Per-class precision / recall / F1 on the test set, then the artifact list.
print(classification_report(y_test, y_pred, target_names=classes))
print("\nSaved:")
print("\n".join([
    "- artifacts/confusion_matrix.png",
    "- artifacts/prediction_grid.png",
    "- artifacts/wrong_predictions_grid.png",
    "- artifacts/gradcam_grid.png",
    "- artifacts/preds.gif",
]))


In [None]:
# California Housing — XGBoost zonder callbacks/early_stopping, pure sklearn-API

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
from scipy.stats import uniform, randint
import numpy as np
import xgboost as xgb
# 1) Data
data = fetch_california_housing()
X = data.data
y = data.target
feature_names = data.feature_names

# 2) Train/test split (the test set is only used at the very end)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.20)

# 3) Base model (no callbacks, no early stopping, everything via the sklearn API)
base = XGBRegressor(
    objective="reg:squarederror",
    # Starting value only; the search below also samples n_estimators.
    n_estimators=300,
    learning_rate=0.1,
    subsample=1.0,
    colsample_bytree=0.8,
    max_depth=5,
    # GPU: tree_method="gpu_hist" on XGBoost < 2.0; device="cuda" on >= 2.0
    tree_method="hist",
    random_state=42,
    # Single-threaded booster: the randomized search already runs fits in
    # parallel (n_jobs=-1), and n_jobs=-1 here as well would make every
    # worker spawn one thread per core and fight over the CPU.
    n_jobs=1,
    # eval_metric is passed to the constructor instead of fit()
    eval_metric="rmse"
)

# 4) Search space (no early stopping)
# scipy conventions: uniform(loc, scale) samples from [loc, loc + scale];
# randint(low, high) samples integers from low..high-1.
param_dist = dict(
    max_depth=randint(3, 9),              # 3..8
    learning_rate=uniform(0.02, 0.18),    # 0.02..0.20
    n_estimators=randint(100, 1201),      # 100..1200
    subsample=uniform(0.7, 0.3),          # 0.70..1.00
    colsample_bytree=uniform(0.7, 0.3),   # 0.70..1.00
    reg_lambda=uniform(0.0, 2.0),         # L2 regularization
    reg_alpha=uniform(0.0, 0.5),          # L1 regularization
)

# 5) RandomizedSearchCV — optimize for RMSE
# The scorer is the negated RMSE, so "higher is better" still holds.
rs = RandomizedSearchCV(
    base,
    param_dist,
    n_iter=40,                              # tune as needed (more = better)
    scoring="neg_root_mean_squared_error",
    cv=3,
    n_jobs=-1,
    random_state=42,
    verbose=0,
)

# 6) Fit (no extra kwargs!)
rs.fit(X_train, y_train)

# Estimator exposed by the finished search as its best one.
best_model = rs.best_estimator_
print("Best params:", rs.best_params_)

# 7) Evaluation on the test set
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"MAE : {mae:.3f}\nRMSE: {rmse:.3f}\nR^2 : {r2:.3f}")

# 8) (Optional) Feature importance (gain)
importances = best_model.get_booster().get_score(importance_type="gain")
# Translate positional keys "f0", "f1", ... to column names; keys that are not
# in the map are kept unchanged.
name_map = dict((f"f{i}", n) for i, n in enumerate(feature_names))
importances_named = {name_map.get(key, key): gain for key, gain in importances.items()}
print("\nTop features (gain):")
for feat, gain in sorted(importances_named.items(), key=lambda item: item[1], reverse=True)[:10]:
    print(f"- {feat}: {gain:.3f}")

# 9) (Optioneel) Model opslaan/laden
# best_model.save_model("best_model_housing.ubj")
# loaded = XGBRegressor()
# loaded.load_model("best_model_housing.ubj")
