In [5]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Nowoczesne sieci MLP do klasyfikacji obrazów (MLP-Mixer, FNet, gMLP)\n",
    "\n",
    "Implementacja na podstawie: [keras.io/examples/vision/mlp_image_classification/](https://keras.io/examples/vision/mlp_image_classification/)\n",
    "\n",
    "W notatniku znajdziesz implementację trzech modeli: MLP-Mixer, FNet oraz gMLP na zbiorze CIFAR-100."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Imports and data preparation\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import keras\n",
    "from keras import layers\n",
    "\n",
    "# Dataset constants used when building the models below.\n",
    "num_classes = 100\n",
    "input_shape = (32, 32, 3)\n",
    "\n",
    "(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()\n",
    "\n",
    "# Cast the images to float32 and divide by 255.\n",
    "x_train, x_test = (\n",
    "    images.astype(\"float32\") / 255.0 for images in (x_train, x_test)\n",
    ")\n",
    "\n",
    "# One-hot encode the labels with num_classes columns.\n",
    "y_train, y_test = (\n",
    "    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)\n",
    ")\n",
    "\n",
    "print(f\"x_train shape: {x_train.shape} - y_train shape: {y_train.shape}\")\n",
    "print(f\"x_test shape: {x_test.shape} - y_test shape: {y_test.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Hyperparameters\n",
    "seed = 42  # fixed so that repeated Restart & Run All executions are comparable\n",
    "weight_decay = 0.0001\n",
    "batch_size = 128\n",
    "num_epochs = 5  # for quick tests; use 50+ for a real training run\n",
    "dropout_rate = 0.2\n",
    "image_size = 32\n",
    "patch_size = 4\n",
    "num_patches = (image_size // patch_size) ** 2  # patches per image (grid side squared)\n",
    "embedding_dim = 64\n",
    "num_blocks = 4\n",
    "\n",
    "# Seed the Python, NumPy and backend random generators before any layer is\n",
    "# created, so weight initialisation and dropout are reproducible.\n",
    "keras.utils.set_random_seed(seed)\n",
    "\n",
    "print(f\"Liczba patchy: {num_patches}\")\n",
    "print(f\"Wymiar embeddingu: {embedding_dim}\")\n",
    "print(f\"Liczba bloków: {num_blocks}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Patch extraction layer\n",
    "class Patches(layers.Layer):\n",
    "    \"\"\"Cut a batch of images into non-overlapping square patches.\n",
    "\n",
    "    Args:\n",
    "        patch_size: Side length of each patch; also used as the stride.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, patch_size):\n",
    "        super().__init__()\n",
    "        self.patch_size = patch_size\n",
    "\n",
    "    def call(self, images):\n",
    "        \"\"\"Return the patches flattened to (batch, -1, values_per_patch).\"\"\"\n",
    "        # Window and stride are the same, so the patches do not overlap.\n",
    "        window = [1, self.patch_size, self.patch_size, 1]\n",
    "        extracted = tf.image.extract_patches(\n",
    "            images=images,\n",
    "            sizes=window,\n",
    "            strides=window,\n",
    "            rates=[1, 1, 1, 1],\n",
    "            padding=\"VALID\",\n",
    "        )\n",
    "        # The batch size is read dynamically; the last dimension is static.\n",
    "        flat_shape = [tf.shape(images)[0], -1, extracted.shape[-1]]\n",
    "        return tf.reshape(extracted, flat_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Patch embedding layer\n",
    "class PatchEncoder(layers.Layer):\n",
    "    \"\"\"Project flattened patches with a Dense layer and add a position embedding.\n",
    "\n",
    "    Args:\n",
    "        num_patches: Number of positions in the position-embedding table.\n",
    "        embedding_dim: Output size of both the projection and the embedding.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, num_patches, embedding_dim):\n",
    "        super().__init__()\n",
    "        # Keep the patch count on the instance: call() previously read the\n",
    "        # module-level `num_patches`, silently ignoring this argument.\n",
    "        self.num_patches = num_patches\n",
    "        self.projection = layers.Dense(embedding_dim)\n",
    "        self.position_embedding = layers.Embedding(\n",
    "            input_dim=num_patches, output_dim=embedding_dim\n",
    "        )\n",
    "\n",
    "    def call(self, patch):\n",
    "        \"\"\"Return projection(patch) plus the embedding of positions 0..num_patches-1.\"\"\"\n",
    "        positions = tf.range(start=0, limit=self.num_patches, delta=1)\n",
    "        encoded = self.projection(patch) + self.position_embedding(positions)\n",
    "        return encoded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. MLP-Mixer block\n",
    "class MLPMixerLayer(layers.Layer):\n",
    "    \"\"\"Mixer block: an MLP applied over axis 1, then an MLP applied over axis 2.\n",
    "\n",
    "    Both sub-blocks add a residual connection and share a single\n",
    "    LayerNormalization instance. Inputs must be rank 3; the axes are assumed\n",
    "    to be (batch, patches, channels) - confirm against the caller.\n",
    "\n",
    "    Args:\n",
    "        num_patches: Width of both Dense layers in the first MLP.\n",
    "        hidden_units: Width of both Dense layers in the second MLP.\n",
    "        dropout_rate: Dropout rate applied at the end of each MLP.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, num_patches, hidden_units, dropout_rate):\n",
    "        super().__init__()\n",
    "        self.mlp1 = keras.Sequential(\n",
    "            [\n",
    "                layers.Dense(num_patches, activation=\"gelu\"),\n",
    "                layers.Dense(num_patches),\n",
    "                layers.Dropout(dropout_rate),\n",
    "            ]\n",
    "        )\n",
    "        self.mlp2 = keras.Sequential(\n",
    "            [\n",
    "                layers.Dense(hidden_units, activation=\"gelu\"),\n",
    "                layers.Dense(hidden_units),\n",
    "                layers.Dropout(dropout_rate),\n",
    "            ]\n",
    "        )\n",
    "        self.normalize = layers.LayerNormalization(epsilon=1e-6)\n",
    "\n",
    "    def call(self, inputs):\n",
    "        swap_last_two = [0, 2, 1]\n",
    "\n",
    "        # First sub-block: transpose so mlp1 acts on what was axis 1, then\n",
    "        # transpose back and add the residual.\n",
    "        normalized = self.normalize(inputs)\n",
    "        mixed = self.mlp1(tf.transpose(normalized, perm=swap_last_two))\n",
    "        after_first = tf.transpose(mixed, perm=swap_last_two) + inputs\n",
    "\n",
    "        # Second sub-block: mlp2 acts on the last axis, followed by a residual.\n",
    "        return after_first + self.mlp2(self.normalize(after_first))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. FNet block\n",
    "class FNetLayer(layers.Layer):\n",
    "    \"\"\"FNet block: 2D FFT mixing followed by a feed-forward network.\n",
    "\n",
    "    Each of the two stages adds a residual connection and is followed by its\n",
    "    own LayerNormalization.\n",
    "\n",
    "    Args:\n",
    "        embedding_dim: Width of both Dense layers in the feed-forward network.\n",
    "        dropout_rate: Dropout rate applied between the two Dense layers.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, embedding_dim, dropout_rate):\n",
    "        super().__init__()\n",
    "        self.ffn = keras.Sequential(\n",
    "            [\n",
    "                layers.Dense(embedding_dim, activation=\"gelu\"),\n",
    "                layers.Dropout(dropout_rate),\n",
    "                layers.Dense(embedding_dim),\n",
    "            ]\n",
    "        )\n",
    "        self.normalize1 = layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.normalize2 = layers.LayerNormalization(epsilon=1e-6)\n",
    "\n",
    "    def call(self, inputs):\n",
    "        # fft2d needs a complex input; only the real part of the result is kept.\n",
    "        spectrum = tf.signal.fft2d(tf.cast(inputs, tf.complex64))\n",
    "        mixed = self.normalize1(tf.math.real(spectrum) + inputs)\n",
    "        # Feed-forward stage with a residual connection, then the final norm.\n",
    "        return self.normalize2(mixed + self.ffn(mixed))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 7. gMLP block\n",
    "class gMLPLayer(layers.Layer):\n",
    "    \"\"\"gMLP block: channel expansion, spatial gating, channel projection.\n",
    "\n",
    "    Args:\n",
    "        num_patches: Output width of the spatial projection.\n",
    "        embedding_dim: Channel width; the first projection expands to twice this.\n",
    "        dropout_rate: Dropout rate applied after the first channel projection.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, num_patches, embedding_dim, dropout_rate):\n",
    "        super().__init__()\n",
    "        self.channel_projection1 = keras.Sequential(\n",
    "            [\n",
    "                layers.Dense(embedding_dim * 2, activation=\"gelu\"),\n",
    "                layers.Dropout(dropout_rate),\n",
    "            ]\n",
    "        )\n",
    "        self.channel_projection2 = layers.Dense(embedding_dim)\n",
    "        # The bias starts at one.\n",
    "        self.spatial_projection = layers.Dense(num_patches, bias_initializer=\"Ones\")\n",
    "        self.normalize1 = layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.normalize2 = layers.LayerNormalization(epsilon=1e-6)\n",
    "\n",
    "    def spatial_gating_unit(self, x):\n",
    "        \"\"\"Split x in two along axis 2 and gate the first half with the second.\"\"\"\n",
    "        swap_last_two = [0, 2, 1]\n",
    "        gated, gate = tf.split(x, num_or_size_splits=2, axis=2)\n",
    "        # Normalize the gate, then project it along what was axis 1.\n",
    "        gate = tf.transpose(self.normalize2(gate), perm=swap_last_two)\n",
    "        gate = tf.transpose(self.spatial_projection(gate), perm=swap_last_two)\n",
    "        return gated * gate\n",
    "\n",
    "    def call(self, inputs):\n",
    "        normalized = self.normalize1(inputs)\n",
    "        gated = self.spatial_gating_unit(self.channel_projection1(normalized))\n",
    "        # The residual is taken from the normalized input, not the raw input.\n",
    "        return normalized + self.channel_projection2(gated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 8. Model construction\n",
    "def build_classifier(blocks, name=\"mlp_model\"):\n",
    "    \"\"\"Assemble a patch-based classifier around the given processing blocks.\n",
    "\n",
    "    Pipeline: input -> Patches -> PatchEncoder -> each block in order ->\n",
    "    global average pooling -> dropout -> softmax over num_classes.\n",
    "    Reads input_shape, patch_size, num_patches, embedding_dim, dropout_rate\n",
    "    and num_classes from the notebook's global variables.\n",
    "\n",
    "    Args:\n",
    "        blocks: Iterable of callables (layers) applied sequentially to the\n",
    "            encoded patches.\n",
    "        name: Name given to the resulting keras.Model.\n",
    "\n",
    "    Returns:\n",
    "        The uncompiled keras.Model.\n",
    "    \"\"\"\n",
    "    image_input = keras.Input(shape=input_shape)\n",
    "    raw_patches = Patches(patch_size)(image_input)\n",
    "    features = PatchEncoder(num_patches, embedding_dim)(raw_patches)\n",
    "\n",
    "    for mixing_block in blocks:\n",
    "        features = mixing_block(features)\n",
    "\n",
    "    # Collapse axis 1 by averaging to get one feature vector per image.\n",
    "    pooled = layers.GlobalAvgPool1D()(features)\n",
    "    pooled = layers.Dropout(dropout_rate)(pooled)\n",
    "    class_probabilities = layers.Dense(num_classes, activation=\"softmax\")(pooled)\n",
    "    return keras.Model(image_input, class_probabilities, name=name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 9. Training and evaluation\n",
    "def run_experiment(model, learning_rate=1e-3):\n",
    "    \"\"\"Compile, train and evaluate a model on the notebook's dataset.\n",
    "\n",
    "    Reads x_train, y_train, x_test, y_test, batch_size, num_epochs and\n",
    "    weight_decay from the notebook's global variables.\n",
    "\n",
    "    Args:\n",
    "        model: The keras.Model to train; it is compiled in place.\n",
    "        learning_rate: Optimizer learning rate. Defaults to 1e-3, the value\n",
    "            that was previously hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        The History object returned by model.fit.\n",
    "    \"\"\"\n",
    "    # AdamW is used so the weight_decay hyperparameter is actually applied;\n",
    "    # plain Adam left it unused.\n",
    "    optimizer = keras.optimizers.AdamW(\n",
    "        learning_rate=learning_rate, weight_decay=weight_decay\n",
    "    )\n",
    "    model.compile(\n",
    "        optimizer=optimizer,\n",
    "        loss=\"categorical_crossentropy\",\n",
    "        metrics=[\"accuracy\"],\n",
    "    )\n",
    "\n",
    "    print(f\"Trenowanie modelu: {model.name}\")\n",
    "    print(f\"Liczba parametrów: {model.count_params():,}\")\n",
    "\n",
    "    # 10% of the training data is held out for validation.\n",
    "    history = model.fit(\n",
    "        x_train,\n",
    "        y_train,\n",
    "        batch_size=batch_size,\n",
    "        epochs=num_epochs,\n",
    "        validation_split=0.1,\n",
    "        verbose=1,\n",
    "    )\n",
    "\n",
    "    _, test_acc = model.evaluate(x_test, y_test, verbose=0)\n",
    "    print(f\"Test accuracy: {test_acc:.2%}\")\n",
    "    return history"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testowanie implementacji - MLP-Mixer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MLP-Mixer experiment\n",
    "print(\"=== MLP-Mixer ===\")\n",
    "\n",
    "# embedding_dim is passed as hidden_units so the second MLP keeps the channel width.\n",
    "mlpmixer_blocks = [\n",
    "    MLPMixerLayer(\n",
    "        num_patches=num_patches,\n",
    "        hidden_units=embedding_dim,\n",
    "        dropout_rate=dropout_rate,\n",
    "    )\n",
    "    for _ in range(num_blocks)\n",
    "]\n",
    "mlpmixer_model = build_classifier(mlpmixer_blocks, name=\"mlp_mixer\")\n",
    "mlpmixer_history = run_experiment(mlpmixer_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testowanie implementacji - FNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FNet experiment\n",
    "print(\"\\n=== FNet ===\")\n",
    "\n",
    "# One independent FNetLayer per block.\n",
    "fnet_blocks = [\n",
    "    FNetLayer(embedding_dim=embedding_dim, dropout_rate=dropout_rate)\n",
    "    for _ in range(num_blocks)\n",
    "]\n",
    "fnet_model = build_classifier(fnet_blocks, name=\"fnet\")\n",
    "fnet_history = run_experiment(fnet_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testowanie implementacji - gMLP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gMLP experiment\n",
    "print(\"\\n=== gMLP ===\")\n",
    "\n",
    "# One independent gMLPLayer per block.\n",
    "gmlp_blocks = [\n",
    "    gMLPLayer(\n",
    "        num_patches=num_patches,\n",
    "        embedding_dim=embedding_dim,\n",
    "        dropout_rate=dropout_rate,\n",
    "    )\n",
    "    for _ in range(num_blocks)\n",
    "]\n",
    "gmlp_model = build_classifier(gmlp_blocks, name=\"gmlp\")\n",
    "gmlp_history = run_experiment(gmlp_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Porównanie wyników"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Results comparison\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "models = [mlpmixer_model, fnet_model, gmlp_model]\n",
    "histories = [mlpmixer_history, fnet_history, gmlp_history]\n",
    "model_names = [\"MLP-Mixer\", \"FNet\", \"gMLP\"]\n",
    "\n",
    "# One panel per metric: (history key, axis label / title prefix).\n",
    "panels = [(\"accuracy\", \"Accuracy\"), (\"loss\", \"Loss\")]\n",
    "fig, axes = plt.subplots(1, len(panels), figsize=(12, 4))\n",
    "\n",
    "for ax, (metric, metric_label) in zip(axes, panels):\n",
    "    # Training and validation curves of every model on the same axes.\n",
    "    for model_name, history in zip(model_names, histories):\n",
    "        ax.plot(history.history[metric], label=f\"{model_name} - Train\")\n",
    "        ax.plot(history.history[f\"val_{metric}\"], label=f\"{model_name} - Val\")\n",
    "    ax.set_title(f\"{metric_label} podczas treningu\")\n",
    "    ax.set_xlabel(\"Epoch\")\n",
    "    ax.set_ylabel(metric_label)\n",
    "    ax.legend()\n",
    "    ax.grid(True)\n",
    "\n",
    "fig.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Summary of test-set results\n",
    "print(\"\\n=== Podsumowanie wyników ===\")\n",
    "for model_name, model in zip(model_names, models):\n",
    "    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)\n",
    "    print(f\"{model_name}: Test Accuracy = {test_acc:.2%}, Test Loss = {test_loss:.4f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

NameError: name 'null' is not defined