In [1]:
{
 "cells": [
  {"cell_type": "markdown", "metadata": {}, "source": [
   "# Klasyfikacja wieloklasowa obrazów z użyciem LogisticRegression\n",
   "\n",
   "- Przygotowanie danych obrazowych\n",
   "- Kodowanie etykiet\n",
   "- Trening i ewaluacja modelu\n",
   "- Testowanie trybów 'ovr' i 'multinomial'\n",
   "\n",
   "---"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 1. Importy i przygotowanie środowiska"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "import os\n",
   "import shutil\n",
   "import random\n",
   "import pandas as pd\n",
   "import numpy as np\n",
   "import cv2 as cv\n",
   "from sklearn.preprocessing import LabelEncoder\n",
   "from sklearn.linear_model import LogisticRegression\n",
   "from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 2. Funkcja do podziału danych na train/test"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "def gen_train_test(container_dir, max_category_sample=500, seed=None):\n",
   "    \"\"\"Split a directory of per-class image folders into train and test sets.\n",
   "\n",
   "    Files are copied (not moved) into a ``data`` directory created next to\n",
   "    ``container_dir``: training files go to ``data/train/<class>/`` and test\n",
   "    files go to the flat ``data/test/`` folder. The class of every test file\n",
   "    is written to ``data/test_labels.csv`` (columns ``filename``, ``class``).\n",
   "\n",
   "    Args:\n",
   "        container_dir: Directory whose sub-directories are the classes.\n",
   "        max_category_sample: Maximum number of training files per class; up\n",
   "            to half as many of the remaining files become test files.\n",
   "        seed: Optional seed for the shuffle. With a seed the split is\n",
   "            reproducible; with ``None`` the module-level ``random`` state is\n",
   "            used, as before.\n",
   "    \"\"\"\n",
   "    data_dir = os.path.join(os.path.dirname(container_dir), 'data')\n",
   "    train_dir = os.path.join(data_dir, 'train')\n",
   "    test_dir = os.path.join(data_dir, 'test')\n",
   "    os.makedirs(train_dir, exist_ok=True)\n",
   "    os.makedirs(test_dir, exist_ok=True)\n",
   "    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle\n",
   "    test_count = max_category_sample // 2\n",
   "    test_labels = []\n",
   "\n",
   "    # Sorted listings make the shuffle depend only on the random state, not on\n",
   "    # the arbitrary order in which os.listdir returns entries.\n",
   "    for class_name in sorted(os.listdir(container_dir)):\n",
   "        class_path = os.path.join(container_dir, class_name)\n",
   "        if not os.path.isdir(class_path):\n",
   "            continue\n",
   "        files = sorted(\n",
   "            f for f in os.listdir(class_path)\n",
   "            if os.path.isfile(os.path.join(class_path, f))\n",
   "        )\n",
   "        shuffle(files)\n",
   "        train_files = files[:max_category_sample]\n",
   "        test_files = files[max_category_sample:max_category_sample + test_count]\n",
   "\n",
   "        # train\n",
   "        train_class_dir = os.path.join(train_dir, class_name)\n",
   "        os.makedirs(train_class_dir, exist_ok=True)\n",
   "        for f in train_files:\n",
   "            shutil.copy(os.path.join(class_path, f), os.path.join(train_class_dir, f))\n",
   "\n",
   "        # test\n",
   "        for f in test_files:\n",
   "            # The test folder is flat, so the class name is prefixed: equal file\n",
   "            # names in different classes must not overwrite each other or share\n",
   "            # one row in the label file.\n",
   "            test_name = f'{class_name}_{f}'\n",
   "            shutil.copy(os.path.join(class_path, f), os.path.join(test_dir, test_name))\n",
   "            test_labels.append({'filename': test_name, 'class': class_name})\n",
   "\n",
   "    # Explicit columns keep the CSV header even when no test file was selected,\n",
   "    # so the file can still be read back with pd.read_csv.\n",
   "    labels_frame = pd.DataFrame(test_labels, columns=['filename', 'class'])\n",
   "    labels_frame.to_csv(os.path.join(data_dir, 'test_labels.csv'), index=False)"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 3. Funkcje do ładowania obrazów"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "def _load_image_vector(path, newSize, interpol, colorConv, ignore_image_a, norm):\n",
   "    \"\"\"Read one image and return it as a flat float32 vector, or None if unreadable.\"\"\"\n",
   "    img = cv.imread(path, cv.IMREAD_UNCHANGED)\n",
   "    if img is None:\n",
   "        return None\n",
   "    # Only a 3-D array has a channel axis; without the ndim check a grayscale\n",
   "    # image that is 4 pixels wide would be mistaken for BGRA and cropped.\n",
   "    if ignore_image_a and img.ndim == 3 and img.shape[-1] == 4:\n",
   "        img = img[..., :3]\n",
   "    if colorConv is not None:\n",
   "        img = cv.cvtColor(img, colorConv)\n",
   "    img = cv.resize(img, newSize, interpolation=interpol)\n",
   "    img = img.astype(np.float32)\n",
   "    if norm:\n",
   "        img /= 255.0\n",
   "    return img.flatten()\n",
   "\n",
   "\n",
   "def _build_dataset(vectors, labels, categories=None):\n",
   "    \"\"\"Pack feature vectors and string labels into the dataset dictionary.\"\"\"\n",
   "    lengths = {vector.size for vector in vectors}\n",
   "    if len(lengths) > 1:\n",
   "        # Mixed channel counts (e.g. grayscale next to colour files) cannot form\n",
   "        # one feature matrix; fail with a clear message instead of inside numpy.\n",
   "        raise ValueError(f'Images produced feature vectors of different lengths: {sorted(lengths)}')\n",
   "    if categories is None:\n",
   "        le = LabelEncoder()\n",
   "        y = le.fit_transform(labels)\n",
   "        names = le.classes_.tolist()\n",
   "    else:\n",
   "        # Reuse the caller's category order so the integer ids match another\n",
   "        # dataset that was encoded with the same list.\n",
   "        names = list(categories)\n",
   "        index = {name: i for i, name in enumerate(names)}\n",
   "        y = np.array([index[label] for label in labels], dtype=np.int64)\n",
   "    return {\n",
   "        'data': np.array(vectors),\n",
   "        'categories_name': names,\n",
   "        'categories_count': len(names),\n",
   "        'labels': y,\n",
   "    }\n",
   "\n",
   "\n",
   "def load_train_images(container_path, newSize=(64, 64), interpol=cv.INTER_AREA, colorConv=None, stand=False, ignore_image_a=True, norm=True, max_sample=200):\n",
   "    \"\"\"Load training images from ``container_path/<class>/`` sub-directories.\n",
   "\n",
   "    Args:\n",
   "        container_path: Directory with one sub-directory per class.\n",
   "        newSize: Target size passed to ``cv.resize``.\n",
   "        interpol: Interpolation flag passed to ``cv.resize``.\n",
   "        colorConv: Optional ``cv.cvtColor`` conversion code applied before resizing.\n",
   "        stand: Accepted for interface compatibility; currently not used.\n",
   "        ignore_image_a: Drop the fourth channel of 4-channel images.\n",
   "        norm: Divide pixel values by 255.0.\n",
   "        max_sample: Maximum number of files read per class.\n",
   "\n",
   "    Returns:\n",
   "        Dict with ``data`` (array of flattened images), ``categories_name``\n",
   "        (class names in LabelEncoder order), ``categories_count`` and\n",
   "        ``labels`` (integer class ids).\n",
   "\n",
   "    Raises:\n",
   "        ValueError: If the loaded images flatten to different lengths.\n",
   "    \"\"\"\n",
   "    train_img = []\n",
   "    labels = []\n",
   "    for cat in sorted(os.listdir(container_path)):\n",
   "        cat_dir = os.path.join(container_path, cat)\n",
   "        if not os.path.isdir(cat_dir):\n",
   "            continue\n",
   "        # Sort before truncating so the same files are picked on every run.\n",
   "        for f in sorted(os.listdir(cat_dir))[:max_sample]:\n",
   "            vector = _load_image_vector(os.path.join(cat_dir, f), newSize, interpol, colorConv, ignore_image_a, norm)\n",
   "            if vector is None:\n",
   "                continue\n",
   "            train_img.append(vector)\n",
   "            labels.append(cat)\n",
   "    return _build_dataset(train_img, labels)\n",
   "\n",
   "\n",
   "def load_test_images(container_path, newSize=(64, 64), interpol=cv.INTER_AREA, colorConv=None, stand=False, ignore_image_a=True, norm=True, max_sample=200, test_labels_path=None, categories=None):\n",
   "    \"\"\"Load test images from a flat directory and label them from a CSV file.\n",
   "\n",
   "    Args:\n",
   "        container_path: Directory that directly contains the test images.\n",
   "        newSize: Target size passed to ``cv.resize``.\n",
   "        interpol: Interpolation flag passed to ``cv.resize``.\n",
   "        colorConv: Optional ``cv.cvtColor`` conversion code applied before resizing.\n",
   "        stand: Accepted for interface compatibility; currently not used.\n",
   "        ignore_image_a: Drop the fourth channel of 4-channel images.\n",
   "        norm: Divide pixel values by 255.0.\n",
   "        max_sample: Maximum number of files read from the directory.\n",
   "        test_labels_path: Optional CSV with ``filename`` and ``class`` columns.\n",
   "            Files without an entry get the label 'unknown'.\n",
   "        categories: Optional list of class names, e.g. ``categories_name`` of\n",
   "            the training set. When given, labels are encoded by position in\n",
   "            this list and images with any other label are skipped. When\n",
   "            ``None`` the labels are encoded with a LabelEncoder fitted on the\n",
   "            test labels only, so the ids need not match the training ids.\n",
   "\n",
   "    Returns:\n",
   "        Dict with the same keys as the one returned by ``load_train_images``.\n",
   "\n",
   "    Raises:\n",
   "        ValueError: If the loaded images flatten to different lengths.\n",
   "    \"\"\"\n",
   "    if test_labels_path:\n",
   "        test_labels = pd.read_csv(test_labels_path)\n",
   "        label_dict = dict(zip(test_labels['filename'], test_labels['class']))\n",
   "    else:\n",
   "        label_dict = {}\n",
   "    # The folder mixes all classes, so a fixed-seed shuffle of the sorted names\n",
   "    # gives a truncated subset that is reproducible and not alphabetically biased.\n",
   "    files = sorted(os.listdir(container_path))\n",
   "    random.Random(0).shuffle(files)\n",
   "    files = files[:max_sample]\n",
   "    known = None if categories is None else set(categories)\n",
   "    test_img = []\n",
   "    labels = []\n",
   "    for f in files:\n",
   "        label = label_dict.get(f, 'unknown')\n",
   "        if known is not None and label not in known:\n",
   "            continue\n",
   "        vector = _load_image_vector(os.path.join(container_path, f), newSize, interpol, colorConv, ignore_image_a, norm)\n",
   "        if vector is None:\n",
   "            continue\n",
   "        test_img.append(vector)\n",
   "        labels.append(label)\n",
   "    return _build_dataset(test_img, labels, categories)"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 4. Przygotowanie danych (przykład użycia)"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "# gen_train_test('data_src', max_category_sample=200)\n",
   "train = load_train_images('data/train', newSize=(128,128), max_sample=200)\n",
   "test_raw = load_test_images('data/test', newSize=(128,128), max_sample=500, test_labels_path='data/test_labels.csv')\n",
   "\n",
   "# The test labels are encoded independently of the training labels, so their\n",
   "# integer ids only match when both sets contain exactly the same classes.\n",
   "# Re-encode them with the training categories and drop images whose class the\n",
   "# model is not trained on (for example files labelled 'unknown').\n",
   "train_index = {name: i for i, name in enumerate(train['categories_name'])}\n",
   "test_names = [test_raw['categories_name'][i] for i in test_raw['labels']]\n",
   "keep = np.array([name in train_index for name in test_names], dtype=bool)\n",
   "test = {\n",
   "    'data': test_raw['data'][keep],\n",
   "    'categories_name': train['categories_name'],\n",
   "    'categories_count': train['categories_count'],\n",
   "    'labels': np.array([train_index[name] for name in test_names if name in train_index], dtype=np.int64),\n",
   "}\n",
   "assert len(test['labels']) > 0, 'no test image has a label known from training'"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 5. Trening i ewaluacja modelu (tryb 'ovr', ustawiony jawnie)"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "# Baseline: one-vs-rest logistic regression on the flattened pixel vectors.\n",
   "clf = LogisticRegression(max_iter=200, multi_class='ovr')\n",
   "clf.fit(train['data'], train['labels'])\n",
   "y_pred = clf.predict(test['data'])\n",
   "\n",
   "# Metrics are macro-averaged, i.e. every class counts equally.\n",
   "print(\"Confusion matrix:\\n\", confusion_matrix(test['labels'], y_pred))\n",
   "print(\"Accuracy:\", accuracy_score(test['labels'], y_pred))\n",
   "print(\"Recall:\", recall_score(test['labels'], y_pred, average='macro'))\n",
   "print(\"Precision:\", precision_score(test['labels'], y_pred, average='macro'))\n",
   "print(\"F1:\", f1_score(test['labels'], y_pred, average='macro'))"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## 6. Testowanie różnych trybów multi_class ('ovr' i 'multinomial')"
  ]},
  {"cell_type": "code", "metadata": {}, "source": [
   "# Train one model per multi_class mode on the same data and print the same\n",
   "# macro-averaged metrics for each, so the two modes can be compared directly.\n",
   "for mode in ['ovr', 'multinomial']:\n",
   "    clf = LogisticRegression(max_iter=200, multi_class=mode)\n",
   "    clf.fit(train['data'], train['labels'])\n",
   "    y_pred = clf.predict(test['data'])\n",
   "    print(f'=== {mode} ===')\n",
   "    print(\"Confusion matrix:\\n\", confusion_matrix(test['labels'], y_pred))\n",
   "    print(\"Accuracy:\", accuracy_score(test['labels'], y_pred))\n",
   "    print(\"Recall:\", recall_score(test['labels'], y_pred, average='macro'))\n",
   "    print(\"Precision:\", precision_score(test['labels'], y_pred, average='macro'))\n",
   "    print(\"F1:\", f1_score(test['labels'], y_pred, average='macro'))"
  ]}
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


{'cells': [{'cell_type': 'markdown',
   'metadata': {},
   'source': ['# Klasyfikacja wieloklasowa obrazów z użyciem LogisticRegression\n',
    '\n',
    '- Przygotowanie danych obrazowych\n',
    '- Kodowanie etykiet\n',
    '- Trening i ewaluacja modelu\n',
    "- Testowanie trybów 'ovr' i 'multinomial'\n",
    '\n',
    '---']},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## 1. Importy i przygotowanie środowiska']},
  {'cell_type': 'code',
   'metadata': {},
   'source': ['import os\n',
    'import shutil\n',
    'import random\n',
    'import pandas as pd\n',
    'import numpy as np\n',
    'import cv2 as cv\n',
    'from sklearn.preprocessing import LabelEncoder\n',
    'from sklearn.linear_model import LogisticRegression\n',
    'from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score']},
  {'cell_type': 'markdown',
   'metadata': {},
   'source': ['## 2. Funkcja do podziału danych na train/test']},
  {'cell_type': '