diff --git a/_site/2020/01/retrosprect-of-acl-paper-2020/index.html b/_site/2020/01/retrosprect-of-acl-paper-2020/index.html
index 5c9f8cf605..f7de9e92b2 100644
--- a/_site/2020/01/retrosprect-of-acl-paper-2020/index.html
+++ b/_site/2020/01/retrosprect-of-acl-paper-2020/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"2020 Annual Conference of the Association for Computational Linguistics","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/acl2020.png","headline":"Retrospect of ACL 2020 paper writing","dateModified":"2020-01-29T00:00:00+09:00","datePublished":"2020-01-29T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
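The Bernoulli-keep-then-rescale behaviour described in the dropout notes above can be sketched in a few lines. A minimal sketch (my own illustration, not the actual fastai/Pytorch source) of inverted dropout, where kept activations are divided by the keep probability so that nothing special needs to happen at inference time:

~~~python
import torch

def dropout(x, p=0.5, training=True):
    # At test time we do nothing, as the note says about the Pytorch code.
    if not training or p == 0.:
        return x
    # 1) A bernoulli draw decides whether each activation is kept...
    keep = torch.zeros_like(x).bernoulli_(1 - p)
    # 2) ...and we divide by the keep probability, so with p=0.5 a kept
    #    value is scaled to 2x its size and a dropped one becomes 0.
    return x * keep / (1 - p)

x = torch.ones(5)
print(dropout(x))                  # e.g. tensor([2., 0., 2., 2., 0.])
print(dropout(x, training=False))  # unchanged at inference time
~~~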
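The heatmap recipe (hook the conv body, then average over the feature axis) can likewise be sketched with a plain Pytorch forward hook; fastai's hook_output wraps this mechanism. The model and sizes below are stand-ins chosen to reproduce the 11 by 11 by 512 example:

~~~python
import torch
import torch.nn as nn

# Stand-in for the convolutional part of a pre-trained model (model[0]).
body = nn.Sequential(nn.Conv2d(3, 512, 3, stride=2), nn.ReLU())

acts = {}
def hook(module, inp, out):
    acts['conv'] = out.detach()            # conv output, before avg_pooling

handle = body.register_forward_hook(hook)
_ = body(torch.randn(1, 3, 24, 24))        # the forward pass fills acts
handle.remove()

avg_acts = acts['conv'][0].mean(0)         # average over the feature axis
print(avg_acts.shape)                      # torch.Size([11, 11]) -> heatmap
~~~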
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
(have to look into source code) What do the parameter alpha do? When people make masked img, would it be have ranged integer limit? Does image normalization related with this?lbl_sorted = sorted(lbl_names)f_sorted = sorted(fnames)lbl_1 = lbl_sorted[33]f_1 = f_sorted[33]img = open_image(lbl_1)mask = open_mask(lbl_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img. show(ax=axs[0], title='1')mask. show(ax=axs[1], title='2', alpha=1. ) img_2 = open_image(f_1)mask_2 = open_mask(f_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img_2. show(ax=axs[0], title='3',)mask_2. show(ax=axs[1], title='4', alpha=1. ) open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])img. data #labeled datatensor([[[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]]])mask. data # after mask, labeled datatensor([[[ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], . . . , [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30]]])img_2. data, mask_2. data(tensor([[[0. 0706, 0. 0667, 0. 0706, . . . , 0. 6431, 0. 6549, 0. 6627], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 6431, 0. 6510, 0. 6549], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 6392, 0. 6588, 0. 6588], . . . , [0. 0863, 0. 0824, 0. 0824, . . . , 0. 1333, 0. 1216, 0. 1255], [0. 0902, 0. 0863, 0. 0824, . . . , 0. 1255, 0. 1176, 0. 1216], [0. 0863, 0. 0824, 0. 0784, . . . , 0. 1137, 0. 1059, 0. 1137]], [[0. 0706, 0. 0667, 0. 0706, . . . , 0. 7490, 0. 7608, 0. 7686], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 7451, 0. 7569, 0. 7608], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7412, 0. 7529, 0. 7529], . . . , [0. 0980, 0. 0941, 0. 0941, . . . , 0. 1804, 0. 1686, 0. 1725], [0. 1059, 0. 1020, 0. 0980, . . . , 0. 1725, 0. 1647, 0. 1686], [0. 1020, 0. 0980, 0. 0941, . . . , 0. 1608, 0. 1529, 0. 1608]], [[0. 0784, 0. 0745, 0. 0784, . . . , 0. 7569, 0. 7686, 0. 7765], [0. 0824, 0. 0784, 0. 0784, . . . , 0. 7647, 0. 7647, 0. 7686], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7608, 0. 7647, 0. 7647], . . . , [0. 1216, 0. 1176, 0. 1176, . . . , 0. 2000, 0. 1882, 0. 1922], [0. 1176, 0. 1137, 0. 1098, . . . , 0. 1843, 0. 1765, 0. 1804], [0. 1137, 0. 1098, 0. 
1059, . . . , 0. 1725, 0. 1647, 0. 1725]]]), tensor([[[ 18, 17, 18, . . . , 183, 186, 188], [ 19, 18, 18, . . . , 183, 185, 186], [ 20, 18, 19, . . . , 182, 185, 185], . . . , [ 25, 24, 24, . . . , 43, 40, 41], [ 26, 25, 24, . . . , 41, 39, 40], [ 25, 24, 23, . . . , 38, 36, 38]]]))3. What is a difference between image and imageSegment?: imageSegment An ImageSegment object has the same properties as an Image. The only difference is that when applying the transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It’s easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0])mask = open_mask(lbl_names[0])_,axs = plt. subplots(1,3, figsize=(8,4))img. show(ax=axs[0], title='no mask')img. show(ax=axs[1], y=mask, title='masked') #seg mask over the img using y argmask. show(ax=axs[2], title='mask only', alpha=1. ) vision. image ##4. Why/How img div by 255 and how it results fast. ai : vision. image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. So sum of the pixel’s value over all channels(which is S) divides each intensified channel so that nomalized value will be R/S, G/S and B/S (where, S=R+G+B). Detailed explain here4. Python Evaluation Order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10mask_tmp = trg_tmp != void_tmpprint(mask_tmp, trg_tmp, void_tmp) # (1) target is not same with voidTrue 1 10# Example 1x = 1y = 2x,y = y,x+yx, y(2, 3)# Example 2x = 1y = 2x = yy = x+yx, y(2, 4)5. model learner parameter :: pct_start: A: Percentage of total number of epochs when learning rate rises during one cycle. Q: Sorry, I still confused that one cycle in the new API only runs one epoch. How the percentage of total number of epochs works? Can you give a example? If learn. fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0. 05)??A: Ok, strictly correct answer would be percentage of iterations, so you can have lr both increase and decrease during same epoch. In your example, say, you have 100 iterations per epoch, then for half an epoch (0. 05 * (10 * 100) = 50) lr will rise, then slowly decrease. Q2: Thanks for this explanation … so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0. 3, it means that your LR is going up for 30% of your iterations and then decreasing over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that’s correct. You can verify that by changing its value and check:learn. recorder. plot_lr() For example if pct_start = 0. 2 source: forums. fastai "
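As a side note on the R/S, G/S, B/S normalization mentioned in section 4, here is a minimal numpy sketch (hypothetical pixel values, not part of the notebook above):

~~~python
import numpy as np

img = np.random.randint(1, 256, (720, 960, 3)).astype(float)  # fake RGB image
s = img.sum(axis=2, keepdims=True)   # S = R + G + B for each pixel
normalized = img / s                 # channels become R/S, G/S, B/S
print(normalized.sum(axis=2).max())  # every pixel now sums to 1.0
~~~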
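The iteration arithmetic in the pct_start answers is easy to verify by hand; a small sketch using the numbers from the Q&A above:

~~~python
# pct_start splits the one-cycle schedule by iterations, not by epochs.
epochs, iters_per_epoch, pct_start = 10, 100, 0.05
total_iters = epochs * iters_per_epoch
rising = int(total_iters * pct_start)  # LR rises for these iterations
falling = total_iters - rising         # then slowly decreases for the rest
print(rising, falling)                 # 50 950 -> half an epoch of warm-up
~~~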
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
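The note says to check the manual gradients against Pytorch autograd but does not show the check. A minimal sketch with assumed shapes (100 rows instead of 50000) of how that comparison could look:

~~~python
import torch

x, y = torch.randn(100, 784), torch.randn(100)
w1 = torch.randn(784, 50).requires_grad_()
b1 = torch.zeros(50, requires_grad=True)
w2 = torch.randn(50, 1).requires_grad_()
b2 = torch.zeros(1, requires_grad=True)

out = (x @ w1 + b1).clamp_min(0.) @ w2 + b2   # lin -> relu -> lin
loss = ((out.squeeze(-1) - y) ** 2).mean()    # mse
loss.backward()
print(w1.grad.shape)  # torch.Size([784, 50]); compare with the manual w1.g
~~~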
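And a sketch of the Module refactor the note describes, where __call__ does the common plumbing and each layer only overrides forward (close in spirit to the lesson notebook, reconstructed here from the description above):

~~~python
class Module():
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)   # subclasses override forward
        return self.out

    def forward(self):
        raise NotImplementedError('Module.forward')

class Relu(Module):
    def forward(self, inp):
        return inp.clamp_min(0.) - 0.5

    def backward(self):
        inp, = self.args
        inp.g = (inp > 0).float() * self.out.g
~~~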
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
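A quick sketch of the claim that simplified kaiming init keeps the post-ReLU statistics in a reasonable range (MNIST-like sizes assumed; exact numbers vary with the random seed):

~~~python
import math
import torch

m, nh = 784, 50
x = torch.randn(10000, m)                  # normalized input: mean 0, std 1
w1 = torch.randn(m, nh) * math.sqrt(2./m)  # simplified kaiming init for ReLU
t = (x @ w1).clamp_min(0.)
print(t.mean().item(), t.std().item())     # roughly 0.5 and 0.8, not 0 and 1
~~~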
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
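The broadcasting and einsum formulations above can be checked against Pytorch's own operator with the same tolerance trick as test_near; a minimal self-contained sketch with assumed small shapes:

~~~python
import torch

a, b = torch.randn(5, 784), torch.randn(784, 10)

def matmul_broadcast(a, b):
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)  # broadcast row i over b
    return c

def matmul_einsum(a, b):
    return torch.einsum('ik,kj->ij', a, b)

# Rounding errors mean we compare within a tolerance, not exactly.
assert torch.allclose(matmul_broadcast(a, b), a @ b, rtol=1e-3, atol=1e-5)
assert torch.allclose(matmul_einsum(a, b), a @ b, rtol=1e-3, atol=1e-5)
~~~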
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial Python model: which is linear, like $Xw$ (weight) $+ a$ (bias) $= Y$. If you don't know how to multiply matrices, refer to this matmul visualization site. How much time does it take if we use a pure Python function? matmul, a typical matrix multiplication function, takes about 1 second to process a single training example! (perhaps assuming a stochastic setting, with 5 data points in validation) At that rate it would take about 11.36 hours to update the parameters for even a single layer and a single iteration! (if it were my computer, it would be 14 hours..)🤪 THIS is why we need to consider 'time' & 'space'. This is kinda slow - what if we could speed it up by 50,000 times? Let's try! Elementwise ops: How can we make Python faster?: If we want to calculate faster, remove the pythonic calculation by passing the computation down to something that is written in a language other than Python, like PyTorch. According to the PyTorch docs it uses C++ (via ATen), which is why calling PyTorch's elementwise ops from Python is so much faster. What is an elementwise operation?: the items form pairs, and we operate on the corresponding components. Footnote: notebooks material, video, broadcasting excel"
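For reference, a self-contained sketch of the slow pure-Python baseline this note times (the shapes are illustrative, not the full dataset):

~~~python
import torch

def matmul(a, b):
    # the slow baseline: three nested Python loops
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br, "inner dimensions must match"
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            for k in range(ac):
                c[i, j] += a[i, k] * b[k, j]
    return c

m1 = torch.randn(5, 784)
m2 = torch.randn(784, 10)
assert torch.allclose(matmul(m1, m2), m1 @ m2, rtol=1e-3, atol=1e-5)
~~~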
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
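Putting the steps above together, a minimal sketch assuming fastai v1, with `path` pointing at the downloaded image folders (one folder per city, used as the label); the seed, image size, and epoch count are my own illustrative choices:

~~~python
from fastai.vision import *

np.random.seed(42)                      # fix the random 20% validation split
data = ImageDataBunch.from_folder(
    path, train=".", valid_pct=0.2,     # hold out 20% for validation
    ds_tfms=get_transforms(), size=224, bs=64,
).normalize(imagenet_stats)

learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(4)                  # train the head with the one-cycle policy
learn.lr_find()                         # then inspect with learn.recorder.plot()
~~~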
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/02/GPU-time/index.html b/_site/2020/02/GPU-time/index.html
index 0e58c9da21..2511842bff 100644
--- a/_site/2020/02/GPU-time/index.html
+++ b/_site/2020/02/GPU-time/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"Motivation","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/02/GPU-time/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/10.png","headline":"4 reasons took much time to setting GPU for fast.ai than I expected","dateModified":"2020-02-05T00:00:00+09:00","datePublished":"2020-02-05T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/02/GPU-time/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
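A minimal sketch of the hook workflow described above, assuming fastai v1, a `data` DataBunch, and a trained `learn` from cnn_learner:

~~~python
from fastai.vision import *
from fastai.callbacks.hooks import hook_output

x, y = data.valid_ds[0]            # one image and its label
xb, _ = data.one_item(x)           # normalized minibatch of one
xb = xb.cuda()                     # put it on the GPU

m = learn.model.eval()
with hook_output(m[0]) as hook:    # m[0] = the convolutional body
    preds = m(xb)
acts = hook.stored[0]              # e.g. 512 x 11 x 11 activations
avg_acts = acts.mean(0)            # 11 x 11: how activated was each area?
~~~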
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
(have to look into source code) What do the parameter alpha do? When people make masked img, would it be have ranged integer limit? Does image normalization related with this?lbl_sorted = sorted(lbl_names)f_sorted = sorted(fnames)lbl_1 = lbl_sorted[33]f_1 = f_sorted[33]img = open_image(lbl_1)mask = open_mask(lbl_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img. show(ax=axs[0], title='1')mask. show(ax=axs[1], title='2', alpha=1. ) img_2 = open_image(f_1)mask_2 = open_mask(f_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img_2. show(ax=axs[0], title='3',)mask_2. show(ax=axs[1], title='4', alpha=1. ) open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])img. data #labeled datatensor([[[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]]])mask. data # after mask, labeled datatensor([[[ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], . . . , [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30]]])img_2. data, mask_2. data(tensor([[[0. 0706, 0. 0667, 0. 0706, . . . , 0. 6431, 0. 6549, 0. 6627], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 6431, 0. 6510, 0. 6549], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 6392, 0. 6588, 0. 6588], . . . , [0. 0863, 0. 0824, 0. 0824, . . . , 0. 1333, 0. 1216, 0. 1255], [0. 0902, 0. 0863, 0. 0824, . . . , 0. 1255, 0. 1176, 0. 1216], [0. 0863, 0. 0824, 0. 0784, . . . , 0. 1137, 0. 1059, 0. 1137]], [[0. 0706, 0. 0667, 0. 0706, . . . , 0. 7490, 0. 7608, 0. 7686], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 7451, 0. 7569, 0. 7608], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7412, 0. 7529, 0. 7529], . . . , [0. 0980, 0. 0941, 0. 0941, . . . , 0. 1804, 0. 1686, 0. 1725], [0. 1059, 0. 1020, 0. 0980, . . . , 0. 1725, 0. 1647, 0. 1686], [0. 1020, 0. 0980, 0. 0941, . . . , 0. 1608, 0. 1529, 0. 1608]], [[0. 0784, 0. 0745, 0. 0784, . . . , 0. 7569, 0. 7686, 0. 7765], [0. 0824, 0. 0784, 0. 0784, . . . , 0. 7647, 0. 7647, 0. 7686], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7608, 0. 7647, 0. 7647], . . . , [0. 1216, 0. 1176, 0. 1176, . . . , 0. 2000, 0. 1882, 0. 1922], [0. 1176, 0. 1137, 0. 1098, . . . , 0. 1843, 0. 1765, 0. 1804], [0. 1137, 0. 1098, 0. 
1059, . . . , 0. 1725, 0. 1647, 0. 1725]]]), tensor([[[ 18, 17, 18, . . . , 183, 186, 188], [ 19, 18, 18, . . . , 183, 185, 186], [ 20, 18, 19, . . . , 182, 185, 185], . . . , [ 25, 24, 24, . . . , 43, 40, 41], [ 26, 25, 24, . . . , 41, 39, 40], [ 25, 24, 23, . . . , 38, 36, 38]]]))3. What is the difference between Image and ImageSegment?: imageSegment An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it ignores the functions that deal with lighting and keeps values of 0 and 1. It’s easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0])mask = open_mask(lbl_names[0])_,axs = plt.subplots(1,3, figsize=(8,4))img.show(ax=axs[0], title='no mask')img.show(ax=axs[1], y=mask, title='masked') #seg mask over the img using y argmask.show(ax=axs[2], title='mask only', alpha=1.) vision.image 4. Why/how is the image divided by 255, and what does it produce? fast.ai : vision.image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can be a simple and effective way of achieving this: the sum of the pixel’s values over all channels (S = R + G + B) divides each channel's intensity, so that the normalized values are R/S, G/S and B/S. Detailed explanation here5. Python evaluation order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10mask_tmp = trg_tmp != void_tmpprint(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as voidTrue 1 10# Example 1x = 1y = 2x,y = y,x+yx, y(2, 3)# Example 2x = 1y = 2x = yy = x+yx, y(2, 4)6. Model learner parameter :: pct_start: A: Percentage of the total number of epochs when the learning rate rises during one cycle. Q: Sorry, I'm still confused: one cycle in the new API only runs one epoch, so how does the percentage of the total number of epochs work? Can you give an example, say learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)?A: OK, strictly the correct answer would be the percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation … so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, it means that your LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that’s correct. You can verify it by changing the value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2 source: forums.fastai "
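As a sanity check on the pct_start arithmetic in that Q&A, here is a small sketch; the epoch and iteration counts are the hypothetical ones from the forum answer, not measured values.

~~~python
# Reproduces the forum answer's arithmetic: pct_start is a fraction of
# *total iterations*, not of epochs (numbers are the Q&A's hypothetical ones).
epochs, iters_per_epoch, pct_start = 10, 100, 0.05
total_iters = epochs * iters_per_epoch        # 1000
rising = int(pct_start * total_iters)         # 50 iterations: LR increases
falling = total_iters - rising                # 950 iterations: LR decreases
print(rising, falling)                        # 50 950
~~~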
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
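For reference, here is a minimal sketch of the Module refactor this note describes: __call__ stores the arguments and delegates to forward(), and backward() replays them into a bwd() hook. This follows the lesson's idea as I understand it; names like `bwd` and the Relu subclass are an approximation, not the canonical notebook code.

~~~python
# Sketch of the Module refactor: __call__ stores args, delegates to
# forward(); backward() replays the stored output and inputs into bwd().
class Module():
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)
        return self.out
    def forward(self): raise NotImplementedError
    def backward(self): self.bwd(self.out, *self.args)

class Relu(Module):
    def forward(self, inp): return inp.clamp_min(0.) - 0.5
    def bwd(self, out, inp): inp.g = (inp > 0).float() * out.g
~~~

Any subclass then only writes forward() and bwd(); the bookkeeping lives once in the parent, which is the duplicate-removal the note refers to.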
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
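To pin down the normalization rule stated at the top of this note, here is a minimal sketch (assuming `x_train`/`x_valid` tensors as in the notebook): the validation set is normalized with the training statistics on purpose.

~~~python
# Normalize with *training* statistics only; never let validation data
# leak into the statistics (sketch, assuming x_train/x_valid tensors).
def normalize(x, m, s): return (x - m) / s

train_mean, train_std = x_train.mean(), x_train.std()
x_train = normalize(x_train, train_mean, train_std)
x_valid = normalize(x_valid, train_mean, train_std)  # train stats on purpose
~~~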
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
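Putting the three variants from this note side by side, here is a self-contained sketch on random data; the shapes are arbitrary, and near-equality is checked with the same tolerances as the test_near helper above.

~~~python
import torch
# Compare the broadcasting matmul, einsum, and PyTorch's operator
# on random data (sketch; shapes are arbitrary).
a, b = torch.randn(5, 3), torch.randn(3, 4)

def matmul_broadcast(a, b):
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        # broadcast row i of a (as a column) against all of b, sum over k
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

c1 = matmul_broadcast(a, b)
c2 = torch.einsum('ik,kj->ij', a, b)
c3 = a @ b
assert torch.allclose(c1, c3, rtol=1e-3, atol=1e-5)
assert torch.allclose(c2, c3, rtol=1e-3, atol=1e-5)
~~~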
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
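For concreteness, the pure-Python matmul whose slowness is being measured looks roughly like this; it is a sketch of the lesson notebook's triple-loop version, not a transcription of it.

~~~python
import torch
# The triple-loop matmul whose slowness motivates this whole section
# (a sketch of the notebook's pure-Python version).
def matmul(a, b):
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br                  # inner dimensions must match
    c = torch.zeros(ar, bc)
    for i in range(ar):              # rows of a
        for j in range(bc):          # columns of b
            for k in range(ac):      # inner dimension
                c[i, j] += a[i, k] * b[k, j]
    return c
~~~

Every `+=` here is a separate Python-level operation, which is exactly the overhead that broadcasting, einsum, and the native operator remove.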
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was a breakthrough in the NLP industry, I tried to explain how it can be applied to hands-on products and how it can help people with their own products. The result was quite motivating for me. They gave feedback that since it wasn't that theoretical, they could enjoy it and still take away useful information. Someone asked me whether I had studied presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
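For reference, a minimal end-to-end sketch of the training steps described above, using the fastai v1 API; the folder path is the hypothetical download directory from the google_images_download step:
~~~python
from fastai.vision import *

# Hypothetical folder produced by google_images_download, one subfolder per city
path = Path('../../materials/dataset/pkg/')

# Scraped data has no predefined split, so hold out 20% for validation
data = (ImageDataBunch.from_folder(path, train=".", valid_pct=0.2,
                                   ds_tfms=get_transforms(), size=224, bs=64)
        .normalize(imagenet_stats))

learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(4)                      # train the head first

learn.unfreeze()                            # then fine-tune the whole model
learn.lr_find()
learn.recorder.plot()                       # pick a rate below the steepest slope
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))
~~~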
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/02/classifier-city/index.html b/_site/2020/02/classifier-city/index.html
index 97866cc76a..b9d18103eb 100644
--- a/_site/2020/02/classifier-city/index.html
+++ b/_site/2020/02/classifier-city/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"CONTENTS","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/02/classifier-city/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/munich2.jpg","headline":"Making a classifier with image dataset made from gooogle","dateModified":"2020-02-15T00:00:00+09:00","datePublished":"2020-02-15T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/02/classifier-city/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
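A minimal sketch of the train-vs-inference dropout behaviour described above, in plain PyTorch and independent of the lesson's code:
~~~python
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(8)

drop.train()
print(drop(x))  # roughly half the entries zeroed, survivors scaled to 1/(1-p) = 2.0

drop.eval()
print(drop(x))  # identity: nothing special is needed at inference time
~~~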
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v) (this prints each video-sequence key with its full list of frame ids; the long lists are omitted here, and the counts below summarize them) for k, v in d.items(): print(k, len(d[k])) 0001TP 124 0016E5 305 Seq05VD 171 0006R0 101 for i in d2.keys(): print(i, len(d2[i])) 0016E5 305 0001TP 124 0006R0 101 Seq05VD 171 files[0], labels[0] (('0001TP', '009210'), ('0016E5', '01800')) 2. My question: Link: Why do we need masking? And does the color come from the fastai library?
(have to look into the source code) What does the parameter alpha do? When people make a masked img, does it have a ranged integer limit? Is image normalization related to this? lbl_sorted = sorted(lbl_names) f_sorted = sorted(fnames) lbl_1 = lbl_sorted[33] f_1 = f_sorted[33] img = open_image(lbl_1) mask = open_mask(lbl_1) _,axs = plt.subplots(1,2, figsize=(10,5)) img.show(ax=axs[0], title='1') mask.show(ax=axs[1], title='2', alpha=1.) img_2 = open_image(f_1) mask_2 = open_mask(f_1) _,axs = plt.subplots(1,2, figsize=(10,5)) img_2.show(ax=axs[0], title='3') mask_2.show(ax=axs[1], title='4', alpha=1.) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) img.data # the label file opened as an image: 3 channels of floats in [0, 1] (values like 0.0157, 0.0824, 0.1176; long dump omitted) mask.data # the same file opened as a mask: a single channel of integer class codes, tensor([[[ 4, 4, 4, ..., 21, 21, 21], ..., [17, 17, 17, ..., 30, 30, 30]]]) img_2.data, mask_2.data # the raw photo as floats in [0, 1], and the photo wrongly opened as a mask: integers in 0-255 (long dumps omitted) 3. What is the difference between Image and ImageSegment?: ImageSegment An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It's easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _,axs = plt.subplots(1,3, figsize=(8,4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') # seg mask over the img using the y arg mask.show(ax=axs[2], title='mask only', alpha=1.) vision.image 4. Why/how is img divided by 255 and how does fast.ai do it: vision.image - If div=True, pixel values are divided by 255 to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this: the sum of a pixel's values over all channels (S = R+G+B) divides each channel, so the normalized values are R/S, G/S and B/S. Detailed explanation here 5. Python evaluation order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x,y = y,x+y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x+y x, y (2, 4) 6. Model learner parameter: pct_start: A: Percentage of the total number of epochs when the learning rate rises during one cycle. Q: Sorry, I'm still confused: one cycle in the new API only runs one epoch, so how does the percentage of the total number of epochs work? Can you give an example, e.g. learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: OK, the strictly correct answer would be percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation… so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, your LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that's correct. You can verify that by changing its value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2. source: forums.fastai "
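To see the pct_start effect concretely, a small sketch using the fastai v1 API; MNIST_SAMPLE is just a convenient stand-in dataset, my own assumption rather than part of the Q&A:
~~~python
from fastai.vision import *

# Any Learner works; MNIST_SAMPLE is small enough to run quickly
path = untar_data(URLs.MNIST_SAMPLE)
data = ImageDataBunch.from_folder(path)
learn = cnn_learner(data, models.resnet18, metrics=accuracy)

# LR rises for the first 20% of all iterations, then anneals for the rest
learn.fit_one_cycle(1, max_lr=1e-3, pct_start=0.2)
learn.recorder.plot_lr()
~~~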
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
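To make the Module refactor above concrete, here is a minimal sketch in the spirit of the note (class names follow the note; details may differ from the actual lesson notebook): __call__ stores the inputs and output, each layer overrides forward(), and Lin.backward() uses the matrix-product form from Version 5.
~~~python
import torch

class Module():
    # __call__ does the common plumbing; subclasses only override forward()
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)
        return self.out
    def forward(self):
        raise NotImplementedError('Module.forward')

class Lin(Module):
    def __init__(self, w, b):
        self.w, self.b = w, b
    def forward(self, inp):
        return inp @ self.w + self.b
    def backward(self):
        inp = self.args[0]
        inp.g = self.out.g @ self.w.t()
        self.w.g = inp.t() @ self.out.g  # matrix-product form of the einsum/outer-product sum
        self.b.g = self.out.g.sum(0)

# tiny smoke test with made-up shapes
w, b = torch.randn(784, 50), torch.zeros(50)
lin = Lin(w, b)
out = lin(torch.randn(64, 784))
out.g = torch.ones_like(out)  # pretend upstream gradient
lin.backward()
print(lin.w.g.shape, lin.b.g.shape)  # torch.Size([784, 50]) torch.Size([50])
~~~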
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
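The two key moves above (normalizing the validation set with the training statistics, and scaling the weights by sqrt(2/m)) fit in a few lines. A runnable sketch, with random stand-in tensors in the note's MNIST shapes (the real x_train/x_valid come from the lesson's data):
~~~python
import math
import torch

# Random stand-ins in the note's MNIST shapes; the real tensors come from the lesson's data
n, m, nh = 50000, 784, 50
x_train = torch.randn(n, m) * 0.31 + 0.13
x_valid = torch.randn(10000, m) * 0.31 + 0.13

# Normalize the validation set with the *training* statistics
train_mean, train_std = x_train.mean(), x_train.std()
x_train = (x_train - train_mean) / train_std
x_valid = (x_valid - train_mean) / train_std

def lin(x, w, b): return x @ w + b
def relu(x): return x.clamp_min(0.) - 0.5

# Simplified Kaiming init: scale by sqrt(2/fan_in) to survive the ReLU
w1 = torch.randn(m, nh) * math.sqrt(2. / m)
b1 = torch.zeros(nh)
t1 = relu(lin(x_valid, w1, b1))
print(t1.mean(), t1.std())  # mean near 0, std far closer to 1 than without the scaling
~~~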
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
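Putting the versions side by side makes the progression clear. A small sketch of the implementations compared above (shapes kept small so the pure-Python loop finishes quickly; timings will of course vary by machine), tied together by the same tolerance check as test_near:
~~~python
import torch

a, b = torch.randn(64, 32), torch.randn(32, 16)

def matmul_loops(a, b):
    # Pure-Python triple loop: the slow baseline
    ar, ac = a.shape
    br, bc = b.shape
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            for k in range(ac):
                c[i, j] += a[i, k] * b[k, j]
    return c

def matmul_broadcast(a, b):
    # One row at a time, broadcasting a[i] over b's columns
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

def matmul_einsum(a, b):
    # k is repeated, so einsum contracts (dot-products) over it
    return torch.einsum('ik,kj->ij', a, b)

for f in (matmul_loops, matmul_broadcast, matmul_einsum):
    assert torch.allclose(f(a, b), a @ b, rtol=1e-3, atol=1e-5)
~~~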
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
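As a taste of what Fire does (a hypothetical demo.py, not the lesson's run_notebook.py):
~~~python
# demo.py -- hypothetical file, not the lesson's run_notebook.py
import fire

def scale(x: float, factor: float = 2.0):
    """Multiply x by factor; Fire turns this signature into CLI arguments."""
    return x * factor

if __name__ == '__main__':
    # Exposes the function as a CLI: `python demo.py 3 --factor=10` prints 30.0
    fire.Fire(scale)
~~~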
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
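For reference, the whole pipeline in the fastai v1 API used in this post looks roughly like this (the data path and hyperparameters are placeholders, not the exact ones from my notebook):
~~~python
from fastai.vision import *

path = Path('data/cities')  # hypothetical folder with one subfolder per city
# Hold out 20% of the downloaded Google images as the validation set
data = ImageDataBunch.from_folder(path, train='.', valid_pct=0.2,
                                  ds_tfms=get_transforms(), size=224, bs=64)

learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.lr_find()               # LR range test: raise the LR until the loss explodes
learn.recorder.plot()         # pick about one order of magnitude below the explosion
learn.fit_one_cycle(4, max_lr=1e-2)

learn.unfreeze()              # then train the whole model with a sliced LR
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))
~~~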
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/02/dps-week4/index.html b/_site/2020/02/dps-week4/index.html
index 1e781c8b17..d4bdbf567a 100644
--- a/_site/2020/02/dps-week4/index.html
+++ b/_site/2020/02/dps-week4/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"The 4th week retropect at Digital Product School","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/02/dps-week4/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/week4/week4-ourteam.JPG","headline":"Digital Product School week 4","dateModified":"2020-02-01T00:00:00+09:00","datePublished":"2020-02-01T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/02/dps-week4/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
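To make the bernoulli-then-divide step above concrete, here is a minimal sketch of inverted dropout in plain PyTorch (my own illustration, not the lesson's code; the function name, tensor x, and probability p are made up):

~~~python
import torch

def dropout_sketch(x, p=0.5, training=True):
    # At test time dropout is the identity: nothing special at inference.
    if not training or p == 0.:
        return x
    # 1) A Bernoulli draw decides whether each unit is kept (1 with prob 1-p).
    mask = torch.empty_like(x).bernoulli_(1 - p)
    # 2) Divide the kept values by (1-p) so the expected activation is unchanged;
    #    with p=0.5 each kept unit becomes 2 and dropped units stay 0.
    return x * mask / (1 - p)

x = torch.ones(4, 3)
print(dropout_sketch(x, p=0.5))           # entries are 0. or 2.
print(dropout_sketch(x, training=False))  # unchanged at inference time
~~~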
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
(have to look into source code) What do the parameter alpha do? When people make masked img, would it be have ranged integer limit? Does image normalization related with this?lbl_sorted = sorted(lbl_names)f_sorted = sorted(fnames)lbl_1 = lbl_sorted[33]f_1 = f_sorted[33]img = open_image(lbl_1)mask = open_mask(lbl_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img. show(ax=axs[0], title='1')mask. show(ax=axs[1], title='2', alpha=1. ) img_2 = open_image(f_1)mask_2 = open_mask(f_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img_2. show(ax=axs[0], title='3',)mask_2. show(ax=axs[1], title='4', alpha=1. ) open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])img. data #labeled datatensor([[[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]]])mask. data # after mask, labeled datatensor([[[ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], . . . , [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30]]])img_2. data, mask_2. data(tensor([[[0. 0706, 0. 0667, 0. 0706, . . . , 0. 6431, 0. 6549, 0. 6627], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 6431, 0. 6510, 0. 6549], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 6392, 0. 6588, 0. 6588], . . . , [0. 0863, 0. 0824, 0. 0824, . . . , 0. 1333, 0. 1216, 0. 1255], [0. 0902, 0. 0863, 0. 0824, . . . , 0. 1255, 0. 1176, 0. 1216], [0. 0863, 0. 0824, 0. 0784, . . . , 0. 1137, 0. 1059, 0. 1137]], [[0. 0706, 0. 0667, 0. 0706, . . . , 0. 7490, 0. 7608, 0. 7686], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 7451, 0. 7569, 0. 7608], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7412, 0. 7529, 0. 7529], . . . , [0. 0980, 0. 0941, 0. 0941, . . . , 0. 1804, 0. 1686, 0. 1725], [0. 1059, 0. 1020, 0. 0980, . . . , 0. 1725, 0. 1647, 0. 1686], [0. 1020, 0. 0980, 0. 0941, . . . , 0. 1608, 0. 1529, 0. 1608]], [[0. 0784, 0. 0745, 0. 0784, . . . , 0. 7569, 0. 7686, 0. 7765], [0. 0824, 0. 0784, 0. 0784, . . . , 0. 7647, 0. 7647, 0. 7686], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7608, 0. 7647, 0. 7647], . . . , [0. 1216, 0. 1176, 0. 1176, . . . , 0. 2000, 0. 1882, 0. 1922], [0. 1176, 0. 1137, 0. 1098, . . . , 0. 1843, 0. 1765, 0. 1804], [0. 1137, 0. 1098, 0. 
1059, . . . , 0. 1725, 0. 1647, 0. 1725]]]), tensor([[[ 18, 17, 18, . . . , 183, 186, 188], [ 19, 18, 18, . . . , 183, 185, 186], [ 20, 18, 19, . . . , 182, 185, 185], . . . , [ 25, 24, 24, . . . , 43, 40, 41], [ 26, 25, 24, . . . , 41, 39, 40], [ 25, 24, 23, . . . , 38, 36, 38]]]))3. What is the difference between Image and ImageSegment?: imageSegment An ImageSegment object has the same properties as an Image. The only difference is that when applying the transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It's easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0])mask = open_mask(lbl_names[0])_,axs = plt. subplots(1,3, figsize=(8,4))img. show(ax=axs[0], title='no mask')img. show(ax=axs[1], y=mask, title='masked') #seg mask over the img using y argmask. show(ax=axs[2], title='mask only', alpha=1. ) vision. image 4. Why/how is the image divided by 255, and how does fast. ai do it? : vision. image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. So the sum of the pixel's values over all channels (call it S) divides each channel's intensity, so that the normalized values are R/S, G/S and B/S (where S = R+G+B). Detailed explanation here 5. Python Evaluation Order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10mask_tmp = trg_tmp != void_tmpprint(mask_tmp, trg_tmp, void_tmp) # (1) target is not same with voidTrue 1 10# Example 1x = 1y = 2x,y = y,x+yx, y(2, 3)# Example 2x = 1y = 2x = yy = x+yx, y(2, 4)6. model learner parameter: pct_start: A: Percentage of the total number of epochs during which the learning rate rises in one cycle. Q: Sorry, I'm still confused that one cycle in the new API only runs one epoch. How does the percentage of the total number of epochs work? Can you give an example, say learn. fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0. 05)? A: Ok, the strictly correct answer would be percentage of iterations, so you can have lr both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0. 05 * (10 * 100) = 50) lr will rise, then slowly decrease. Q2: Thanks for this explanation … so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0. 3, it means that your LR is going up for 30% of your iterations and then decreasing over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that's correct. You can verify that by changing its value and checking: learn. recorder. plot_lr() For example if pct_start = 0. 2 source: forums. fastai "
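A tiny sketch of the arithmetic in that forum answer (my own illustration; the epoch and iteration counts are the hypothetical numbers from the quote, not anything computed by fastai):

~~~python
# One-cycle schedule: LR rises for pct_start of all iterations, then falls.
epochs, iters_per_epoch = 10, 100   # hypothetical numbers from the quote
total_iters = epochs * iters_per_epoch

for pct_start in (0.05, 0.3):
    rising = int(pct_start * total_iters)
    falling = total_iters - rising
    print(f"pct_start={pct_start}: LR rises for {rising} iters, falls for {falling}")
# pct_start=0.05: LR rises for 50 iters, falls for 950
# pct_start=0.3:  LR rises for 300 iters, falls for 700
~~~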
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
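As a sanity check on the hand-written gradients above, here is a minimal sketch comparing one of them against autograd (my own illustration, not the lecture's code; the tiny shapes, seed, and tolerance are made up, while relu, lin, and mse follow the definitions used in this note):

~~~python
import torch

# Tiny stand-ins for the note's data and parameters (shapes are made up).
torch.manual_seed(0)
x, targ = torch.randn(64, 784), torch.randn(64)
w1, b1 = torch.randn(784, 50) / 784**0.5, torch.zeros(50)
w2, b2 = torch.randn(50, 1) / 50**0.5, torch.zeros(1)

def forward(inp, targ, w1, b1, w2, b2):
    l1 = inp @ w1 + b1
    l2 = l1.clamp_min(0.)                          # relu
    out = l2 @ w2 + b2
    return ((out.squeeze(-1) - targ)**2).mean()    # mse

# Hand-written backward result for w2 (mse_grad then lin_grad, as in the note):
l1 = x @ w1 + b1; l2 = l1.clamp_min(0.); out = l2 @ w2 + b2
out_g = 2. * (out.squeeze(-1) - targ).unsqueeze(-1) / out.shape[0]
w2_g_manual = (l2.unsqueeze(-1) * out_g.unsqueeze(1)).sum(0)

# Autograd version: mark the parameter, rerun the forward pass, call backward.
w2a = w2.clone().requires_grad_(True)
forward(x, targ, w1, b1, w2a, b2).backward()
print(torch.allclose(w2_g_manual, w2a.grad, atol=1e-6))  # True, up to float tolerance
~~~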
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
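As a quick check of the simplified kaiming init described above, here is a hedged sketch; m and nh follow the note, but the input is random stand-in data rather than the normalized MNIST x_valid:

~~~python
import math
import torch

m, nh = 784, 50
x = torch.randn(10000, m)                  # pretend-normalized input (mean≈0, std≈1)

def lin(x, w, b): return x @ w + b
def relu(x): return x.clamp_min(0.) - 0.5  # the shifted relu from the note

w1 = torch.randn(m, nh) * math.sqrt(2. / m)  # kaiming: scale by sqrt(2/fan_in)
b1 = torch.zeros(nh)
t = relu(lin(x, w1, b1))
print(t.mean(), t.std())  # mean near 0 thanks to the -0.5 shift; std much closer to 1
~~~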
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
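The three stages above (broadcasting, einsum, plain torch op) can be compared side by side. A small sketch with toy tensors, reusing the test-within-tolerance idea from the note:

~~~python
import torch

a, b = torch.randn(64, 784), torch.randn(784, 10)   # toy shapes

def matmul_broadcast(a, b):
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)   # broadcast row i over b
    return c

def matmul_einsum(a, b):
    return torch.einsum('ik,kj->ij', a, b)           # einstein summation

def near(x, y): return torch.allclose(x, y, rtol=1e-3, atol=1e-5)

assert near(matmul_broadcast(a, b), a @ b)           # pytorch op as reference
assert near(matmul_einsum(a, b), a @ b)
~~~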
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
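For the #export / test_eq workflow mentioned above, here is a hedged sketch of what such test helpers can look like, modeled on the dev-notebook pattern; the exact fast.ai definitions may differ:

~~~python
import operator

def test(a, b, cmp, cname=None):
    if cname is None: cname = cmp.__name__
    assert cmp(a, b), f"{cname}:\n{a}\n{b}"     # fail loudly with both values

def test_eq(a, b): test(a, b, operator.eq, '==')

TEST = 'test'
test_eq(TEST, 'test')      # passes silently
# test_eq(TEST, 'test1')   # would raise AssertionError
~~~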
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
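Pulling the steps above together, here is a hedged sketch of the whole recipe in the fastai v1 API used by the course; the folder path is a hypothetical placeholder, and the learning rates are the kind of values the note suggests, not tuned results:

~~~python
from fastai.vision import *

path = Path('data/cities')        # hypothetical: one subfolder per class
data = (ImageDataBunch.from_folder(path, valid_pct=0.2, size=224,
                                   ds_tfms=get_transforms())
        .normalize(imagenet_stats))
learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(4)                            # train the new head first
learn.unfreeze()                                  # then fine-tune everything
learn.lr_find(); learn.recorder.plot()            # pick LR before the loss soars
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))  # discriminative learning rates
~~~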
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/02/dps-week5/index.html b/_site/2020/02/dps-week5/index.html
index 52b62a8739..fb805db71d 100644
--- a/_site/2020/02/dps-week5/index.html
+++ b/_site/2020/02/dps-week5/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"The 5th week retropect at Digital Product School","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/02/dps-week5/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/week5/user-storymap.png","headline":"Digital Product School week 5","dateModified":"2020-02-09T00:00:00+09:00","datePublished":"2020-02-09T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/02/dps-week5/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why does this kernel (matrix) help catch a horizontal edge? - because this kernel`(picture2)` weights differently depending on the `x axis` - why does it feel familiar? because it's a similar intuition to the fugus`(paper)` paper - CNN from different viewpoints`link` - each output pixel is the result of a different linear equation. - If you connect this with the node view of a neural network, you can see that specific input nodes are connected with specific output nodes. - **Summarize**: a cnn does a matmul where 1) some of the elements are always zero and 2) the same weight appears in every row, which is called `weight tying` `(picture)` #### Further lowdown - Because an image generally has 3 channels, we need a rank-3 kernel. - And **we multiply over all channels, so the output is one pixel**. (`draw it yourself`) - but this kernel will catch only one feature, like horizontal edges, so we make more kernels so that the output becomes (h * w * kernels) - And that `kernel` dimension becomes the `channel` - **Conv2d**: with a 3 by 3 kernel, a stride-2 conv -> (h/2 * w/2 * kernels) - skips or jumps over input pixels - to keep memory from getting out of control

~~~python
learn.model
learn.summary()
~~~

TODO: understand the blocks of the conv kernel yourself: - Usually a big kernel size is used at the first layer (will study this in part2) - Bottom-right highlighting kernel (`pic / draw`) - `torch.tensor.expand`: for memory efficiency, because we should handle RGB - We do not make separate kernels, but one rank-4 kernel - a 4d tensor is just stacked kernels - `t[None].shape` creates a new unit axis - why do we make this? because the model should take a batch, not a single image. ### Average pooling, feature - suppose our pre-trained model results in a size of `11 by 11 by 512` `pic 4` and my classification task has 37 classes * take the first face of the channel, which is 11 by 11, and `mean` it, so that we get a 512 by 1 tensor * then make a 2d matrix, which is 512 by 37, and multiply, so that we get a 37 by 1 matrix. - Feature, at the convolution block - So, when we do transfer learning without unfreezing, every element of the last matrix (512 by 1) should represent (or could catch) one feature. ### Heatmap, Hook

~~~
hook_output(model[0]) -> acts -> avg_acts
~~~

- if we average the block over `axis=feature`, the resulting matrix (11 by 11) depicts `how activated was that area?` -> it is the heatmap, `avg_acts` - and acts comes from a hook, which is a more advanced pytorch feature. - hook into the pytorch machinery itself, and run any arbitrary Pytorch code - Why is this cool?: Normally a forward pass just gives a set of outputs, but we can interrupt and hook into the forward pass. - We can also store the output of the convolutional part of the model, which is before avg_pooling - Think back to how we cut off `after` the conv part. - with fast.ai the original convolutional part of the model is *the first thing in the model*, specifically available as `learn.model.eval()[0]` - And this is captured with `hook_output`; having hooked the output, we can pass our x_minibatch through. - Not directly, though: it has to be normalized, made into a minibatch, and put on the gpu - the `one_item()` function does it when we have one data point `TODO: this is an assignment` do it yourself without the one_item function - and `.cuda()` puts it on the gpu - you should print out the shape of your tensors very often, and try to think why. "
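To make the Bernoulli-then-scale behaviour described above concrete, here is a minimal sketch (my own illustration with p=0.5, not the lecture's code):

~~~python
import torch

def dropout_manual(x, p=0.5, training=True):
    # at test time dropout is the identity: nothing special at inference
    if not training:
        return x
    # bernoulli decides which activations survive (1 with probability 1-p)
    mask = torch.zeros_like(x).bernoulli_(1 - p)
    # divide by (1-p) so the expected activation is unchanged;
    # with p=0.5 kept values become 2x and dropped ones stay 0
    return x * mask / (1 - p)

x = torch.ones(3, 4)
print(dropout_manual(x))                   # entries are 2.0 or 0.0
print(dropout_manual(x, training=False))   # unchanged at eval time
~~~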
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v) # prints each video-sequence key with its full list of frame ids; the long output is shortened here, e.g. 0001TP ['009210', '008850', '007350', …] for k, v in d.items(): print(k, len(d[k])) 0001TP 124 0016E5 305 Seq05VD 171 0006R0 101 for i in d2.keys(): print(i, len(d2[i])) 0016E5 305 0001TP 124 0006R0 101 Seq05VD 171 files[0], labels[0] (('0001TP', '009210'), ('0016E5', '01800')) 2. My question: Link: Why do we need masking? And does the color come from the fastai library? 
(have to look into the source code) What does the parameter alpha do? When people make a masked img, does it have a ranged integer limit? Is image normalization related to this? lbl_sorted = sorted(lbl_names) f_sorted = sorted(fnames) lbl_1 = lbl_sorted[33] f_1 = f_sorted[33] img = open_image(lbl_1) mask = open_mask(lbl_1) _,axs = plt.subplots(1,2, figsize=(10,5)) img.show(ax=axs[0], title='1') mask.show(ax=axs[1], title='2', alpha=1.) img_2 = open_image(f_1) mask_2 = open_mask(f_1) _,axs = plt.subplots(1,2, figsize=(10,5)) img_2.show(ax=axs[0], title='3') mask_2.show(ax=axs[1], title='4', alpha=1.) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) img.data # labeled data opened as an image: a float tensor, identical across the three channels, e.g. tensor([[[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]]]) mask.data # the same file opened as a mask: integer class ids per pixel, e.g. tensor([[[ 4, 4, 4, ..., 21, 21, 21], ..., [17, 17, 17, ..., 30, 30, 30]]]) img_2.data, mask_2.data # the raw image as floats ([3, 720, 960]) and, opened as a mask, raw intensities as ints ([1, 720, 960]) 3. What is the difference between Image and ImageSegment?: ImageSegment: An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It’s easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _,axs = plt.subplots(1,3, figsize=(8,4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') # seg mask over the img using the y arg mask.show(ax=axs[2], title='mask only', alpha=1.) vision.image 4. Why/how is an img divided by 255, and what does it result in? fast.ai: vision.image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. So the sum of the pixel’s values over all channels (which is S) divides each channel, so that the normalized values become R/S, G/S and B/S (where S=R+G+B). Detailed explanation here. 5. Python evaluation order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x,y = y,x+y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x+y x, y (2, 4) 6. model learner parameter :: pct_start: A: Percentage of the total number of epochs during which the learning rate rises in one cycle. Q: Sorry, I’m still confused: one cycle in the new API only runs once. How does the percentage of the total number of epochs work? Can you give an example, say learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: OK, the strictly correct answer would be percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation … so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, the LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that’s correct. You can verify that by changing its value and checking learn.recorder.plot_lr(). For example if pct_start = 0.2 source: forums.fastai "
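A quick sanity check of the arithmetic in that forum answer (plain Python; the 100 iterations per epoch is the answer's assumed figure, not something fit_one_cycle fixes):

~~~python
# fit_one_cycle(10) with ~100 iterations per epoch and pct_start=0.05:
epochs, iters_per_epoch, pct_start = 10, 100, 0.05
total_iters = epochs * iters_per_epoch
rising = int(pct_start * total_iters)
print(rising)                # 50 -> the LR rises for half an epoch
print(total_iters - rising)  # 950 iterations of decay afterwards
~~~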
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
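The Model above references Relu() and Mse() classes that this note never shows. A minimal sketch following the same pattern as Lin, reconstructed from the gradient formulas above (my reconstruction in the lesson's style, so treat the details as assumptions):

~~~python
class Relu():
    def __call__(self, inp):
        self.inp = inp
        self.out = inp.clamp_min(0.) - 0.5
        return self.out
    def backward(self):
        # grad of relu with respect to its input, as in relu_grad above
        self.inp.g = (self.inp > 0).float() * self.out.g

class Mse():
    def __call__(self, inp, targ):
        self.inp, self.targ = inp, targ
        self.out = (inp.squeeze() - targ).pow(2).mean()
        return self.out
    def backward(self):
        # same formula as mse_grad above
        self.inp.g = 2. * (self.inp.squeeze() - self.targ).unsqueeze(-1) / self.targ.shape[0]
~~~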
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
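As a small illustration of the normalization point above - the validation set gets the training set's statistics, not its own (this mirrors the lesson notebook's approach; a sketch, not the verbatim code):

~~~python
def normalize(x, m, s): return (x - m) / s

train_mean, train_std = x_train.mean(), x_train.std()
x_train = normalize(x_train, train_mean, train_std)
# NB: normalize the validation set with the *training* statistics,
# so both sets live on the same scale
x_valid = normalize(x_valid, train_mean, train_std)
~~~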
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
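To tie the broadcasting and einsum versions together, here is a runnable consolidation of the snippets above (my own sketch; the shapes are illustrative, not the lesson's MNIST batch):

~~~python
import torch

def matmul_bcast(a, b):
    ar, ac = a.shape
    br, bc = b.shape
    c = torch.zeros(ar, bc)
    for i in range(ar):
        # broadcast row i of a against all of b, then sum over k
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

def matmul_einsum(a, b):
    return torch.einsum('ik,kj->ij', a, b)

a, b = torch.randn(5, 784), torch.randn(784, 10)
# compare within tolerance, as test_near does above
assert torch.allclose(matmul_bcast(a, b), a @ b, rtol=1e-3, atol=1e-5)
assert torch.allclose(matmul_einsum(a, b), a @ b, rtol=1e-3, atol=1e-5)
~~~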
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
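As a rough illustration of the speed gap discussed above, here is a minimal sketch (my own toy timing, not the lesson notebook's exact code; the matrix sizes are assumptions for demonstration):

~~~python
import time
import torch

def matmul_pure(a, b):
    # naive triple loop: every multiply-add runs as slow Python bytecode
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            for k in range(ac):
                c[i, j] += a[i, k] * b[k, j]
    return c

a = torch.randn(5, 784)   # 5 flattened 28x28 MNIST-like rows (c = 28)
b = torch.randn(784, 10)  # weights of one linear layer

t0 = time.perf_counter(); c1 = matmul_pure(a, b); t1 = time.perf_counter()
c2 = a.matmul(b)          # dispatched down to C++ (ATen)
t2 = time.perf_counter()

print(f'pure python: {t1 - t0:.3f}s vs torch: {t2 - t1:.6f}s')
assert torch.allclose(c1, c2, atol=1e-4)  # same result, wildly different speed
~~~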
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
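Putting the pieces above together, here is a minimal end-to-end sketch in fastai v1 (the folder layout, class names, and seed are my assumptions; the original notebook is linked above):

~~~python
from fastai.vision import *

path = Path('data/cities')  # hypothetical: one subfolder per class

# drop broken downloads before building the DataBunch
for c in ['Seoul', 'Munich', 'Sanfrancisco']:
    verify_images(path/c, delete=True, max_size=500)

np.random.seed(42)  # no predefined split, so hold out 20% for validation
data = ImageDataBunch.from_folder(path, train='.', valid_pct=0.2,
                                  ds_tfms=get_transforms(), size=224,
                                  bs=64).normalize(imagenet_stats)

learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.lr_find()                  # plot with learn.recorder.plot()
learn.fit_one_cycle(4)           # train just the head first
learn.unfreeze()                 # then fine-tune the whole model
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))
~~~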
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/02/dps-week8/index.html b/_site/2020/02/dps-week8/index.html
index 1040d821ee..47b0b2adc4 100644
--- a/_site/2020/02/dps-week8/index.html
+++ b/_site/2020/02/dps-week8/index.html
@@ -4,24 +4,24 @@
-
Digital Product School week 8&9 | SpellOnYou
+
My life in Digital Product School - week 8/9/10 | SpellOnYou
-
Digital Product School week 8&9 | SpellOnYou
+
My life in Digital Product School - week 8/9/10 | SpellOnYou
-
+
-
-
+
+
-
+
+{"description":"The 8/9/10th week retropect at Digital Product School","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/02/dps-week8/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/week8/gate.png","headline":"My life in Digital Product School - week 8/19/10","dateModified":"2020-02-24T00:00:00+09:00","datePublished":"2020-02-24T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/02/dps-week8/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
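Since the hook mechanism is the crux of the heatmap section, here is a minimal PyTorch-only sketch (a stand-in model, not fastai's hook_output; the shapes are chosen to mirror the 11 by 11 by 512 example):

~~~python
import torch
import torch.nn as nn

# stand-in for the convolutional part of a pre-trained model
body = nn.Sequential(nn.Conv2d(3, 512, 3, stride=2, padding=1),
                     nn.ReLU(), nn.AdaptiveAvgPool2d(11))

acts = {}
def hook(module, inp, out):
    # interrupt the forward pass and stash the activations
    acts['conv'] = out.detach()

handle = body.register_forward_hook(hook)
x = torch.randn(1, 3, 352, 352)   # one normalized 'mini-batch' of one image
_ = body(x)
handle.remove()

fmap = acts['conv'][0]            # (512, 11, 11) feature map
heatmap = fmap.mean(0)            # average over the feature axis -> (11, 11)
print(heatmap.shape)              # 'how activated was that area?'
~~~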
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
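To sanity-check the hypothesis, here is a minimal sketch of my own; it assumes fnames and lbl_names from above and the '_P' label-suffix convention visible in the CamVid paths:
~~~python
# Every image stem, plus the '_P' suffix CamVid's label files carry,
# should appear among the label stems.
img_stems = {f.stem for f in fnames}
lbl_stems = {l.stem for l in lbl_names}
assert all(f'{s}_P' in lbl_stems for s in img_stems)
~~~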
2. My questions: Link: Why do we need masking? And does the color come from the fastai library? (I have to look into the source code.) What does the parameter alpha do? When people make a masked image, is there a bounded integer range? Is image normalization related to this? lbl_sorted = sorted(lbl_names) f_sorted = sorted(fnames) lbl_1 = lbl_sorted[33] f_1 = f_sorted[33] img = open_image(lbl_1) mask = open_mask(lbl_1) _, axs = plt.subplots(1, 2, figsize=(10, 5)) # img.show(ax=axs[0], y=mask, title='masked') img.show(ax=axs[0], title='1') mask.show(ax=axs[1], title='2', alpha=1.) img_2 = open_image(f_1) mask_2 = open_mask(f_1) _, axs = plt.subplots(1, 2, figsize=(10, 5)) img_2.show(ax=axs[0], title='3') mask_2.show(ax=axs[1], title='4', alpha=1.) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) img.data # the label file opened as an image: floats in [0, 1] tensor([[[0.0157, 0.0157, …, 0.0824], …, [0.0667, 0.0667, …, 0.1176]]]) (full printout truncated) mask.data # the same label file opened as a mask: integer class codes per pixel tensor([[[ 4, 4, 4, …, 21, 21, 21], …, [17, 17, 17, …, 30, 30, 30]]]) img_2.data, mask_2.data # the photo as floats in [0, 1], and as raw integers when opened as a mask (tensor([[[0.0706, 0.0667, …]]]), tensor([[[ 18, 17, 18, …, 183, 186, 188], …]]))
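Before moving on, a toy illustration of those two data types; this is my own sketch of the div=True behaviour described below, not the library's code:
~~~python
import torch

# open_image divides raw pixel values by 255 into floats in [0, 1];
# open_mask keeps them as integer class codes.
raw = torch.tensor([[0, 128, 255]])
as_image = raw.float() / 255   # tensor([[0.0000, 0.5020, 1.0000]])
as_mask = raw.long()           # tensor([[  0, 128, 255]])
~~~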
3. What is the difference between Image and ImageSegment? imageSegment: An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It's easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _, axs = plt.subplots(1, 3, figsize=(8, 4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') # segmentation mask over the image, using the y argument mask.show(ax=axs[2], title='mask only', alpha=1.) 4. Why/how is the image divided by 255, and how does fast.ai do it? vision.image: if div=True, pixel values are divided by 255 to become floats between 0. and 1. At times you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can be a simple and effective way of achieving this: each channel is divided by the sum of the pixel's values over all channels, so the normalized values are R/S, G/S and B/S (where S = R + G + B). Detailed explanation here. 5. Python evaluation order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x, y = y, x + y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x + y x, y (2, 4) 6. Model learner parameter: pct_start: A: Percentage of the total number of epochs during which the learning rate rises within one cycle. Q: Sorry, I'm still confused: one cycle in the new API only runs one epoch, so how does the percentage of the total number of epochs work? Can you give an example, e.g. learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: OK, the strictly correct answer would be percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation... so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So given the default of 0.3, the LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that's correct. You can verify it by changing the value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2. source: forums.fastai
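As a small sanity check of that answer, here is the same arithmetic as code (the numbers are the hypothetical ones from the Q&A above):
~~~python
# fit_one_cycle(10, ..., pct_start=0.05) with 100 iterations per epoch:
epochs, iters_per_epoch, pct_start = 10, 100, 0.05
total = epochs * iters_per_epoch
rising = int(total * pct_start)
print(rising, total - rising)  # 50 iterations rising, 950 decreasing
~~~
"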
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
Refactor model: Amazingly, just by refactoring our main pieces, the code converges toward what the PyTorch package itself looks like. 🌟 Implement it yourself: practice, practice, practice! 🌟 Layers as classes: ReLU and Linear are layers in our neural net -> make them classes. For the forward pass, we use __call__ for both forward & backward, because 'call' means we treat the object as a function. class Lin(): def __init__(self, w, b): self.w, self.b = w, b def __call__(self, inp): self.inp = inp self.out = inp @ self.w + self.b return self.out def backward(self): self.inp.g = self.out.g @ self.w.t() # Creating a giant outer product, just to sum it, is inefficient! self.w.g = (self.inp.unsqueeze(-1) * self.out.g.unsqueeze(1)).sum(0) self.b.g = self.out.g.sum(0) Remember that in the lin_grad function we saved the bias & weight gradients!!!!! 💬 inp.g: gradient of the output with respect to the input. 💬 w.g: gradient of the output with respect to the weight. 💬 b.g: gradient of the output with respect to the bias. class Model(): def __init__(self, w1, b1, w2, b2): self.layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self.loss = Mse() def __call__(self, x, targ): for l in self.layers: x = l(x) return self.loss(x, targ) def backward(self): self.loss.backward() for l in reversed(self.layers): l.backward() Referring to Jeremy's Model class: he put the layers in a list. Dionne's self-study note, decomposing Jeremy's Model class: __init__ needs the weights and biases, but not the x data; when you call the class (a.k.a. as a function) you give it the x data and the y label! Jeremy composed the functions in layers - x = l(x), so concise - and he also utilized that layer list for backward by just reversing it (using Python's list method), recursively calling each function on the result of the previous one. ⬇️ for l in self.layers: x = l(x) Q2: Don't I need to declare the magical autograd function, requires_grad_? [The questions migrated to this article] Version 3 (refactoring - layer to class) - Wall time: 5.25 µs
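The next refactor moves the shared __call__ logic into a base class. As a rough sketch of the shape this takes, reconstructed from the description below (so details may differ from the lesson notebook):
~~~python
class Module():
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)   # each layer overrides forward()
        return self.out
    def forward(self): raise NotImplementedError
    def backward(self): self.bwd(self.out, *self.args)

class Relu(Module):
    def forward(self, inp): return inp.clamp_min(0.) - 0.5
    def bwd(self, out, inp): inp.g = (inp > 0).float() * out.g
~~~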
Module.forward(): Duplicate code makes execution slow. The role of __call__ changed: no more __call__ for implementing the forward pass itself. By routing the forward pass through __call__, Module.forward() uses overriding to maximize reusability, so any layer that inherits Module can use the parent's functions. The gradient of the output with respect to the weight, (self.inp.unsqueeze(-1) * self.out.g.unsqueeze(1)).sum(0), can be re-expressed using einsum: torch.einsum('bi,bj->ij', inp, out.g). Defining forward and Module lets us factor out almost all the duplication. Version 4 (Module & einsum) - Wall time: 4.29 µs Q2: Isn't there any way to use broadcasting? Why should we use an outer product? Without einsum: Replacing einsum with a plain matrix product is even faster: torch.einsum('bi,bj->ij', inp, out.g) can be re-expressed as the matrix product inp.t() @ out.g. Version 5 (without einsum) - Wall time: 3.81 µs nn.Linear and nn.Module: Torch's own nn.Linear and nn.Module. Version 6 (torch package) - Wall time: 5.01 µs Finally, using torch.nn.Linear & torch.nn.Module: ~~~python
class Model(nn.Module):
    def __init__(self, n_in, nh, n_out):
        super().__init__()
        self.layers = [nn.Linear(n_in,nh), nn.ReLU(), nn.Linear(nh,n_out)]
        self.loss = mse
    def __call__(self, x, targ):
        for l in self.layers: x = l(x)
        return self.loss(x.squeeze(), targ)
~~~ Footnote: fast.ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods an object has ↩ "
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
What about CNNs in PyTorch? I tried torch.nn.Conv2d.conv2d_forward??; Jeremy dug in using torch.nn.modules.conv._ConvNd.reset_parameters??2 In PyTorch, kaiming init doesn't seem to be implemented with the right formula, so we should use our own operation. This has actually been discussed in the PyTorch community before.3 4 Jeremy said it enhanced the variance as well, so I sampled 100 times and counted the better results. To make sure the shapes seem sensible, check with assert (remember we will replace 1 with 10 for cross entropy): assert model(x_valid).shape == torch.Size([x_valid.shape[0], 1]) >>> model(x_valid).shape (10000, 1) We have made relu, init, and linear; it seems we can write the forward-pass code we need for the basic architecture: nh = 50 def lin(x, w, b): return x@w + b w1 = torch.randn(m,nh)*math.sqrt(2./m); b1 = torch.zeros(nh) w2 = torch.randn(nh,1); b2 = torch.zeros(1) def relu(x): return x.clamp_min(0.) - 0.5 t1 = relu(lin(x_valid, w1, b1)) def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l3 2.2 Loss function: MSE: Mean squared error needs a rank-1 vector, so we remove the trailing unit axis: def mse(output, targ): return (output.squeeze(-1) - targ).pow(2).mean() In Python, to remove an axis you use 'squeeze', and to add an axis you use 'unsqueeze'. torch.squeeze is where code commonly breaks, so when you use squeeze, clarify the dimension axis you want to remove: tmp = torch.tensor([1,1]) tmp.squeeze() >>> tensor([1, 1]) Make sure to convert to float when you calculate. But why??? Because it is a tensor? Here's the error when I don't transform the data type: --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-22-ae6009bef8b4> in <module>() ----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train) TypeError: 'map' object is not subscriptable This is the forward pass. Footnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, the paper that introduced Xavier initialization; Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementation of Kaiming init for conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
2.2 Matmul with broadcasting: for i in range(ar): # c[i,j] = (a[i,:] * b[:,j]).sum() # previous c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0) And using None as well (as Howard taught): c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0) # howard c[i] = (a[i][:,None] * b).sum(dim=0) # using None c[i] = (a[i,:,None] * b).sum(dim=0) ⭐️Tips🌟 1) Anytime there's a trailing (final) colon in numpy or pytorch you can delete it, e.g. c[i, :] = c[i] 2) Any number of colon-commas at the start can be switched for a single ellipsis, e.g. c[:,:,:,:,i] = c[...,i] 2.3 Broadcasting Rules: What if we multiply a tensor of size [1,3] by one of size [3,1]? We get torch.Size([3, 3]). What is scale???? What if one array's shape is a multiple of the other's? e.g. Image: 256 x 256 x 3, Scale: 128 x 256 x 3, Result: ? Why did broadcasting happen even though I did not insert an axis via None? >>> c * c[:,None] tensor([[100., 200., 300.], [200., 400., 600.], [300., 600., 900.]]) Maybe it broadcasts because the following array has 3 rows; by the same principle, no matter what the original shape was, in an operation one tensor broadcasts to the other. >>> c == c[None] tensor([[True, True, True]]) >>> c[None] == c[None,:] tensor([[True, True, True]]) >>> c[None,:] == c tensor([[True, True, True]]) 3. Einstein summation: Works batch-wise, removes the innermost loop, and replaces it with an elementwise product, a.k.a. c[i,j] += a[i,k] * b[k,j] (innermost loop) becomes c[i,j] = (a[i,:] * b[:,j]).sum() (elementwise product). Because k is repeated, we do a dot product. Usage of einsum(): 1) transpose 2) diagonalisation/tracing 3) batch-wise (matmul) … Einstein summation notation: def matmul(a,b): return torch.einsum('ik,kj->ij', a, b) So after all, we are now about 16,000 times faster than pure Python. 4. Pytorch op: 49166.67 times faster than pure Python. We will use this matrix multiplication in the fully connected forward pass, with some initialized parameters and ReLU. But before that, we need the initialized parameters and ReLU. Footnote: TensorRank ti note Resources: Frobenius Norm Review, Broadcasting Review (especially the rules; refer to the colab! I was totally confused by the extension of arrays), torch.allclose Review, np.einsum Review "
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model: which is linear, like $Xw$(weight)$+a$(bias) $= Y$. If you don't know how to multiply matrices, refer to this matmul visualization site. How much time does it take if we use a pure Python function? matmul, the typical matrix multiplication function, takes about 1 second to calculate a single training example! (maybe assumed stochastic, 5 data points in validation) It would take about 11.36 hours to update the parameters for even a single layer and 1 iteration! (If that were my computer, it would be 14 hours..)🤪 THIS is why we need to consider 'time' & 'space'. This is kinda slow - what if we could speed it up by 50,000 times? Let's try! Elementwise ops: How can we make Python faster?: If we want to calculate faster, remove the pythonic calculation by passing the computation down to something written in a language other than Python, like PyTorch. According to the PyTorch docs it uses C++ (via ATen), so that is what we are effectively calling; in this course we first implement the same function in Python ourselves. What is an elementwise operation?: items make pairs, and we operate on the corresponding components. Footnote: notebooks material, video, broadcasting excel "
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was a breakthrough in the NLP industry, I tried to explain how it can be applied to a hands-on product and how it can help people in their own products. The result was quite motivating for me. They gave feedback that, since it wasn't that theoretical, they could enjoy it and found the information useful. Someone even asked me whether I had taken presentation training before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
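The two-stage recipe above (train the head first, then unfreeze with a sliced learning rate) can be sketched roughly as follows, assuming a fastai v1 environment; `out_dir` is the download folder from earlier, and the exact rates are placeholders you would read off the `lr_find()` plot rather than fixed values:
~~~python
from fastai.vision import *

# Build a databunch straight from the downloaded keyword folders, holding out
# 20% for validation since Google images come with no predefined split.
data = ImageDataBunch.from_folder(out_dir, train='.', valid_pct=0.2,
                                  ds_tfms=get_transforms(), size=224,
                                  bs=64).normalize(imagenet_stats)
learn = cnn_learner(data, models.resnet34, metrics=accuracy)

# Stage 1: train only the head, at the rate picked from the lr_find() plot.
learn.fit_one_cycle(4, max_lr=1e-2)

# Stage 2: unfreeze and train the whole model with a sliced learning rate.
# Max bound: the stage-1 rate divided by 5-10; min bound: roughly the point
# just before the loss soars, divided by 10.
learn.unfreeze()
learn.fit_one_cycle(4, max_lr=slice(1e-5, 1e-3))
~~~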
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
@@ -335,7 +340,7 @@
-
Digital Product School week 8&9
+
My life in Digital Product School - week 8/9/10
Follow
@@ -351,7 +356,7 @@ Digital Product School week 8&9
-
+
@@ -391,46 +396,34 @@
diff --git a/_site/2020/03/note08-fastai-2/index.html b/_site/2020/03/note08-fastai-2/index.html
index 7b3320aeb8..e02d17a07d 100644
--- a/_site/2020/03/note08-fastai-2/index.html
+++ b/_site/2020/03/note08-fastai-2/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/03/note08-fastai-2/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/30.png","headline":"What’s inside Pytorch Operator?","dateModified":"2020-03-01T00:00:00+09:00","datePublished":"2020-03-01T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/03/note08-fastai-2/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
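The dropout behaviour described above (a bernoulli mask at training time, rescaling so the expectation is unchanged, and a no-op at test time) can be sketched in plain PyTorch; this is an illustrative re-implementation, not the fastai or PyTorch source:
~~~python
import torch

def dropout_sketch(x, p=0.5, training=True):
    # Test time: do nothing, exactly as the note says.
    if not training or p == 0.0:
        return x
    # Training time: bernoulli decides which activations survive, then the
    # mask is divided by (1 - p), so with p=0.5 each entry becomes 2 or 0.
    mask = torch.bernoulli(torch.full_like(x, 1 - p)) / (1 - p)
    return x * mask
~~~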
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
(have to look into source code) What do the parameter alpha do? When people make masked img, would it be have ranged integer limit? Does image normalization related with this?lbl_sorted = sorted(lbl_names)f_sorted = sorted(fnames)lbl_1 = lbl_sorted[33]f_1 = f_sorted[33]img = open_image(lbl_1)mask = open_mask(lbl_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img. show(ax=axs[0], title='1')mask. show(ax=axs[1], title='2', alpha=1. ) img_2 = open_image(f_1)mask_2 = open_mask(f_1)_,axs = plt. subplots(1,2, figsize=(10,5))# img. show(ax=axs[0], y=mask, title='masked')img_2. show(ax=axs[0], title='3',)mask_2. show(ax=axs[1], title='4', alpha=1. ) open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_mask(lbl_1). data. shapetorch. Size([1, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])open_image(f_1). data. shapetorch. Size([3, 720, 960])img. data #labeled datatensor([[[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]], [[0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], [0. 0157, 0. 0157, 0. 0157, . . . , 0. 0824, 0. 0824, 0. 0824], . . . , [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176], [0. 0667, 0. 0667, 0. 0667, . . . , 0. 1176, 0. 1176, 0. 1176]]])mask. data # after mask, labeled datatensor([[[ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], [ 4, 4, 4, . . . , 21, 21, 21], . . . , [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30], [17, 17, 17, . . . , 30, 30, 30]]])img_2. data, mask_2. data(tensor([[[0. 0706, 0. 0667, 0. 0706, . . . , 0. 6431, 0. 6549, 0. 6627], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 6431, 0. 6510, 0. 6549], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 6392, 0. 6588, 0. 6588], . . . , [0. 0863, 0. 0824, 0. 0824, . . . , 0. 1333, 0. 1216, 0. 1255], [0. 0902, 0. 0863, 0. 0824, . . . , 0. 1255, 0. 1176, 0. 1216], [0. 0863, 0. 0824, 0. 0784, . . . , 0. 1137, 0. 1059, 0. 1137]], [[0. 0706, 0. 0667, 0. 0706, . . . , 0. 7490, 0. 7608, 0. 7686], [0. 0745, 0. 0706, 0. 0706, . . . , 0. 7451, 0. 7569, 0. 7608], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7412, 0. 7529, 0. 7529], . . . , [0. 0980, 0. 0941, 0. 0941, . . . , 0. 1804, 0. 1686, 0. 1725], [0. 1059, 0. 1020, 0. 0980, . . . , 0. 1725, 0. 1647, 0. 1686], [0. 1020, 0. 0980, 0. 0941, . . . , 0. 1608, 0. 1529, 0. 1608]], [[0. 0784, 0. 0745, 0. 0784, . . . , 0. 7569, 0. 7686, 0. 7765], [0. 0824, 0. 0784, 0. 0784, . . . , 0. 7647, 0. 7647, 0. 7686], [0. 0784, 0. 0706, 0. 0745, . . . , 0. 7608, 0. 7647, 0. 7647], . . . , [0. 1216, 0. 1176, 0. 1176, . . . , 0. 2000, 0. 1882, 0. 1922], [0. 1176, 0. 1137, 0. 1098, . . . , 0. 1843, 0. 1765, 0. 1804], [0. 1137, 0. 1098, 0. 
1059, . . . , 0. 1725, 0. 1647, 0. 1725]]]), tensor([[[ 18, 17, 18, . . . , 183, 186, 188], [ 19, 18, 18, . . . , 183, 185, 186], [ 20, 18, 19, . . . , 182, 185, 185], . . . , [ 25, 24, 24, . . . , 43, 40, 41], [ 26, 25, 24, . . . , 41, 39, 40], [ 25, 24, 23, . . . , 38, 36, 38]]])) 3. What is the difference between Image and ImageSegment?: ImageSegment: An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It’s easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _,axs = plt.subplots(1,3, figsize=(8,4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') # seg mask over the img using the y arg mask.show(ax=axs[2], title='mask only', alpha=1.) vision.image 4. Why/how is the image divided by 255 and how fast.ai does it: vision.image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. The sum of a pixel’s values over all channels (call it S) divides each channel, so the normalized values will be R/S, G/S and B/S (where S = R + G + B). Detailed explanation here 5. Python Evaluation Order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x,y = y,x+y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x+y x, y (2, 4) 6. Model learner parameter :: pct_start: A: Percentage of the total number of epochs when the learning rate rises during one cycle. Q: Sorry, I'm still confused: one cycle in the new API only runs one epoch, so how does the percentage of the total number of epochs work? Can you give an example, e.g. learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: Ok, the strictly correct answer would be percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation… so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, it means that your LR is going up for 30% of your iterations and then decreasing over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that’s correct. You can verify that by changing its value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2 source: forums.fastai "
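The R/S, G/S, B/S normalisation from section 4 can be written as a few tensor operations. A minimal sketch, assuming a float image tensor of shape (3, H, W) like `img.data` above; the function name and the random example are mine, not from the course:
~~~python
import torch

def chromaticity_normalize(img):
    # S = R + G + B per pixel; dividing each channel by S removes overall
    # intensity (lights and shadows), keeping only the colour proportions.
    s = img.sum(dim=0, keepdim=True).clamp(min=1e-8)  # guard against S == 0
    return img / s

# Illustrative usage on a random "image"
img = torch.rand(3, 4, 4)
print(chromaticity_normalize(img).sum(dim=0))  # each pixel now sums to 1
~~~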
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
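To make the final refactor concrete, here is a minimal sketch of the Module pattern this note arrives at, in the spirit of the lesson's Lin class (the .g gradient attribute is the note's own convention, not a PyTorch feature, and the code below is a sketch rather than the lesson's exact notebook):

~~~python
import torch

class Module():
    # __call__ stores the inputs and output, then dispatches to forward(),
    # so subclasses only define forward() and bwd()
    def __call__(self, *args):
        self.args = args
        self.out = self.forward(*args)
        return self.out
    def forward(self): raise NotImplementedError('subclasses override forward')
    def backward(self): self.bwd(self.out, *self.args)

class Lin(Module):
    def __init__(self, w, b): self.w, self.b = w, b
    def forward(self, inp): return inp @ self.w + self.b
    def bwd(self, out, inp):
        inp.g = out.g @ self.w.t()
        self.w.g = inp.t() @ out.g   # matrix-product form, no giant outer product
        self.b.g = out.g.sum(0)
~~~

Swapping the einsum for inp.t() @ out.g is exactly the Version 5 change: the outer-product-then-sum collapses into a single matmul.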
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
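As a quick numerical check of the init discussion above, a small sketch (the sizes follow the note's MNIST setup; the exact statistics vary run to run, and the built-in call at the end is shown only for comparison):

~~~python
import math, torch

m, nh = 784, 50                       # input size and hidden units, as in the note
x = torch.randn(10000, m)             # stand-in for the normalized x_valid

w_naive   = torch.randn(m, nh)                      # hidden std blows up to ~sqrt(m)
w_kaiming = torch.randn(m, nh) * math.sqrt(2. / m)  # simplified kaiming init

t = torch.relu(x @ w_kaiming)
print(t.mean().item(), t.std().item())  # roughly 0.5 and 0.8 after ReLU

# the built-in: mode='fan_out' matches nn.Linear's transposed weight layout
w_pt = torch.zeros(nh, m)
torch.nn.init.kaiming_normal_(w_pt, mode='fan_out', nonlinearity='relu')
~~~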
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
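Putting the section's variants side by side, a small self-contained sketch (the shapes here are arbitrary; the timing ratios above came from the lecture's MNIST-sized matrices):

~~~python
import torch

def matmul_broadcast(a, b):
    # one Python loop left: each row of a is broadcast against all of b
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

a, b = torch.randn(5, 3), torch.randn(3, 4)
expected = a @ b                                   # PyTorch's own operator
assert torch.allclose(matmul_broadcast(a, b), expected, rtol=1e-3, atol=1e-5)
assert torch.allclose(torch.einsum('ik,kj->ij', a, b), expected, rtol=1e-3, atol=1e-5)
~~~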
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
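For reference, the naive triple loop the lecture times first might look like this (a sketch; the commented shapes mimic a 5-image validation batch against one MNIST-sized weight matrix):

~~~python
import torch

def matmul(a, b):
    # pure-Python triple loop: one interpreter step per multiply-accumulate
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            for k in range(ac):
                c[i, j] += a[i, k] * b[k, j]
    return c

# matmul(torch.randn(5, 784), torch.randn(784, 10))  # ~1 s, per the lecture
~~~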
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
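End to end, the training recipe described above looks roughly like this in fastai v1 (a sketch under the post's setup; the folder path, image size, and epoch counts are illustrative):

~~~python
from fastai.vision import *  # fastai v1, as used in the post

data = ImageDataBunch.from_folder(out_dir, train='.', valid_pct=0.2,
                                  ds_tfms=get_transforms(), size=224).normalize(imagenet_stats)
learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(4)                 # train the head while the body is frozen
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()                  # pick the slice bounds from this plot
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))
~~~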
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
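To see whether a just-modified volume is ready for another change, you can watch its modification state with the AWS CLI; a hypothetical sketch (the volume id below is made up):

~~~
$ aws ec2 modify-volume --volume-id vol-0123456789abcdef0 --size 152
$ aws ec2 describe-volumes-modifications --volume-ids vol-0123456789abcdef0
# wait until the reported ModificationState is no longer 'modifying'/'optimizing'
# before trying to extend the same volume again
~~~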
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/03/note08-fastai-3/index.html b/_site/2020/03/note08-fastai-3/index.html
index d217f06eac..a151dc55ed 100644
--- a/_site/2020/03/note08-fastai-3/index.html
+++ b/_site/2020/03/note08-fastai-3/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/03/note08-fastai-3/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/4-backward3.png","headline":"Implement forward&backward pass from scratch","dateModified":"2020-03-01T00:00:00+09:00","datePublished":"2020-03-01T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/03/note08-fastai-3/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
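As a sanity check on the dropout description above, here is a minimal sketch (my own, not from the lesson) showing the train-time scaling and the test-time no-op:
~~~python
# a minimal sketch of the dropout behavior described above: at train time the
# kept activations are divided by (1-p), at eval time dropout does nothing
import torch

x = torch.ones(8)
drop = torch.nn.Dropout(p=0.5)

drop.train()
print(drop(x))  # entries are 2.0 (kept, scaled by 1/(1-p)) or 0.0 (dropped)

drop.eval()
print(drop(x))  # identity: all ones, nothing special needed at inference time
~~~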
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910'] for k, v in d.items(): print(k, len(d[k])) 0001TP 124 0016E5 305 Seq05VD 171 0006R0 101 d2 = defaultdict(list) # the same grouping, keyed from the label file stems for k, v in labels: d2[k].append(v) for i in d2.keys(): print(i, len(d2[i])) 0016E5 305 0001TP 124 0006R0 101 Seq05VD 171 files[0], labels[0] (('0001TP', '009210'), ('0016E5', '01800')) 2. My question: Link: Why do we need masking? And does the color come from the fastai library? 
(have to look into the source code) What does the parameter alpha do? When people make a masked img, does it have a ranged integer limit? Is image normalization related to this? lbl_sorted = sorted(lbl_names) f_sorted = sorted(fnames) lbl_1 = lbl_sorted[33] f_1 = f_sorted[33] img = open_image(lbl_1) mask = open_mask(lbl_1) _,axs = plt.subplots(1,2, figsize=(10,5)) # img.show(ax=axs[0], y=mask, title='masked') img.show(ax=axs[0], title='1') mask.show(ax=axs[1], title='2', alpha=1.) img_2 = open_image(f_1) mask_2 = open_mask(f_1) _,axs = plt.subplots(1,2, figsize=(10,5)) # img.show(ax=axs[0], y=mask, title='masked') img_2.show(ax=axs[0], title='3') mask_2.show(ax=axs[1], title='4', alpha=1.) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) img.data #labeled data tensor([[[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]], [[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]], [[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]]]) mask.data # after mask, labeled data tensor([[[ 4, 4, 4, ..., 21, 21, 21], [ 4, 4, 4, ..., 21, 21, 21], [ 4, 4, 4, ..., 21, 21, 21], ..., [17, 17, 17, ..., 30, 30, 30], [17, 17, 17, ..., 30, 30, 30], [17, 17, 17, ..., 30, 30, 30]]]) img_2.data, mask_2.data (tensor([[[0.0706, 0.0667, 0.0706, ..., 0.6431, 0.6549, 0.6627], [0.0745, 0.0706, 0.0706, ..., 0.6431, 0.6510, 0.6549], [0.0784, 0.0706, 0.0745, ..., 0.6392, 0.6588, 0.6588], ..., [0.0863, 0.0824, 0.0824, ..., 0.1333, 0.1216, 0.1255], [0.0902, 0.0863, 0.0824, ..., 0.1255, 0.1176, 0.1216], [0.0863, 0.0824, 0.0784, ..., 0.1137, 0.1059, 0.1137]], [[0.0706, 0.0667, 0.0706, ..., 0.7490, 0.7608, 0.7686], [0.0745, 0.0706, 0.0706, ..., 0.7451, 0.7569, 0.7608], [0.0784, 0.0706, 0.0745, ..., 0.7412, 0.7529, 0.7529], ..., [0.0980, 0.0941, 0.0941, ..., 0.1804, 0.1686, 0.1725], [0.1059, 0.1020, 0.0980, ..., 0.1725, 0.1647, 0.1686], [0.1020, 0.0980, 0.0941, ..., 0.1608, 0.1529, 0.1608]], [[0.0784, 0.0745, 0.0784, ..., 0.7569, 0.7686, 0.7765], [0.0824, 0.0784, 0.0784, ..., 0.7647, 0.7647, 0.7686], [0.0784, 0.0706, 0.0745, ..., 0.7608, 0.7647, 0.7647], ..., [0.1216, 0.1176, 0.1176, ..., 0.2000, 0.1882, 0.1922], [0.1176, 0.1137, 0.1098, ..., 0.1843, 0.1765, 0.1804], [0.1137, 0.1098, 0.1059, ..., 0.1725, 0.1647, 0.1725]]]), tensor([[[ 18, 17, 18, ..., 183, 186, 188], [ 19, 18, 18, ..., 183, 185, 186], [ 20, 18, 19, ..., 182, 185, 185], ..., [ 25, 24, 24, ..., 43, 40, 41], [ 26, 25, 24, ..., 41, 39, 40], [ 25, 24, 23, ..., 38, 36, 38]]])) 3. What is the difference between Image and ImageSegment?: ImageSegment An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It’s easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _,axs = plt.subplots(1,3, figsize=(8,4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') #seg mask over the img using the y arg mask.show(ax=axs[2], title='mask only', alpha=1.) vision.image 4. Why/how is an img divided by 255 and what it results in, in fast.ai: vision.image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. The sum of a pixel’s values over all channels (which is S) divides each channel, so that the normalized values will be R/S, G/S and B/S (where S=R+G+B). Detailed explanation here 5. Python Evaluation Order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x,y = y,x+y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x+y x, y (2, 4) 6. model learner parameter :: pct_start: A: The percentage of the total number of epochs during which the learning rate rises in one cycle. Q: Sorry, I’m still confused: one cycle in the new API only runs one epoch. How does the percentage of the total number of epochs work? Can you give an example, say learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: Ok, the strictly correct answer would be the percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation … so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, it means that your LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summation of what is happening? A2: Yes, I think that’s correct. You can verify it by changing the value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2. source: forums.fastai "
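To make the forum answer concrete, here is a tiny worked example (my own; it just reuses the numbers from the thread) of how many iterations the learning rate rises for:
~~~python
# a worked example of the pct_start answer above: 10 epochs of 100 iterations
# with pct_start=0.05 means the lr rises for 5% of all iterations, then decays
epochs, iters_per_epoch, pct_start = 10, 100, 0.05
total_iters = epochs * iters_per_epoch   # 1000
rising = int(pct_start * total_iters)    # 50 iterations with the lr rising
print(rising, total_iters - rising)      # -> 50 950
~~~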
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
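As a quick sanity check on the hand-written mse_grad above, the following sketch (my own, not from the lecture) compares it with PyTorch autograd on random data:
~~~python
# a sanity-check sketch: the hand-derived MSE gradient above should match
# what PyTorch autograd computes for the same loss
import torch

inp = torch.randn(1000, 1, requires_grad=True)   # stand-in model output
targ = torch.randn(1000)                         # stand-in targets

loss = (inp.squeeze(-1) - targ).pow(2).mean()    # mse, as in the note
loss.backward()                                  # autograd fills inp.grad

manual = 2. * (inp.detach().squeeze(-1) - targ).unsqueeze(-1) / inp.shape[0]
print(torch.allclose(inp.grad, manual))          # -> True
~~~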
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
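The pieces above are scattered through the note, so here is a minimal, self-contained sketch of the forward pass with simplified Kaiming init and the MSE loss, assuming MNIST-sized inputs (m = 784) and the post's nh = 50; the shapes and the -0.5 shift after ReLU follow the lesson, while the random x_valid/y_valid placeholders are illustrative only.
~~~python
# A sketch of the forward pass described above; the random tensors stand in
# for the real MNIST data, which this sketch does not download.
import math
import torch

m, nh = 784, 50                      # inputs per example, hidden units
x_valid = torch.randn(10000, m)      # placeholder for the real x_valid
y_valid = torch.randn(10000)         # placeholder target

# simplified Kaiming init: scale by sqrt(2/fan_in) to keep std near 1
w1 = torch.randn(m, nh) * math.sqrt(2. / m);  b1 = torch.zeros(nh)
w2 = torch.randn(nh, 1) * math.sqrt(2. / nh); b2 = torch.zeros(1)

def lin(x, w, b): return x @ w + b
def relu(x): return x.clamp_min(0.) - 0.5    # shift to re-center the mean

def model(xb): return lin(relu(lin(xb, w1, b1)), w2, b2)
def mse(output, targ): return (output.squeeze(-1) - targ).pow(2).mean()

assert model(x_valid).shape == torch.Size([x_valid.shape[0], 1])
print(mse(model(x_valid), y_valid))
~~~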
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
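To tie the three speed-ups above together, here is a small sketch (assuming plain PyTorch; m1 and m2 are made-up test matrices) showing the broadcasting version and the einsum version agreeing with PyTorch's own matmul within the same tolerances as near():
~~~python
# The broadcasting and einsum matmuls discussed above, checked against @.
import torch

def matmul_broadcast(a, b):
    ar, ac = a.shape
    c = torch.zeros(ar, b.shape[1])
    for i in range(ar):
        # a[i,:,None] broadcasts row i against every column of b at once
        c[i] = (a[i, :, None] * b).sum(dim=0)
    return c

def matmul_einsum(a, b):
    return torch.einsum('ik,kj->ij', a, b)   # repeated k is summed over

m1, m2 = torch.randn(4, 5), torch.randn(5, 3)
assert torch.allclose(matmul_broadcast(m1, m2), m1 @ m2, rtol=1e-3, atol=1e-5)
assert torch.allclose(matmul_einsum(m1, m2), m1 @ m2, rtol=1e-3, atol=1e-5)
~~~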
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
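For reference, here is a sketch of the 'pure Python' triple loop that the timings above are measured against (toy-sized matrices here; the lecture uses real MNIST batches):
~~~python
# The naive triple-loop matmul: every scalar multiply-add goes through the
# Python interpreter, which is why it is tens of thousands of times slower.
import torch

def matmul_pure(a, b):
    ar, ac = a.shape
    br, bc = b.shape
    assert ac == br
    c = torch.zeros(ar, bc)
    for i in range(ar):
        for j in range(bc):
            for k in range(ac):
                c[i, j] += a[i, k] * b[k, j]
    return c

a, b = torch.randn(8, 16), torch.randn(16, 4)
assert torch.allclose(matmul_pure(a, b), a @ b, atol=1e-5)
~~~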
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
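The training recipe above, condensed into one hedged sketch (assuming fastai v1, as the post uses; `data` is the ImageDataBunch built earlier, and the exact learning rates depend on your own lr_find plot):
~~~python
# A sketch of the fine-tune -> unfreeze -> sliced-LR recipe described above.
from fastai.vision import *

learn = cnn_learner(data, models.resnet34, metrics=accuracy)
learn.fit_one_cycle(4)                 # train only the head first
learn.lr_find()                        # sweep learning rates
learn.recorder.plot()                  # inspect loss vs. learning rate
learn.unfreeze()                       # now train the whole model
# min bound: point just before the loss soars, /10; max bound: head LR /5-10
learn.fit_one_cycle(2, max_lr=slice(1e-5, 1e-3))
~~~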
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
diff --git a/_site/2020/03/note08-fastai-4/index.html b/_site/2020/03/note08-fastai-4/index.html
index 1495f2a44b..1ddeedd73b 100644
--- a/_site/2020/03/note08-fastai-4/index.html
+++ b/_site/2020/03/note08-fastai-4/index.html
@@ -19,9 +19,9 @@
-
+
+{"description":"This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring","author":{"@type":"Person","name":"dionne"},"@type":"BlogPosting","url":"http://localhost:4000/2020/03/note08-fastai-4/","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"http://localhost:4000/assets/images/logo.png"},"name":"dionne"},"image":"http://localhost:4000/assets/images/4-classlin.png","headline":"Gradient backward, Chain Rule, Refactoring","dateModified":"2020-03-02T00:00:00+09:00","datePublished":"2020-03-02T00:00:00+09:00","mainEntityOfPage":{"@type":"WebPage","@id":"http://localhost:4000/2020/03/note08-fastai-4/"},"@context":"http://schema.org"}
@@ -161,96 +161,101 @@
"body": " {% if page. url == / %} {% assign latest_post = site. posts[0] %} <div class= topfirstimage style= background-image: url({% if latest_post. image contains :// %}{{ latest_post. image }}{% else %} {{site. baseurl}}/{{ latest_post. image}}{% endif %}); height: 200px; background-size: cover; background-repeat: no-repeat; ></div> {{ latest_post. title }} : {{ latest_post. excerpt | strip_html | strip_newlines | truncate: 136 }} In {% for category in latest_post. categories %} {{ category }}, {% endfor %} {{ latest_post. date | date: '%b %d, %Y' }} {%- assign second_post = site. posts[1] -%} {% if second_post. image %} <img class= w-100 src= {% if second_post. image contains :// %}{{ second_post. image }}{% else %}{{ second_post. image | absolute_url }}{% endif %} alt= {{ second_post. title }} > {% endif %} {{ second_post. title }} : In {% for category in second_post. categories %} {{ category }}, {% endfor %} {{ second_post. date | date: '%b %d, %Y' }} {%- assign third_post = site. posts[2] -%} {% if third_post. image %} <img class= w-100 src= {% if third_post. image contains :// %}{{ third_post. image }}{% else %}{{site. baseurl}}/{{ third_post. image }}{% endif %} alt= {{ third_post. title }} > {% endif %} {{ third_post. title }} : In {% for category in third_post. categories %} {{ category }}, {% endfor %} {{ third_post. date | date: '%b %d, %Y' }} {%- assign fourth_post = site. posts[3] -%} {% if fourth_post. image %} <img class= w-100 src= {% if fourth_post. image contains :// %}{{ fourth_post. image }}{% else %}{{site. baseurl}}/{{ fourth_post. image }}{% endif %} alt= {{ fourth_post. title }} > {% endif %} {{ fourth_post. title }} : In {% for category in fourth_post. categories %} {{ category }}, {% endfor %} {{ fourth_post. date | date: '%b %d, %Y' }} {% for post in site. posts %} {% if post. tags contains sticky %} {{post. title}} {{ post. excerpt | strip_html | strip_newlines | truncate: 136 }} Read More {% endif %}{% endfor %} {% endif %} All Stories: {% for post in paginator. posts %} {% include main-loop-card. html %} {% endfor %} {% if paginator. total_pages > 1 %} {% if paginator. previous_page %} « Prev {% else %} « {% endif %} {% for page in (1. . paginator. total_pages) %} {% if page == paginator. page %} {{ page }} {% elsif page == 1 %} {{ page }} {% else %} {{ page }} {% endif %} {% endfor %} {% if paginator. next_page %} Next » {% else %} » {% endif %} {% endif %} {% include sidebar-featured. html %} "
}, {
"id": 12,
+ "url": "http://localhost:4000/2020/04/v3-2019-lesson06-note/",
+ "title": "fastai 2019 course-v3 Part1, lesson06",
+ "body": "2020/04/15 - Lesson 06Rossmann(Tabular): Tabular data: be careful on Categorical variable vs Continuous variable. if datatype is int, fastai think it is classification, not a regression. Root mean square percentage error. as loss function. When you assign the y_range, it’s better to assign little bit more than actual maximum. > because it’s sigmoid. intermediate layers, which is weight matrix is 1) 1000, and 2) 500 -> which means our parameter would be 500*1000. learn. modelWhat is dropout and embedding dropout?: Nitish Srivastava, Dropout: A Simple way to prevent Neural Networks from Overfitting you can dropout with p value, make it specified to specific layer, or make it applied to all the layers. Pytorch code 1) bernoulli, which decides whether you will hold it? 2) and divide the noise value depends on noise value. so noise became 2 or remain 0. According to pytorch code, We do change at training time, but we do nothing at test time. and this means you don’t have to do anything special with inference time. ’ TODO: find at forums what is inference time - Related to NVIDIA, GPU. Embedding dropout is just a dropout. It’s different between continuous variable and embedding layer. TODO Still can’t understand. why embedding dropout is effective. or,… in need. Let’s delete at random, some of the results of the embedding. and It worked well especially at Kaggle Batch Normalization: Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift -> came out false! According to How Does Batch Normalization Help Optimization? The key was multiplicative bias {\gamma} and additive bias {\beta}` Explain Let $$ \hat{y} = f(w_1, w_2, w_3, … , x)} $$ , loss = MSE , Then y_range should be between 1 and 5` And Activation function ends with -1 -> +1 To mitigate this problem, we can add the other parameter, like $$w_n$$ But there’re so much interactions in the process so just re-scale the output. Momentum parameter at BatchNorm1d: Different from momentum like in optimization. This momentum is Exponentially weighted moving average of the mean, instead of deviation. If this is small number: mean standard deviation would be less from mini_batch to mini_batch » less regularization effect. (If this is large number, variation would be greater from mini_batch to mini_batch » more regularization effect) TODO: can’t sure, but i understand, this is not about how to update parameter but about how much reflect previous value when scale and shift Q. Preference between batchnorm and the other regularizations(drop out, weight decay)A. Nope, always try and see the results## lesson6-pets-more### Data Augmentation- Last reg- `get_transforms` has lots of params (even not yet learned all) -> check documentation - Remember you can implement all the doc contents bc it's made from nbdev - TODO: try this!!- Essence of data augmentation is you should maintain the label, while somewhat making sense. - ex) tilt, because it's optically sensible, you can always change the angle of the data view. - zeros, border, and reflection but always `reflection` works most of the time, so that is the default### Convolutional Kernel(What is convolution?)- Will make heat\_map from scratch, which means the parts convolution focuses on![setosa_visualization]()- http://setosa. io/ev/image-kernels/ - javascript thing - How convolution works - Kernel. which does element-wise multiplication, and sum them up - so it has on pixel less at borders -> so it uses padding, and fastai uses reflection as said. 
- why this Kernel(matrix) helps catching horizontal edge side? - because this kernel`(picture2)` weights differently, depends on `x axis` - why familiar, because it's similar intuition with fugus`(paper)` paper- CNN from different viewpoints`link` - output of pixel is results from different linear equations. - If you connect this with represents of neural network nodes, you can see that the specific inp nodes connected with specific out nodes. - **Summarize**: cnn does 1) matmul some of the elements are always zero 2) same weight for every row, which is called `weight time? weight. . ?, 1:18:50` `(picture)`#### Further lowdown- Because generally image has 3 channels, we need rank 3 kernel. - And **do multiply with all channel output is one pixel**. (`draw by your self`) - but this kernel will catch one feature, like horizontal, so that we make more kernel so that output becomes (h * w * kernel) - And that `kernel` come to `channel`- **Conv2d**: with 3 by 3 kernel, stride 2 conv -> (h/2 * w/2 * kernel) - skip or jump over input pixel - to protect from memory out of control~~~pythonlearn. modellearn. summary()~~~TODO: understand yourself the blocks of conv-kernel: - Usually use big kernel size at first layer (will study this at part2)- Bottom right highlighting kernel(`pic / draw`)- `torch. tensor. expand`: for memory efficient, because we should do RGB- We do not make separate kernel, but make rank 4 kernel - 4d tensor is just stacked kernel- `t[None]. shape` create new unit axis, and why? we make this -> it should move unit of batch, not one size image. ### Average pooling, feature- suppose our pre-trained model results in size of `11 by 11 by 512 ` `pic 4` and my classification task has 37 classes * take the first face of channel, which is 11 by 11 and `mean` it, so that make rank 2 tensor, 512 by 1 * and make 2d matrix, which is 512 by 37 and multiply so that we can get 37 by 1 matrix. - Feature, at convolution block - So, when we transfer-learning without unfreeze, every element of last matrix (512 by 1) should represent(or could catch) each feature. ### Heatmap, Hook~~~hook_output(model[0]) -> acts -> avg_acts~~~- if we average the block with `axis=feature`, result of matrix(11 by 11) depicts `how activated was that area?` -> it is heatmap, `avg_acts`- and acts comes from hook, which is more advanced pytorch feature. - hook into pytorch machine itself, and run any arbitrary Pytorch code - Why this is cool?: Normally it gives set of outputs of forward pass, but we can interrupt and hook the forward pass. - Also can store the output of the convolutional part of the model, which is before avg_pooling- Thinking back when we do cut off `after` the conv part. - but with fast. ai the original convolutional part of the model would be *the first thing in the model*, specifically could be given from `learn. model. eval()[0]` - And this is gotten from `hooked_output` and having hooked the output, we can pass our x_minibatch to output. - Not directly, but with normalized, minibatch, put on to the gpu - `one_item()` function do it, when we have one data `TODO: this is assignment` do it yourself without one_item function - and `. cuda()` put it on gpu- you should print out very often the shape of tensor, and try think why. "
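A minimal sketch of the inverted-dropout behaviour described above, in plain PyTorch rather than the library's internals; the function name and shapes are illustrative, not from the lesson notebook:
~~~python
import torch

def dropout_sketch(x, p=0.5, training=True):
    # At test time dropout is the identity, matching the note that
    # nothing special happens at inference time.
    if not training:
        return x
    # 1) Bernoulli draw decides which activations are kept.
    keep = torch.bernoulli(torch.full_like(x, 1 - p))
    # 2) Divide kept activations by (1 - p): with p=0.5 each value is
    #    either doubled or zeroed, so the expected value is unchanged.
    return x * keep / (1 - p)

x = torch.ones(2, 4)
print(dropout_sketch(x))                   # entries are 2.0 or 0.0
print(dropout_sketch(x, training=False))   # unchanged at inference
~~~
Because the kept activations are rescaled at training time, the expected activation is the same in both modes, which is why no correction is needed at inference.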
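And a sketch of the average-pooling head and the heatmap averaging described under 'Average pooling' and 'Heatmap, Hook', assuming the 11 by 11 by 512 activation block and 37 classes from the note; a random tensor stands in for a real hooked activation:
~~~python
import torch

acts = torch.randn(512, 11, 11)   # hooked conv output: channels x h x w

# Pooling head: mean over the 11x11 face of each channel -> 512 features,
# then a 512x37 matrix maps the features to 37 class scores.
feats = acts.mean(dim=(1, 2))     # shape (512,)
w = torch.randn(512, 37)
scores = feats @ w                # shape (37,)

# Heatmap: average over the feature axis instead -> an 11x11 map of
# "how activated was that area?" (avg_acts in the lesson).
avg_acts = acts.mean(dim=0)       # shape (11, 11)
print(scores.shape, avg_acts.shape)
~~~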
+ }, {
+ "id": 13,
+ "url": "http://localhost:4000/2020/04/qna-image-segmentation/",
+ "title": "[Q&A] Image Segmentation, using Unet with Driving Video data",
+ "body": "2020/04/02 - This post is about my questions while I was studying USF Deep Learning course about image segmentation task. All the answers are from the course, source code, library document, or document. I cared about being clear at reporting information including source of information, however if there are still anything unclear, please contact me. And thank you Jeremy&Rachael for everything. Also Thank you Cambridge Computer Vision Lab to made us to study with your labor. The Cambridge-driving Labeled Video Database (CamVid) is the first collection of videos with object class semantic labels, complete with metadata. The database provides ground truth labels that associate each pixel with one of 32 semantic classes. If someone is interested in this project, please check the site and see the details. Now, let’s start first using jupyter’s one of tricks which I love most. It enables cell to print the code without print function. from IPython. core. interactiveshell import InteractiveShell# pretty print all cell's output and not just the last oneInteractiveShell. ast_node_interactivity = all from fastai. vision import *from fastai. callbacks. hooks import *from fastai. utils. mem import *path = untar_data(URLs. CAMVID) # The locations where the data and models are downloaded are set in config. ymlpath. ls() I’m trying to accustomed to using pathlib module, not just it became built-in module in python, but I felt uncomfortable myself with os module. However, still unpredictable conflicts are remain, even in the quite standard library like Pytorch, tensorflow, onnx. (it require me string for path. not PosixPath. will send PR. . ) [PosixPath('/root/. fastai/data/camvid/valid. txt'), PosixPath('/root/. fastai/data/camvid/images'), PosixPath('/root/. fastai/data/camvid/labels'), PosixPath('/root/. fastai/data/camvid/codes. txt')]path_img = path/'images'path_lbl = path/'labels'fnames = get_image_files(path_img) #filenamelbl_names = get_image_files(path_lbl)1. (Play with data) My Hypothesis: File name has A_B format. and A / B would be at key-value position. Use collections - defaultdict Default Dict: Link: easy to group a sequence of key and value pairs into a dictionary of list?from collections import defaultdictfnames[0], lbl_names[0](PosixPath('/root/. fastai/data/camvid/images/0001TP_009210. png'), PosixPath('/root/. fastai/data/camvid/labels/0016E5_01800_P. png'))files = [tuple(i. stem. split('_')) for i in fnames]labels = [tuple(i. stem. split('_')[:-1]) for i in lbl_names]d = defaultdict(list)for k, v in files: d[k]. append(v)d. keys()len(d['0001TP'])124for k, v in d. 
items(): print(k, v)0001TP ['009210', '008850', '007350', '008970', '009840', '010140', '008490', '008520', '009540', '008250', '008340', '006840', '007860', '007410', '007740', '009870', '010080', '007890', '008790', '010020', '008400', '007080', '008280', '010380', '009330', '009060', '007470', '006810', '009720', '008580', '007110', '008730', '009150', '007680', '009780', '007800', '007290', '008760', '009510', '008640', '008310', '007440', '006900', '007500', '008460', '009030', '008130', '009480', '009900', '010230', '009270', '008040', '007590', '007950', '009990', '008550', '007260', '008100', '007530', '006960', '008190', '009420', '009930', '009000', '007830', '008940', '006690', '009570', '008880', '010170', '007560', '009300', '006750', '009360', '010200', '007320', '008010', '009120', '007620', '007200', '007140', '010320', '006720', '008670', '007230', '008370', '010260', '009690', '006930', '009090', '007770', '010290', '010350', '008610', '008070', '009600', '008430', '009450', '007380', '009240', '007710', '007170', '008160', '008910', '007020', '006780', '007050', '009960', '009810', '008220', '009180', '009750', '010050', '009660', '010110', '007920', '009630', '007650', '006990', '008700', '009390', '007980', '008820', '006870']0016E5 ['01290', '08159', '05760', '08133', '08063', '06660', '00960', '05850', '00750', '06960', '08035', '08107', '07975', '08017', '05610', '07140', '08119', '08027', '07170', '08400', '08093', '02100', '06390', '04470', '08340', '06060', '00600', '07470', '08151', '07800', '01620', '05730', '01530', '00690', '08430', '05940', '01980', '07320', '08069', '07965', '04380', '05430', '01410', '06780', '08007', '08087', '08079', '06600', '08109', '05490', '00901', '04590', '04680', '08045', '01770', '06690', '08085', '06810', '00420', '08011', '07440', '02190', '06300', '04800', '01500', '00450', '08029', '01470', '06330', '07997', '08067', '05370', '08013', '08190', '00840', '02370', '08049', '08135', '01440', '06870', '05820', '05280', '08051', '04440', '08091', '01380', '00630', '07290', '05520', '04770', '00540', '07995', '07999', '05550', '07920', '08101', '08141', '08053', '04620', '08103', '05160', '07350', '08057', '06030', '06000', '08550', '07963', '08089', '05970', '08047', '05640', '06240', '05220', '04350', '01590', '07959', '01950', '08117', '06180', '01560', '05400', '08043', '07680', '00780', '08081', '07050', '01020', '01350', '04530', '06720', '07969', '08149', '08003', '08131', '08129', '08033', '05460', '01650', '07530', '08023', '05340', '08640', '05100', '08075', '01230', '04980', '02070', '01080', '06210', '05910', '08009', '01800', '05190', '02400', '08083', '08019', '07620', '07200', '07890', '08059', '06990', '04410', '08121', '08123', '06930', '08137', '08147', '08095', '06570', '06150', '08153', '06840', '05250', '00510', '08370', '08580', '08113', '07410', '08097', '01200', '04950', '07770', '07650', '04710', '06090', '08055', '07110', '07981', '00990', '08250', '08127', '01920', '07985', '08220', '08005', '08157', '05130', '08071', '01140', '04830', '07740', '08143', '06120', '02040', '08111', '08115', '00660', '08280', '06420', '07983', '02220', '05700', '01860', '01260', '04920', '06510', '07020', '08073', '08105', '08125', '06360', '07860', '07993', '00810', '06540', '08099', '08139', '02010', '07973', '08155', '07991', '06630', '00480', '06750', '04890', '08001', '08025', '00870', '08490', '01830', '07977', '05010', '01170', '07961', '01680', '01050', '07987', '07080', '04560', '00930', '05310', '02340', '05790', 
'08460', '00720', '08031', '02280', '08039', '08037', '08065', '06270', '08077', '06900', '04650', '06480', '07230', '08041', '06450', '00570', '07989', '04740', '07979', '02250', '07380', '00390', '01710', '07590', '08021', '08520', '07500', '01110', '04500', '02310', '07971', '02130', '05580', '05880', '08610', '08310', '08145', '05670', '04860', '07260', '08015', '07967', '01740', '01320', '07560', '07830', '01890', '08061', '02160', '07710', '05070', '05040']Seq05VD ['f00030', 'f02550', 'f03450', 'f01110', 'f00480', 'f00210', 'f04590', 'f04170', 'f01800', 'f03990', 'f03360', 'f03900', 'f02070', 'f00810', 'f03690', 'f01350', 'f01530', 'f04980', 'f05100', 'f03060', 'f00900', 'f03870', 'f02460', 'f01470', 'f02370', 'f02820', 'f04080', 'f02760', 'f04860', 'f02250', 'f04200', 'f00270', 'f03720', 'f02850', 'f04410', 'f01200', 'f03090', 'f02010', 'f03930', 'f00090', 'f01650', 'f01890', 'f03840', 'f03030', 'f02130', 'f01230', 'f04110', 'f02520', 'f04140', 'f04020', 'f00060', 'f03420', 'f01560', 'f00120', 'f04290', 'f02340', 'f00300', 'f01380', 'f00870', 'f01860', 'f02970', 'f04560', 'f02730', 'f00330', 'f04530', 'f03780', 'f01770', 'f03390', 'f05040', 'f02430', 'f03330', 'f00660', 'f01740', 'f02100', 'f04800', 'f04050', 'f00510', 'f02790', 'f04350', 'f00690', 'f00540', 'f02490', 'f00960', 'f00930', 'f04230', 'f02880', 'f03600', 'f01020', 'f01500', 'f02400', 'f04830', 'f04470', 'f03300', 'f02670', 'f00450', 'f01980', 'f01170', 'f01620', 'f04500', 'f01080', 'f03180', 'f05070', 'f03150', 'f04950', 'f01440', 'f03510', 'f01710', 'f00360', 'f04770', 'f02910', 'f01050', 'f00630', 'f04320', 'f00570', 'f03240', 'f02190', 'f01140', 'f03540', 'f02220', 'f02640', 'f03960', 'f00000', 'f04920', 'f01950', 'f00990', 'f03480', 'f03000', 'f00420', 'f04620', 'f03210', 'f00780', 'f03570', 'f01590', 'f00750', 'f01920', 'f04650', 'f03750', 'f03630', 'f02310', 'f02610', 'f02580', 'f04740', 'f02280', 'f04680', 'f00390', 'f00720', 'f03660', 'f02040', 'f03270', 'f00180', 'f03810', 'f01410', 'f01290', 'f03120', 'f00840', 'f04440', 'f00150', 'f01260', 'f02700', 'f02940', 'f00600', 'f01830', 'f04260', 'f05010', 'f04890', 'f02160', 'f00240', 'f04380', 'f01680', 'f04710', 'f01320']0006R0 ['f02820', 'f03690', 'f03180', 'f02550', 'f01020', 'f03660', 'f02340', 'f01170', 'f02610', 'f02940', 'f01290', 'f02100', 'f01350', 'f03270', 'f03870', 'f01380', 'f01980', 'f03810', 'f02430', 'f02310', 'f01830', 'f03480', 'f02970', 'f01890', 'f03210', 'f03930', 'f02040', 'f02070', 'f02400', 'f01560', 'f03030', 'f01770', 'f01590', 'f01950', 'f03420', 'f01650', 'f03450', 'f00990', 'f03630', 'f01500', 'f03570', 'f00930', 'f03090', 'f03360', 'f02880', 'f02460', 'f01440', 'f01920', 'f01230', 'f03840', 'f02730', 'f01620', 'f02220', 'f03750', 'f03330', 'f03540', 'f02520', 'f02790', 'f01050', 'f03120', 'f01800', 'f01140', 'f01860', 'f01530', 'f01470', 'f02670', 'f02490', 'f01260', 'f01110', 'f02760', 'f01680', 'f03150', 'f02580', 'f03300', 'f02280', 'f01200', 'f03390', 'f03510', 'f02640', 'f02190', 'f02370', 'f01320', 'f02130', 'f03600', 'f03240', 'f03780', 'f03720', 'f02700', 'f01410', 'f01080', 'f02850', 'f01710', 'f03900', 'f03060', 'f01740', 'f02010', 'f02250', 'f00960', 'f03000', 'f02160', 'f02910']for k, v in d. items(): print(k, len(d[k]))0001TP 1240016E5 305Seq05VD 1710006R0 101for i in d2. keys(): print(i,len(d2[i]))0016E5 3050001TP 1240006R0 101Seq05VD 171files[0], labels[0](('0001TP', '009210'), ('0016E5', '01800'))2. My question: Link: Why do we need masking? and does color from fastai library? 
(I have to look into the source code.) What does the parameter alpha do? When people make a masked img, is there a limit on the integer range? Is image normalization related to this? (See the normalization sketch after this entry.) lbl_sorted = sorted(lbl_names) f_sorted = sorted(fnames) lbl_1 = lbl_sorted[33] f_1 = f_sorted[33] img = open_image(lbl_1) mask = open_mask(lbl_1) _,axs = plt.subplots(1,2, figsize=(10,5)) # img.show(ax=axs[0], y=mask, title='masked') img.show(ax=axs[0], title='1') mask.show(ax=axs[1], title='2', alpha=1.) img_2 = open_image(f_1) mask_2 = open_mask(f_1) _,axs = plt.subplots(1,2, figsize=(10,5)) # img.show(ax=axs[0], y=mask, title='masked') img_2.show(ax=axs[0], title='3',) mask_2.show(ax=axs[1], title='4', alpha=1.) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_mask(lbl_1).data.shape torch.Size([1, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) open_image(f_1).data.shape torch.Size([3, 720, 960]) img.data # labeled data tensor([[[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]], [[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]], [[0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], [0.0157, 0.0157, 0.0157, ..., 0.0824, 0.0824, 0.0824], ..., [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176], [0.0667, 0.0667, 0.0667, ..., 0.1176, 0.1176, 0.1176]]]) mask.data # after mask, labeled data tensor([[[ 4, 4, 4, ..., 21, 21, 21], [ 4, 4, 4, ..., 21, 21, 21], [ 4, 4, 4, ..., 21, 21, 21], ..., [17, 17, 17, ..., 30, 30, 30], [17, 17, 17, ..., 30, 30, 30], [17, 17, 17, ..., 30, 30, 30]]]) img_2.data, mask_2.data (tensor([[[0.0706, 0.0667, 0.0706, ..., 0.6431, 0.6549, 0.6627], [0.0745, 0.0706, 0.0706, ..., 0.6431, 0.6510, 0.6549], [0.0784, 0.0706, 0.0745, ..., 0.6392, 0.6588, 0.6588], ..., [0.0863, 0.0824, 0.0824, ..., 0.1333, 0.1216, 0.1255], [0.0902, 0.0863, 0.0824, ..., 0.1255, 0.1176, 0.1216], [0.0863, 0.0824, 0.0784, ..., 0.1137, 0.1059, 0.1137]], [[0.0706, 0.0667, 0.0706, ..., 0.7490, 0.7608, 0.7686], [0.0745, 0.0706, 0.0706, ..., 0.7451, 0.7569, 0.7608], [0.0784, 0.0706, 0.0745, ..., 0.7412, 0.7529, 0.7529], ..., [0.0980, 0.0941, 0.0941, ..., 0.1804, 0.1686, 0.1725], [0.1059, 0.1020, 0.0980, ..., 0.1725, 0.1647, 0.1686], [0.1020, 0.0980, 0.0941, ..., 0.1608, 0.1529, 0.1608]], [[0.0784, 0.0745, 0.0784, ..., 0.7569, 0.7686, 0.7765], [0.0824, 0.0784, 0.0784, ..., 0.7647, 0.7647, 0.7686], [0.0784, 0.0706, 0.0745, ..., 0.7608, 0.7647, 0.7647], ..., [0.1216, 0.1176, 0.1176, ..., 0.2000, 0.1882, 0.1922], [0.1176, 0.1137, 0.1098, ..., 0.1843, 0.1765, 0.1804], [0.1137, 0.1098, 0.1059, ..., 0.1725, 0.1647, 0.1725]]]), tensor([[[ 18, 17, 18, ..., 183, 186, 188], [ 19, 18, 18, ..., 183, 185, 186], [ 20, 18, 19, ..., 182, 185, 185], ..., [ 25, 24, 24, ..., 43, 40, 41], [ 26, 25, 24, ..., 41, 39, 40], [ 25, 24, 23, ..., 38, 36, 38]]])) 3. What is the difference between an Image and an ImageSegment?: ImageSegment - An ImageSegment object has the same properties as an Image. The only difference is that when applying transformations to an ImageSegment, it will ignore the functions that deal with lighting and keep values of 0 and 1. It's easy to show the segmentation mask over the associated Image by using the y argument of show_image. img = open_image(fnames[0]) mask = open_mask(lbl_names[0]) _,axs = plt.subplots(1,3, figsize=(8,4)) img.show(ax=axs[0], title='no mask') img.show(ax=axs[1], y=mask, title='masked') #seg mask over the img using the y arg mask.show(ax=axs[2], title='mask only', alpha=1.) vision.image ## 4. Why/how is the img divided by 255, and what does it result in? fast.ai: vision.image - If div=True, pixel values are divided by 255. to become floats between 0. and 1. At times, you want to get rid of distortions caused by lights and shadows in an image. Normalizing the RGB values of an image can at times be a simple and effective way of achieving this. The sum of a pixel's values over all channels (call it S) divides each channel's intensity, so the normalized values are R/S, G/S and B/S (where S=R+G+B); see the sketch after this entry. Detailed explanation here. 5. Python evaluation order: Python evaluates expressions from left to right. Notice that while evaluating an assignment, the right-hand side is evaluated before the left-hand side. mask_tmp, trg_tmp, void_tmp = 2, 1, 10 mask_tmp = trg_tmp != void_tmp print(mask_tmp, trg_tmp, void_tmp) # (1) target is not the same as void True 1 10 # Example 1 x = 1 y = 2 x,y = y,x+y x, y (2, 3) # Example 2 x = 1 y = 2 x = y y = x+y x, y (2, 4) 6. Model learner parameter :: pct_start: A: Percentage of the total number of epochs during which the learning rate rises within one cycle. Q: Sorry, I'm still confused - doesn't one cycle in the new API only run one epoch? How does the percentage of the total number of epochs work? Can you give an example, say learn.fit_one_cycle(10, slice(1e-4,1e-3,1e-2), pct_start=0.05)? A: Ok, the strictly correct answer would be percentage of iterations, so the lr can both increase and decrease during the same epoch. In your example, say you have 100 iterations per epoch; then for half an epoch (0.05 * (10 * 100) = 50 iterations) the lr will rise, then slowly decrease. Q2: Thanks for this explanation ... so essentially, it is the percentage of overall iterations where the LR is increasing, correct? So, given the default of 0.3, your LR goes up for 30% of your iterations and then decreases over the last 70%. Is that a correct summary of what is happening? A2: Yes, I think that's correct. You can verify it by changing the value and checking learn.recorder.plot_lr(), for example with pct_start = 0.2. source: forums.fastai "
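A minimal sketch of the R/S, G/S, B/S normalization described in question 4, in plain PyTorch; the tensor shape mirrors the CamVid images above and the variable names are illustrative:
~~~python
import torch

img = torch.rand(3, 720, 960)        # an RGB tensor, values already in [0, 1]

# S = R + G + B per pixel; dividing each channel by S removes overall
# brightness (lights and shadows) so only colour proportions remain.
s = img.sum(dim=0, keepdim=True)     # shape (1, 720, 960), broadcasts below
img_norm = img / s.clamp_min(1e-8)   # guard against division by zero

print(img_norm.sum(dim=0).mean())    # ~1.0: channels now sum to 1 per pixel
~~~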
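And a small sketch of the pct_start arithmetic from question 6; the helper function is hypothetical, the numbers match the forum example:
~~~python
def rising_iterations(epochs, iters_per_epoch, pct_start):
    # pct_start is a fraction of *iterations*, not epochs: the LR rises
    # for pct_start of all iterations, then decreases for the rest.
    total = epochs * iters_per_epoch
    return int(pct_start * total)

print(rising_iterations(10, 100, 0.05))  # 50, i.e. half an epoch of rising LR
print(rising_iterations(10, 100, 0.3))   # 300: default 30% rising, 70% falling
~~~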
+ }, {
+ "id": 14,
"url": "http://localhost:4000/2020/03/note08-fastai-4/",
"title": "Gradient backward, Chain Rule, Refactoring",
- "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves ⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
+ "body": "2020/03/02 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring ” Lecture 08 - Deep Learning From Foundations-part2 “ Homework: calculus for machine learning einsum conventionCONTENTS: Foundation version Gradients backward pass decompose function chain rule with code check the result using Pytorch autograd Refactor model Layers as classes Modue. forward() Without einsum nn. Linear and nn. Module Forward process Foundation version: Gradients backward pass: Gradients is output with respect to parameter we’ve done this work in this path(below) to simplify this calculus, we can just change it into, So, you should know of the derivative of each bit on its own, and then you multiply them all together. As a result, it would be over cross over the data. So you can get gradient, output with respect to parameter What order should we calculate? BTW, why Jeremy wrote , not Loss function?1 decompose function We want to get derivative of which forms But, we have a estimation of answer (we call it y hat) now So, I will decompose funciton to trace target variable. Using the above forward pass, we can suppose some function from the end. start from , We know MSE funciton got two parameters, output, and target . from MSE’s input we know function’s output and supposing v is input of that function, similarly, v became output of chain rule with code examplify backward process by random sampling To get a variable, I modified forward model a little def model_ping(out = 'x_train'): l1 = lin(x_train, w1, b1) # one linear layer l2 = relu(l1) # one relu layer l3 = lin(l2, w2, b2) # one more linear layer return eval(out) Be careful we don’t use mse_loss in backward process1) start with the very last function, which is loss funciton. MSE If we codify this formula,def mse_grad(inp, targ): #mse_input(1000,1), mse_targ (1000,1) # grad of loss with respect to output of previous layer inp. g = 2. * (inp. squeeze() - targ). unsqueeze(-1) / inp. shape[0] And, this can be examplified like below. Notice that input of gradient function is same with forward functiony_hat = model_ping('l3') #get value from forward modely_hat. g = ((y_hat. squeeze(-1)-y_train). unsqueeze(-1))/y_hat. shape[0]y_hat. g. shape>>> torch. Size([50000, 1]) We can just calculate using broadcasting, not using squeeze. then why should do and unsqueeze again?🎯 It’s related with random access memory(RAM). . If I don’t squeeze, (I’m using colab) it out of RAM. 2) Derivative of linear2 function This process’s weight dimensions defined by axis=1, axis=2. axis=0 dimension means size of data. This will be summazed by . sum(0) method. unsqeeze(-1)&unsqeeze(1) seperates the dimension, and make a dot product, and vanish axis=0 dimension. def lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowlin2 = model_ping('l2'); #get value from forward modellin2. g = y_hat. g@w2. t(); w2. g = (lin2. unsqueeze(-1) * y_hat. g. unsqueeze(1)). sum(0);b2. g = y_hat. g. sum(0);lin2. g. shape, w2. g. shape, b2. g. shape>>> torch. Size([50000, 50])torch. Size([50, 1])torch. Size([1]) Notice going reverse order, we’re passing in gradient backward3) derivative of ReLU def relu_grad(inp, out): # grad of relu with respect to input activations inp. 
g = (inp>0). float() * out. g Examplified belowlin1=model_ping('l1') #get value from forward modellin1. g = (lin1>0). float() * lin2. g;lin1. g. shape>>> torch. Size([50000, 50])4) Derivative of linear1 Same process with 2) but, this process’s weight hasdef lin_grad(inp, out, w, b): # grad of matmul with respect to input inp. g = out. g @ w. t() w. g = (inp. unsqueeze(-1) * out. g. unsqueeze(1)). sum(0) b. g = out. g. sum(0) Examplified belowx_train. g = lin1. g @ w1. t(); w1. g = (x_train. unsqueeze(-1) * lin1. g. unsqueeze(1)). sum(0); b1. g = lin1. g. sum(0);x_train. g. shape, w1. g. shape, b1. g. shape>>> torch. Size([50000, 784])torch. Size([784, 50])torch. Size([50])5) Then it goes backward pass def forward_and_backward(inp, targ): # forward pass: l1 = inp @ w1 + b1 l2 = relu(l1) out = l2 @ w2 + b2 # we don't actually need the loss in backward! loss = mse(out, targ) # backward pass: mse_grad(out, targ) lin_grad(l2, out, w2, b2) relu_grad(l1, l2) lin_grad(inp, l1, w1, b1)Version 1 (Basic)- Wall time: 1. 95 s Summary Notice that output of function at forward pass became input of backward pass backpropagation is just the chain rule value loss (loss=mse(out,targ)) is not used in gradient calcuation. Because, it doesn’t appear with the weight. w1g, w2g, b1g, b2g, ig will be used for optimizercheck the result using Pytorch autograd require_grad_ is the magical function, which can automatic differentiation. 2 This magical auto gradified tensor keep track what happend in forward (taking loss function), and do the backward3 So it saves our time to differentiate ourselves Postfix underscore means in pytorch, in-place function, What is in-place function?⤵️ THis is benchmark…. . Version 2 (torch autograd)- Wall time: 3. 81 µs Refactor model: Amazingly, just refactoring our main pieces, it comes down up to Pytorch package. 🌟 Implement yourself, Practice, practice, practice! 🌟 Layers as classes: Relu and Linear are layers in oue neural net. -> make it as classes For the forward, using __call__ for the both of forward & backward. Because ‘call’ means we treat this as a function. class Lin(): def __init__(self, w, b): self. w,self. b = w,b def __call__(self, inp): self. inp = inp self. out = inp@self. w + self. b return self. out def backward(self): self. inp. g = self. out. g @ self. w. t() # Creating a giant outer product, just to sum it, is inefficient! self. w. g = (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) self. b. g = self. out. g. sum(0) Remember that in lin_grad function, we save bias&weight!!!!!💬 inp. g : gradient of the output with respect to the input. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 w. g : gradient of the output with respect to the weight. {: style=”color:grey; font-size: 90%; text-align: center;”} 💬 b. g : gradient of the output with respect to the bias. {: style=”color:grey; font-size: 90%; text-align: center;”} class Model(): def __init__(self, w1, b1, w2, b2): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ) def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() refer to Jeremy’s Model class, he put layers in list Dionne’s self-study note: Decomposing Jeremy’s Model class init needs weight, bias but not x data when call that class(a. k. a function) it gave x data and y label! jeremy composited function in layers. x = l(x) so concise…. . 
also utilized that layer list when backward ust reversing it (using python list’s method) And he is recursively calling the function on the result of the previous thing. ⬇️for l in self. layers: x = l(x)Q2: Don’t I need to declare magical autograd function, requires_grad_?{: style=”color:red; font-size: 130%; text-align: center;”} [The questions migrated to this article] Version 3 (refactoring - layer to class)- Wall time: 5. 25 µs Modue. forward(): Duplicate code makes execution time slow. Role of __call__ changed. No more __call__ for implementing forward pass. By initializing the forward with __call__, Module. forward() use overriding to maximize reusability. So any layer inherit Module, can use parent’s function. gradient of the output with respect to the weight (self. inp. unsqueeze(-1) * self. out. g. unsqueeze(1)). sum(0) can be reexpressed using einsum, torch. einsum( bi,bj->ij , inp, out. g) Defining forward and Module enables Pytorch to out almost duplicatesVersion 4 (Module & einsum)- Wall time: 4. 29 µs Q2: Isn’t there any way to use broadcasting? Why we should use outer product?{: style=”color:red; font-size: 130%; text-align: center;”} Without einsum: Replacing einsum to matrix product is even more faster. torch. einsum( bi,bj->ij , inp, out. g)can be reexpressed using matrix product, inp. t() @ out. gVersion 5 (without einsum)- Wall time: 3. 81 µs nn. Linear and nn. Module: Torch’s package nn. Linear and nn. Module Version 6 (torch package)- Wall time: 5. 01 µs Final, Using torch. nn. Linear & torch. nn. Module~~~pythonclass Model(nn. Module): def init(self, n_in, nh, n_out): super(). init() self. layers = [nn. Linear(n_in,nh), nn. ReLU(), nn. Linear(nh,n_out)] self. loss = mse def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x. squeeze(), targ)class Model(): def init(self): self. layers = [Lin(w1,b1), Relu(), Lin(w2,b2)] self. loss = Mse() def __call__(self, x, targ): for l in self. layers: x = l(x) return self. loss(x, targ)def backward(self): self. loss. backward() for l in reversed(self. layers): l. backward() ~~~ Footnote: fast. ai forums Lesson-8 ↩ pytorch docs - autograd ↩ stackoverflow - finding methods a object has ↩ "
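The note says to check the result using Pytorch autograd but shows no code for the check; here is a minimal self-contained sketch of that comparison, using small random shapes in place of the MNIST-sized tensors from the lesson:
~~~python
import torch

n, m, nh = 64, 10, 8                      # small stand-ins for 50000, 784, 50
x = torch.randn(n, m); y = torch.randn(n)
w1 = torch.randn(m, nh); b1 = torch.zeros(nh)
w2 = torch.randn(nh, 1); b2 = torch.zeros(1)

def forward_and_backward(inp, targ):
    l1 = inp @ w1 + b1
    l2 = l1.clamp_min(0.)                 # relu
    out = l2 @ w2 + b2
    # backward pass, exactly as derived in the note
    out.g = 2. * (out.squeeze(-1) - targ).unsqueeze(-1) / out.shape[0]
    l2.g = out.g @ w2.t()
    w2.g = l2.t() @ out.g; b2.g = out.g.sum(0)
    l1.g = (l1 > 0).float() * l2.g
    w1.g = inp.t() @ l1.g; b1.g = l1.g.sum(0)

forward_and_backward(x, y)

# Same network with autograd: requires_grad_ tracks the forward pass.
w1a = w1.clone().requires_grad_(True); b1a = b1.clone().requires_grad_(True)
w2a = w2.clone().requires_grad_(True); b2a = b2.clone().requires_grad_(True)
out = (x @ w1a + b1a).clamp_min(0.) @ w2a + b2a
loss = (out.squeeze(-1) - y).pow(2).mean()
loss.backward()

print(torch.allclose(w1.g, w1a.grad, rtol=1e-3, atol=1e-5))  # True
print(torch.allclose(b2.g, b2a.grad, rtol=1e-3, atol=1e-5))  # True
~~~
The w.g lines use the inp.t() @ out.g form from Version 5 of the note, which computes the same quantity as the unsqueeze/outer-product version.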
}, {
- "id": 13,
+ "id": 15,
"url": "http://localhost:4000/2020/03/note08-fastai-3/",
"title": "Implement forward&backward pass from scratch",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring1. The forward and backward passes: 1. 1 Normalization: train_mean,train_std = x_train. mean(),x_train. std()>>> train_mean,train_std(tensor(0. 1304), tensor(0. 3073))Remember! Dataset, which is x_train, mean and standard deviation is not 0&1. But we need them to be which means we should substract means and divide data by std. You should not standarlize validation set because training set and validation set should be aparted. after normalize, mean is close to zero, and standard deviation is close to 1. 1. 2 Variable definition: n,m: size of the training set c: the number of activations we need in our model2. Foundation Version: 2. 1 Basic architecture: Our model has one hidden layer, output to have 10 activations, used in cross entropy. But in process of building architecture, we will use mean square error, output to have 1 activations and lator change it to cross entropy number of hidden unit; 50see below pic We want to make w1&w2 mean and std be 0&1. why initializating and make mean zero and std one is important? paper highlighting importance of normalisation - training 10,000 layer network without regularisation1 2. 1. 1 simplified kaiming initQ: Why we did init, normalize with only validation data? Because we can not handle and get statistics from each value of x_valid?{: style=”color:red; font-size: 130%; text-align: center;”} what about hidden(first) layer?w1 = torch. randn(m,nh)b1 = torch. zeros(nh)t = lin(x_valid, w1, b1) # hidden>>> t. mean(), t. std()((tensor(2. 3191), tensor(27. 0303))In output(second) layer, w2 = torch. randn(nh,1)b2 = torch. zeros(1)t2 = lin(t, w2, b2) # output>>> t2. mean(), t2. std()(tensor(-58. 2665), tensor(170. 9717)) which is terribly far from normalzed value. But if we apply simplified kaiming init w1 = torch. randn(m,nh)/math. sqrt(m); b1 = torch. zeros(nh)w2 = torch. randn(nh,1)/math. sqrt(nh); b2 = torch. zeros(1)t = lin(x_valid, w1, b1)t. mean(),t. std()>>> (tensor(-0. 0516), tensor(0. 9354)) But, actually, we use activations not only linear function After applying activations relu at linear layer, mean and deviation became 0. 5. 2. 1. 2 Glorrot initializationPaper2: Understanding the difficulty of training deep feedforward neural networks Gaussian(, bell shaped, normal distributions) is not trained very well. How to initialize neural nets? with the size of layer , the number of filters . But there is No acount for import of ReLU If we got 1000 layers, vanishing gradients problem emerges2. 1. 3 Kaiming initializatingPaper3: Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification Kaiming He, explained here rectifier: rectified linear unit rectifier network: neural network with rectifier linear units This is kaiming init, and why suddenly replace one to two on a top? to avoid vanishing gradient(weights) But it doesn’t give very nice mean tough. 2. 1. 4 Pytorch package Why fan_out? according to pytorch documentation, choosing 'fan_in' preserves the magnitude of the variance of the wights in the forward pass. choosing 'fan_out' preserves the magnitues in the backward pass(, which means matmul; with transposed matrix) ➡️ in the other words, torch use fan_out cz pytorch transpose in linear transformaton. What about CNN in Pytorch?I tried torch. nn. 
Conv2d. conv2d_forward?? Jeremy digged into using torch. nn. modules. conv. _ConvNd. reset_parameters?? 2 in Pytorch, it doesn’t seem to be implemented kaiming init in right formula. so we should use our own operation. But actually, this has been discussed in Pytorch community before. 3 4 Jeremy said it enhanced variance also, so I sampled 100 times and counted better results. To make sure the shape seems sensible. check with assert. (remember we will replace 1 to 10 in cross entropy)assert model(x_valid). shape==torch. Size([x_valid. shape[0],1])>>> model(x_valid). shape(10000, 1) We have made Relu, init, linear, it seems we can forward pass code we need for basic architecture nh = 50def lin(x, w, b): return x@w + b;w1 = torch. randn(m,nh)*math. sqrt(2. /m ); b1 = torch. zeros(nh)w2 = torch. randn(nh,1); b2 = torch. zeros(1)def relu(x): return x. clamp_min(0. ) - 0. 5t1 = relu(lin(x_valid, w1, b1))def model(xb): l1 = lin(xb, w1, b1) l2 = relu(l1) l3 = lin(l2, w2, b2) return l32. 2 Loss function: MSE: Mean squared error need unit vector, so we remove unit axis. def mse(output, targ): return (output. squeeze(-1) - targ). pow(2). mean() In python, in case you remove axis, you use ‘squeeze’, or add axis use ‘unsqueeze’ torch. squeeze where code commonly broken. so, when you use squeeze, clarify dimension axis you want to removetmp = torch. tensor([1,1])tmp. squeeze()>>> tensor([1, 1]) make sure to make as float when you calculateBut why??? because it is tensor?{: style=”color:red; font-size: 130%;”} Here’s the error when I don’t transform the data type ---------------------------------------------------------------------------TypeError Traceback (most recent call last)<ipython-input-22-ae6009bef8b4> in <module>()----> 1 y_train = get_data()[1] # call data again 2 mse(preds, y_train)TypeError: 'map' object is not subscriptable This is forward passFootnote: Other materials: Understanding the difficulty of training deep feedforward neural networks, paper that introduced Xavier initialization Fixup Initialization: Residual Learning Without Normalization ↩ Pytorch implementaion on Kaiming init of conv and linear layers ↩ Pytorch kaiming init issue ↩ Pytorch kaiming init explained ↩ "
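A small sketch of why the sqrt(2/m) factor matters, assuming a plain stack of linear+relu layers (not the lesson notebook's exact code): without the simplified kaiming scaling the activations explode or vanish with depth, with it they stay near unit scale.
~~~python
import math, torch

x = torch.randn(512, 512)

def depth_std(scale, layers=50):
    a = x
    for _ in range(layers):
        w = torch.randn(512, 512) * scale
        a = (a @ w).clamp_min(0.)        # linear layer followed by relu
    return a.std()

print(depth_std(1.0))                    # blows up (huge, or inf/nan)
print(depth_std(math.sqrt(2. / 512)))    # stays roughly on the order of 1
~~~
The factor of 2 compensates for relu zeroing out half of the activations, which is exactly the "replace the one with a two on top" point in 2.1.3.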
}, {
- "id": 14,
+ "id": 16,
"url": "http://localhost:4000/2020/03/note08-fastai-2/",
"title": "What's inside Pytorch Operator?",
"body": "2020/03/01 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, RefactoringWhat’s inside Pytorch Operator?: Section02 Time comparison with pure Python: Matmul with broadcasting> 3194. 95 times faster Einstein summation> 16090. 91 times faster Pytorch’s operator> 49166. 67 times faster 1. Elementwise op: 1. 1 Frobenius norm: above converted into (m*m). sum(). sqrt() Plus, don’t suffer from mathmatical symbols. He also copy and paste that equations from wikipedia. and if you need latex form, download it from archive. 2. Elementwise Matmul: What is the meaning of elementwise? We do not calculate each component. But all of the component at once. Because, length of column of A and row of B are fixed. How much time we saved? So now that takes 1. 37ms. We have removed one line of code and it is a 178 times faster…#TODOI don’t know where the 5 from. but keep it. Maybe this is related with frobenius norm…?as a result, the code before for k in range(ac): c[i,j] += a[i,k] + b[k,j]the code after c[i,j] = (a[i,:] * b[:,j]). sum()To compare it (result betweet original and adjusted version) we use not test_eq but other function. The reason for this is that due to rounding errors from math operations, matrices may not be exactly the same. As a result, we want a function that will “is a equal to b within some tolerance” #exportdef near(a,b): return torch. allclose(a, b, rtol=1e-3, atol=1e-5)def test_near(a,b): test(a,b,near)test_near(t1, matmul(m1, m2))3. Broadcasting: Now, we will use the broadcasting and removec[i,j] = (a[i,:] * b[:,j]). sum() How it works?>>> a=tensor([[10,10,10], [20,20,20], [30,30,30]])>>> b=tensor([1,2,3,])>>> a,b (tensor([[10, 10, 10], [20, 20, 20], [30, 30, 30]]),tensor([1, 2, 3])) >>> a+btensor([[11, 12, 13], [21, 22, 23], [31, 32, 33]]) <Figure 2> demonstrated how array b is broadcasting(or copied but not occupy memory) to compatible with a. Refered from numpy_tutorial there is no loop, but it seems there is exactly the loop. This is not from jeremy (actually after a moment he cover it) but i wondered How to broadcast an array by columns? c=tensor([[1],[2],[3]])a+ctensor([[11, 11, 11], [22, 22, 22], [33, 33, 33]])s What is tensor. stride()?help(t. stride)Help on built-in function stride: stride(…) method of torch. Tensor instancestride(dim) -> tuple or intReturns the stride of :attr:’self’ tensor. Stride is the jump necessary to go from one element to the next one in the specified dimension :attr:’dim’. A tuple of all strides is returned when no argument is passed in. Otherwise, an integer value is returned as the stride in the particular dimension :attr:’dim’. Args: dim (int, optional): the desired dimension in which stride is requiredExample::* x = torch. tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])`x. stride()>>> (5, 1)x. stride(0)>>> 5x. stride(-1)>>> 1 unsqueeze & None index We can manipulate rank of tensor Special value ‘None’, which means please squeeze a new axis here== please broadcast herec = torch. tensor([10,20,30])c[None,:] in c, squeeze a new axis in here please. 2. 2 Matmul with broadcasting: for i in range(ar):# c[i,j] = (a[i,:]). *[:,j]. sum() #previous c[i] = (a[i]. unsqueeze(-1) * b). sum(dim=0) And Using None also (As howard teached)c[i] = (a[i ]. unsqueeze(-1) * b). sum(dim=0) #howardc[i] = (a[i][:,None] * b). sum(dim=0) # using Nonec[i] = (a[i,:,None]*b). 
sum(dim=0)⭐️Tips🌟 1) Anytime there’s a trailinng(final) colon in numpy or pytorch you can delete it ex) c[i, :] = c [i]2) any number of colon commas at the start, you can switch it with the single elipsis. ex) c[:,:,:,:,i] = c […,i] 2. 3 Broadcasting Rules: What if we tensor. size([1,3]) * tensor. size([3,1])? torch. Size([3, 3]) What is scale???? What if they are one array is times of the other array? ex) Image : 256 x 256 x 3Scale : 128 x 256 x 3Result: ? Why I did not inserted axis via None, but happened broadcasting? >>> c * c[:,None]tensor([[100. , 200. , 300. ], [200. , 400. , 600. ], [300. , 600. , 900. ]])maybe it broadcast cz following array has 3 rows as same principle, no matter what nature shape was, if we do the operation tensor broadcasts to the other. >>> c==c[None]tensor([[True, True, True]])>>> c[None]==c[None,:]tensor([[True, True, True]])>>>c[None,:]==ctensor([[True, True, True]])3. Einstein summation: Creates batch-wise, remove inner most loop, and replaced it with an elementwise producta. k. ac[i,j] += a[i,k] * b[k,j]inner most loop c[i,j] = (a[i,:] * b[:,j]). sum()elementwise product Because K is repeated so we do a dot product. And it is torch. Usage of einsum()1) transpose2) diagnalisation tracing3) batch-wise (matmul) … einstein summation notationdef matmul(a,b): return torch. einsum('ik,kj->ij', a, b)so after all, we are now 16000 times faster than Python. 4. Pytorch op: 49166. 67 times faster than pure python And we will use this matrix multiplication in Fully Connect forward, with some initialized parameters and ReLU. But before that, we need initialized parameters and ReLU, Footnote: TensorRank ti noteResources: Frobenius Norm Review Broadcasting Review (especially Rule) Refer colab! (I totally confused with extension of arrays) torch. allclose Review np. einsum Reviewh "
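A self-contained timing sketch of the three fast versions compared above (broadcasting, einsum, and torch's own @ operator); absolute times and speedup ratios will differ from the lesson's machine, and the shapes are small placeholders:
~~~python
import timeit, torch

a, b = torch.randn(64, 32), torch.randn(32, 16)

def matmul_broadcast(a, b):
    c = torch.zeros(a.shape[0], b.shape[1])
    for i in range(a.shape[0]):
        # one python loop left; the row-times-matrix product broadcasts
        c[i] = (a[i].unsqueeze(-1) * b).sum(dim=0)
    return c

def matmul_einsum(a, b):
    # k is repeated, so it is summed over: a dot product over k
    return torch.einsum('ik,kj->ij', a, b)

ref = a @ b   # pytorch's own operator, the fastest of the three
for f in (matmul_broadcast, matmul_einsum, lambda a, b: a @ b):
    t = timeit.timeit(lambda: f(a, b), number=100) / 100
    print(f'{t * 1e6:.1f} µs per call')
print(torch.allclose(matmul_broadcast(a, b), ref, rtol=1e-3, atol=1e-5),
      torch.allclose(matmul_einsum(a, b), ref, rtol=1e-3, atol=1e-5))
~~~
The allclose checks play the role of test_near from the entry: the results agree within tolerance rather than exactly, because of floating-point rounding.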
}, {
- "id": 15,
+ "id": 17,
"url": "http://localhost:4000/2020/02/note08-fastai-1/",
"title": "What is the meaning of 'deep-learning from foundations?'",
"body": "2020/02/29 - This note is divided into 4 section. Section1: What is the meaning of ‘deep-learning from foundations?’ Section2: What’s inside Pytorch Operator? Section3: Implement forward&backward pass from scratch Section4: Gradient backward, Chain Rule, Refactoring” Lecture 08 - Deep Learning From Foundations-part2 “ I don’t know if you read this article, but I heartily appreciate Rachael Thomas and Jeremy Howard for providing these priceless lectures for free Homework: Review concepts 16 concepts from Course 1 (lessons 1 - 7)(1) Affine Functions & non-linearities; 2) Parameters & activations; 3) Random initialization & transfer learning; 4) SGD, Momentum, Adam; 5) Convolutions; Batch-norm; 6) Dropout; 7) Data augmentation; 8) Weight decay; 9) Res/dense blocks; 10) Image classification and regression; 11)Embeddings; 12) Continuous & Categorical variables; 13) Collaborative filtering; 14) Language models; 15) NLP classification; 16) Segmentation; U-net; GANS) Make sure you understand broadcasting Read section 2. 2 in Delving Deep into Rectifiers Try to replicate as much of the notebooks as you can without peeking; when you get stuck, peek at the lesson notebook, but then close it and try to do it yourself calculus for machine learning based on weight… einsum conventionCONTENTS: What is going on in this course? What is ‘from foundations’? Steps to a basic modern CNN model Today’s implementation goal: 1) matmul -> 4) FC backward Library development using jupyter notebook jupyter notebook certainly can make module Elementwise ops How can we make python faster? What is element wise operation? FootnoteWhat is going on in this course?: What is ‘from foundations’?: 1) Recreate fast. ai and Pytorch 2) using pure python Evade OverfittingOverfit : validation error getting worsetraining loss < validation loss Know the name of the symbol you usefind in this page if you don’t know the symbol that you are using or just draw it here (run by ML!) Steps to a basic modern CNN model: 1) Matrix multiplication -> 2) Relu/Initialization -> 3) Fully-connected Forward-> 4) Fully-connected Backward -> 5) Train loop -> 6) Convolution-> 7) Optimization ->8) Batchnormalization -> 9) Resnet Today’s implementation goal: 1) matmul -> 4) FC backward: Library development using jupyter notebook: what is assers? jupyter notebook certainly can make module: There will be #export tag that Howard (and we) want to extract special notebook2script. py will detect sign of #expert and convert following into python module and test ittest\_eq(TEST,'test')test\_eq(TEST,'test1') what is run_notebook. py? when you want to test your module in command line interface !python run\_notebook. py 01_matmul. ipynb Is there any difference between 1) and 2)?1) test -> test01 2) test01 -> test #TODO I don’t know yet look into run_notebook. py, package fire Jeremy used. What is that?read and run the code in a notebook, and in the process, Jeremy made Python Fire library called!shockingly, fire takes any kind of function and converts into CLI command. fire library was released by Google open source, Thursday, March 2, 2017 Get data pytorch and numpy are pretty much same. variable c explains how many pixels there are in in MNIST, 28 pixels PyTorch’s view() method: torch function that manipulating tensor, and squeeze() in torch & mathmatical operation similar function Rao & McMahan said usually this functions result in feature vector. In part 1, you can use view function several times. 
Initial python model Which is Linear, like $Xw$(weight)$+a$(bias) $= Y$ If you don’t know hou to multiple matrix, refer this site matmul visulization site How many time spends if we we use pure python function matmul, typical matrix multiplication function, takes about 1 second for calculating 1 single train data! (maybe assumed stochastic, 5 data points in validation) it takes about 11. 36 hours to update parameters even single layer and 1 iteration! (if that was my computer, it would be 14 hours. . )🤪 THIS is why we need to consider ‘time’&’space’ This is kinda slow - what if we could speed it up by 50,000 times? Let’s try! Elementwise ops: How can we make python faster?: If we want to calculate faster, then do remove pythonic calcuation, by passing its computation down to something that is written something other than python, like pytorch. According to PyTorch doc it uses C++ (via ATen), so we are going to implement that function with python. What is element wise operation?: items makes a pair, operate corresponding componentFootnote: notebooks material video broadcasting excel"
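A sketch of the pure-python matmul being timed in this entry, with every scalar index going through the interpreter; the 5 x 784 by 784 x 10 shapes follow the lesson's benchmark, though the exact time depends on the machine:
~~~python
import time, torch

def matmul(a, b):
    ar, ac = a.shape; br, bc = b.shape
    assert ac == br
    c = torch.zeros(ar, bc)
    for i in range(ar):          # every scalar index, multiply and add
        for j in range(bc):      # runs through the python interpreter,
            for k in range(ac):  # which is what makes this so slow
                c[i, j] += a[i, k] * b[k, j]
    return c

m1 = torch.randn(5, 784)    # 5 images, as in the lesson's timing
m2 = torch.randn(784, 10)   # one linear layer's weights
t0 = time.time(); matmul(m1, m2)
print(f'{time.time() - t0:.3f}s for 5x784 @ 784x10')
~~~
Replacing just the inner k loop with (a[i,:] * b[:,j]).sum() pushes that hot loop down into C, which is where the next section's speedups come from.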
}, {
- "id": 16,
+ "id": 18,
"url": "http://localhost:4000/2020/02/what-is-convolution/",
"title": "Digging into convolution",
"body": "2020/02/28 - Issues 1) Kaiming Initializtion in Pytorch was in trouble. 1 2) Jeremy started to dig in, in lesson09, but I didn’t know why the size of tensor is 2 and even understand this spreadsheet data. 3 Homework: Read Visualizing and Understanding Convolutional Networks paper What is a convolution? Visualization one kernel Matthew D Zeiler & Rob Fergus Paper Convolution can be represented as matmul Padding Kernel has rank 3 How can we find a side-edge, a gradient and area of constant weight? What is a convolution?: A convolutional neural network is that your red, green, and blue pixels go into the simple computation, and something comes out of that, and then the result of that goes into a second layer, and the result of that goes into the third layer and so forth. Visualization: one kernel Refer this site for visualizing CNN filteringMatthew D Zeiler & Rob Fergus PaperLecture01 Nine examples of the actual coefficients from the **first layer** Convolution can be represented as matmul: CNNs from different viewpoints {align-items: center;} [A B C D E F G H I J] is 3 by 3 image data flatten to vector. As a result, convolution is a just matrix just two things happens Some of entries are set to zeros at all the times same color always have the same weight. That called weight time / wegith sharing So, we can implement a convolution with matrix multiplication. But, we don’t do that because it’s slow!Padding: What most of libraries do is just put zeros asdie of matrix fast. ai uses reflection paddings (what is this? Jeremy said he uttered it)Kernel has rank 3: As standard picture input would be 4 5, it would be actually 3d, not 2d. If we make kernel as a 3x3 size, we pass over same kernel all the different Red, Green, Blue Pixels. This could make problem, because, if we want to detect frog, which is green, we would want more activations on the green(I made a test cell in my colab 6) How can we find a side-edge, a gradient and area of constant weight?: Not top-edge! One kernel can find only the top-edge, so we should stack the kernels 7 So, we pass it through bunch of kernels to the input images, and that process gives us height x width x corresponding number of kernels. Usually that number of chanel is 16 And if we want to get the more channels and features, we should repeat that process This process gives rise to memory out of control, we do the stride #### conv-example. xlsx 2 convolutional filters At a second layer, filter is 3x3x2 tensor, because to add up together the first layer’s channel. Reference: Problem was math. sqrt(5) was not kaiming initialization formula, Implementation in Pytorch ↩ size of tensor, lecture09 ↩ conv-example. xlsx ↩ Why do computer use red, green and blue instead of primary colors ↩ Grayscale is a group of shades without any visible color. … Each of these dots has its own brightness level as well and, therefore, can be converted to grayscale. A grayscale image is one with all color information removed. ↩ Testing RGB and grayscale ↩ stack kernel and make new rank of tensor at output, Lesson06-2019 ↩ "
}, {
- "id": 17,
+ "id": 19,
"url": "http://localhost:4000/2020/02/dps-week8/",
- "title": "Digital Product School week 8&9",
- "body": "2020/02/24 - The 8th week retropect at Digital Product School Week 8/9 - Ship your MVP/Release next iteration each day This week's schedule CONTENT: Preparing engineering weekly Agile Process Daily Stand-up Making application flowchart (feat draw. io) / ER diagram Flowchart, understaning user journey ER diagram Engineering weekly AI lunch Connecting firebase andPreparing engineering weekly: This week at Wednesday, I planned to explain the Language Modelings, mainly focusing ELMo, ULMFiT, BERT and GPT-2. Slides is available here Changed the presentation, because there were people who are not in ML domain. hereWhenever I do the presentation, I learn more than the information I give them. At the same time, I realize I need to learn more than I know. Agile Process: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m not sure ‘what is agile’ but this is what we tried to make agile process. Daily Stand-up: Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Making application flowchart (feat draw. io) / ER diagram: Before we start the party, we should clarify the flowchart and ER diagram of our application. Flowchart, understaning user journey: Thanks for google, we could use draw. io for our framechart framework. Actually, we cana choice other good flatform, but draw. io has connected app throgh google drive, most of our engineer was used to it. And after this job, I got to know there is also (of course) rule with the symbols, color, size, space, scaling and direction of arrow -reference. But why we should do this? WE have made our storymap before!! I think storymap is for visualize our status and app. So it should be shared with whole the team, and they should able to understand each role’s issue. But flowchart is more like testing technical feasibility, and error that user can experience. So it could be little more specific, complicated, and hypothetical. This week engineer's main plan ER diagram: Even if we use NoSQL database through firebase, my team was accustomed to SQL more. That what we educated when we were at college, so we had to organize our concept while we were learning NoSQL. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Before today, my AI collegues presented regression, knn and it was my turn. I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. 
So I decided to brief BERT mode, how I can contribute to other team’s project. Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! AI lunch: Connecting firebase and: "
+ "title": "My life in Digital Product School - week 8/19/10",
+ "body": "2020/02/24 - The 8/9/10th week retropect at Digital Product School Week 8 - Ship your MVPWeek 9/10 - Release next iteration each day Week 8th schedule CONTENT: Agile Product Development Daily Stand-up(planning) Gemba Walk Sprint Reviews Engineering weeklyAgile Product Development: One of a priceless lesson I learnt from digital product school, was experience of doing agile work. Before I came here, it was a little bit vague concept. I’m still not sure ‘what is agile’ but this is how we tried to make agile process. Daily Stand-up(planning): Sharing the works everyday helps interdisciplinary team to work better. Since product started to get higher fidelity, the gap between engineer and non-engineer increased. Actually I didn’t planned to explain concept because I thougth I would be lose my audience when I start to explain. But as daily stand-up, which shares our progess, goes day by day, I planed and reported the issues. And it made each other’s topic feel more familiar. I think point is very important, because at that point people start to be curious. So we can actively ask to the others, and that momwnr, we can explain the point teammate dosen’t know. Each color means every different section. Red: Our team goal, Blue: Interaction designer, Green: Product manager, Yellow: Software/AI engineer This week engineer's main plan Each of us try to explain what we are doing, but things become easier when we are asked. Because we explained something was important to us before, but if we asked it is something important for the others. Gemba Walk: Team Cero with core team Every 2 weeks, we do the Gemba work, which is ‘question everything to the core team’ time. At this period, people can ask anything related to our product, workshop, and framework. Core team will help just for each team, and each team can solve the problem related to their work. < br/>Why we need this session? because with workshop and general schedule, core team has no time just focus on each team. So through this session, we can have opportunity to understand each program and workshop, like why we are using this platform, and when is the due of our small project, and we have this problem and we need help for this. whatever small problem you have, core team is always willing to help you. Sprint Reviews: Every Friday, we have time to summarise what we did for the week. Maybe we need HMW question and our storymap to share our process and then tell and share what we did try, what point we succeeded and what point it was deviant of our prediction, and why we tried it. . Sprint of Ve-link And then, just after all team’s ppt, we do vote with such a cute marvel. Always it’s very difficult to vote (of course you can’t vote to your team!) Because it depends on criteria what do I value!But since this is process of our agile work, I try to focus on what they have changed since last week, and why they did it, how they did it. Engineering weekly: Every engineering weekly we exchange our knowledge each other so that we can grow together. Everyone have their knowledge to share and we can be tutor and at the same time can be of tutee. Previously, my AI collegues presented regression, knn. And because I’m somewhat specialized to NLP, I prepared slide that explain about pre-trained language model, but my header advised me if I go deep of theoretical things, I would lose my audience. So I decided to brief BERT mode, how I can contribute to other team’s project. 
Since BERT was breakthrough of NLP industry, I tried to explain how it can be applied to hands on product and how it can help people in their product. The result was quite motivative to me. They gave feedback that since it wasn’t that much theoretical, they could enjoy it, and useful information. Someone asked me do I had learned of presentation before. I was really happy with their feedback! "
}, {
- "id": 18,
+ "id": 20,
"url": "http://localhost:4000/2020/02/fast.ai-nlp-note-16/",
"title": "Algorithmic bias",
"body": "2020/02/20 - Algorithms can encode & magnify human bias Case Study 1: Facial Recognition & Predictive Policing: Joy Buolamwini & Timnit Gebru, gendershades. org Microsoft, FACE+, IBM - All of these things are sell now. Largest gap between $\therefore\ Lighter Male\ >\ Darker\ Female $ This US mayor joked cops should “mount . 50-caliber” guns where AI predicts crime With machine learning, with automation, there’s a 99% success, so that robot is ㅡwill beㅡ99% accurate in telling us what is going to happen next, which is really interesting. - city official in Lancater, CA, approving on using IBM for public security Bias: Bias is type of error Statistical Bias: difference between a statistic’s expected value and the true value Unjust Bias: disproportionate preference for or prejudice against a group Unconscious bias: bias that we don’t realize we have But, term bias is too generic to be productive. Different sources of bias have different causes Representation Bias: Dataset was not representative of the algorithm that might be used on later. Above : Data is okay, but algorithm has some problem. Below : Data has error. For example, object detection production that performs very well in common product of US. But in contrast, change of target product region, like Zimbabwe, Solomon Island, and so on, reduced the performence remarkably. It is not the algorithmic problem, so we should care about data volume of region. Evaluation Bias: Benchmark datasets spur on research, 4. 4% of IJB-A images are dark-skinned women. 2/3 of ImageNet images from the West (Sharkar et al, 2017) Case Study 2: Recidivism Algorithm Used Prison Sentencing: Case Study 3: Online Ad Delivery: Bias in NLP: ( Nothing to do with the course, but I’m researching this field these days. ) But all about Englsih ImpactThe person is doctor. The person is nurse -> 그는 의사다. 그녀는 간호사다. Concept of “biased data” often too generic to be useful: Different sources of bias have different sources Data, models and systems are not unchanging numbers on a screen. They’re the result of a complex process that starts with years of historical context and involves a series of choices and norms, from data measurement to model evaluation to human interpretation. - Harini Suresh, “The problem with Biased Data” Five Sources of Bias in ML: Representation Bias Evaluation Bias Measurement Bias Aggregation Bias(46:02) Historical Bias(46:26) A few studies(47:13) Racial Bias, Even when we have good intentions(new york times)(47:10) gender(48:59) Humans are biased, so why does algorithmic bias matter?: Algorithms & humans are used differently (humans are usually decision maker) Algorithms are accurate and objective No way to apeal if there if error processed large scale cheap Machine learning can amplify bias Machine learning can create feedback loops. Technology is power. And with that comes responsibility. Solutions: Analyze a project at work/school: Questions about AI 5 types of bias (Suresh & Guttag) Datasheets for datasets, Modelcards for model reporting Accuracy rate on different sub-groups Work with domain experts & those impacted Increase diversity in our workspace Advocate for good policy Be on the ongoing lookout for bias"
}, {
- "id": 19,
+ "id": 21,
"url": "http://localhost:4000/2020/02/classifier-city/",
"title": "Making a classifier with image dataset made from gooogle",
"body": "2020/02/15 - CONTENTS: Creating dataset from google images Using google_images_download Create ImageDataBunch Train model fit_one_cycle() Let’s find-tune Let’s train the whole model! Let’s make batch size bigger! Interpretation Model in productionCode can be found hereDeployed model here Making a classifier which can distinguish Seoul from Munich and Sanfrancisco!(hoping my well in Munich!) Creating dataset from google images: In machine learning, you always need data before you build your model. You can use either URLs or google_images_download package. Since Jeremy explained specifically, I will try the other. Using google_images_download: note: This is not google official package Refer to Official Doncument, put that arguments. from google_images_download import google_images_downloadresponse = google_images_download. googleimagesdownload() #class instantiationout_dir = os. path. abspath('. . /. . /materials/dataset/pkg/')os. mkdir(out_dir)arguments = { keywords : Cebu,Munich,Seoul , print_urls :True, suffix_keywords : city , output_directory :out_dir, type : photo , }paths = response. download(arguments) #passing the arguments to the functionprint(paths)and if you need, here is main code. Create ImageDataBunch: We need to separate validation set because we just grabbed these imagese from Google. Most of the dataset we use (kaggle/research) splited into train / validation / test so if they are not devided beforehand we should make databunch, and Jeremy recommended assign 20% to validation. Help on function verify_images in module fastai. vision. data:verify_images(path: Union[pathlib. Path, str], delete: bool = True, max_workers: int = 4, max_size: int = None, recurse: bool = False, dest: Union[pathlib. Path, str] = '. ', n_channels: int = 3, interp=2, ext: str = None, img_format: str = None, resume: bool = None, **kwargs) Check if the images in `path` aren't broken, maybe resize them and copy it in `dest`. Data from google image url Data from package Train model: len(class) len(train) len(valid) Data_url 3 432 108 Data_pkg 3 216 53 Uisng model: restnet34 1, Measurement: accuracy 2 fit_one_cycle(): What is fit one cycle? Cyclical Learning Rates for Training Neural Networks One of the way to find good learning rate. Core idea is to start with small learning rate (like 1e-4, 1e-3) and increase the learning rate after each mini-batch till loss starts exploding. And pick up learning rate one order lower than exploding point. For example, plotted learning rate is like below picture, picking up around 1e-2 is the best way. Why this methods Traditionally, the learning rate is decreased as the learning starts converging with time. But this paper suggests to cycle our learning rate, because it makes us avoid local minimum. Basically this cyclic method enables us to explore whole of loss function so that find out global minimum. In other words, higher learning rate behaves like regularisation. Let’s find-tune: Do train just one last layer by learning rate found by find_lr This section you should find the strongest downward slope that kind of sticking around for quite a while. And choose just one order lower than lowest point. As explained before, I will pick up 1e-2. And of course, this is fine-tuning, we don’t need discriminative learning rate yet. Let’s train the whole model!: link When you plot the learning rate again, maybe you will get soaring shape of learning rate. Rule of thumb, When you slice the learning rate, use learning rate you used at unfrozen part. 
Divide it by 5 or 10 and put it on maximum bound. At minimum bound, get the point just before it soared, and divide it by 10. Let’s make batch size bigger!: Since default batch size is 64, I tried it to 128. And it gets way more better result(even it’s still underfitting!) And if I freeze model and train whole model again, the model would be better. Also, you can use this method to the other big dataset model training! Interpretation: See the confusion matrix. Result is quite great. *Since I’m using colab, I will skip data cleansing. But I highly recommend you to use ImageCleaner widget, only if you are using jupyter notebook (not jupyter lab) Model in production: You can deploy your model in simple way. I referred fast. ai, and used render(it’s free for limited time). You can find detailed document here. and you can create a route like this. @app. route( /classify-url , methods=[ GET ])async def classify_url(request): bytes = await get_bytes(request. query_params[ url ]) img = open_image(BytesIO(bytes)) _,_,losses = learner. predict(img) return JSONResponse({ predictions : sorted( zip(cat_learner. data. classes, map(float, losses)), key=lambda p: p[1], reverse=True ) })You can find my deployed model here Reference: How to create a deep learning dataset using Google Images towardsdatascience - one cycle policy Deep Residual Learning for Image Recognition ↩ Accuracy_and_precision ↩ "
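As a sketch of the whole recipe above (fastai v1 names; the sample dataset and the exact learning-rate numbers are stand-ins, not the post's city data):

from fastai.vision import *

path = untar_data(URLs.MNIST_SAMPLE)    # small sample set, just to be runnable
data = ImageDataBunch.from_folder(path)
learn = cnn_learner(data, models.resnet34, metrics=accuracy)

learn.fit_one_cycle(4)                  # 1) fine-tune only the head
learn.unfreeze()
learn.lr_find()                         # 2) plot loss vs. learning rate
learn.recorder.plot()
# 3) rule of thumb: min bound = (point just before the loss soars) / 10,
#    max bound = the learning rate used before unfreezing / 5 or 10
learn.fit_one_cycle(4, max_lr=slice(1e-5, 1e-3))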
}, {
- "id": 20,
+ "id": 22,
"url": "http://localhost:4000/2020/02/dps-week5/",
"title": "Digital Product School week 5",
"body": "2020/02/09 - The 5th week retropect at Digital Product School Week 5 - Create a Storymap and sync it with Lean Canvas This week's schedule CONTENT: How to create our story map Prepare your story Discover your product’s AI potentialMondayHow to create our story map: We need this 'aha' moment There was a Milestone workshop, about our weekly goal. As we are agile working, we go fast and change every week’s goal. This week we will finalize our story map based on user’s pain-point and HMW questions. How should we make our story-map Basically we should make story map based on this rule Tell stories, don’t just write them! We always need context, that means all the story component should be connected Visualize your product to establish a shared understanding and speed up discussions! Post-it filled of text is not enough, we should fill it with visualizations then team mates can understand it fast Only discuss in front our your story map! (Speed) So we can update our story-map as soon as we change our opinion And also Use a story map to find the parts that matter most and to identify holes in your idea! Since the story map consists of techinical part, we should consider each story’s technical feasibility Minimise output, maximise outcome and impact! Build tests to figure out what’s minimum and what’s viable! This story map functions to find out our minimum value of ideas Work iteratively: Change your story map according to your learnings! We should repeat this process again and again PMs: Make sure Storymap is up to date!Prepare your story: team cero, our whole story map Our goal Technical feasibility of our storyWhat is your strategy to make user achieve something? This would be our expand point Discover your product’s AI potential: How can we apply AI to our product? Let’s write down our ‘HMW’ questions, and find out all p ossibilities. These are suggestion of possibilities, so don’t attached to feasibility (we will do in at lean start-up) Software section's expectation AI section's expectationTuesday Engineer's task, week5This 5th week, engineers settled WendesdayThursdayFriday"
}, {
- "id": 21,
+ "id": 23,
"url": "http://localhost:4000/2020/02/GPU-time/",
"title": "4 reasons took much time to setting GPU for fast.ai than I expected",
"body": "2020/02/05 - Motivation: Before now, me as a undergraduate student, I was parsimony who usually depend on colab, kaggle, friend’s server(occasional) whenever i need GPU. . And this time it’s been for a while to install GPU than I expected and I share the several component that stood in my way. Written at Oct 24 2019, if you think this is deprecated, please do not have a leap of faith. Just for the record, I’ve used Kaggle, Colab, GCP, Azure, EC2 as GPU cloud. 1. Did not know there is JupyterLab option in Google Cloud Platform. : At the first time when GCP came out, there was no AI Platform service. So from starting vm instance to launching jupyter and installing packages, I did all of the things myself. (and I learned 🤗) $ curl -O https://repo. continuum. io/archive/Anaconda3-5. 0. 1-Linux-x86_64. sh[Downloading conda in ssh] I created VM instance,selected zone, machine type and disk type. Then, define firewall rules and in ssh terminal, install jupyter and other packages. But you can do all of these things just using AI Platform. [AI Platform] I think it especially save your time if you are living in Asia-Pacific, which google doesn’t support not that much GPU resources. 2. Consider if the platform has limited resources in a region you live in. : I live in South Korea, East Asia, and it seems like this region has lots of limitation in GPU (except quite expensive AWS) And the Taiwan which was the only one region where I can launch my own VM with GPU (I tried all the other regions in the list) sometimes do normaly, but not always. 😥After launching, I did several works and next day I could not start VM. (I didn’t count it, but tried it a few hours because I didn’t want cost any more time…) Endlessly failed to start instance, then I choose to move AWS as an alternative way. 3. Fast. ai gives deliberate guide and I didn’t know it. : Fast. ai offer the guide for all available platform. (Colab, salamander, Gradient, Kaggle, Colab, and so on) It is so important, and really needs, because cloud computing options are vary as occasion and purpose arise. I didn’t know fast. ai has manual to running GCP, and I think it’s as good a reason as any for me to be have taken time. It helped me so much when I had aws and shortened my time. I don’t want to read all of the manual in amazno. . (It is recommended. . but I’d rather read GIT PRO now…) ssh -i ~/. ssh/<your_private_key_pair> -L localhost:8888:localhost:8888 ubuntu@<your instance IP>4. You should wait to add more volume just after add volume, by building AWS EC2. : Since Elastic Block Store(EBS) storage supports optimized storage, users can’t extend storage volume two times in a row. Unfortunately, at the first time, I didn’t know it (again 👻) and when VM lacked volume, I doubled dist capacity (76*2) at a rough but It needs more. <!– this time I installed GPU in two years, and it became little complicated compared to 2 years ago. And this time for the first time(maybe not the first time. . but i handled it in my class or with my friend. but it’s my first time on my own. ) I very I’m started to using used google colab, kaggleand, GCP-JupyterLab, ec2 - friend made, aws vm machine but I had a environment variable but i did not know of it. On these days, I could not get a resources from taiwan… I couldn’t notice a deliberate Anyway, as a result I tried myself gcp myself and aws ec2 with fast. 
ai But I think doing on my self surely takes much time (in this point I wonder why I’m doing this, and should remind me, especially I was studying disk volume optimization) disk volume exceed - https://askubuntu. com/questions/919748/no-space-left-on-device-even-though-there-is: "
}, {
- "id": 22,
+ "id": 24,
"url": "http://localhost:4000/2020/02/dps-week4/",
"title": "Digital Product School week 4",
"body": "2020/02/01 - The 4th week retropect at Digital Product School Week 4 - Find solution ideas and run experiments [This week’s schedule] CONTENT: Ideation Techniques What is ideation techniques? Generating idea in my team AIdeation Team brain storming of idea Die Produkt MacherMondayIdeation Techniques: [slides from @steffen] What is ideation techniques?: We tried to find out user’s painpoint last week. Tried to users talk about their, pain point. No question directly, but extract from them their pain with transportation. Generating idea in my team: AIdeation: TuesdayTeam brain storming of idea: Based on generated idea on Monday, we extended our idea doing rolling-paper! Die Produkt Macher: What is lean start-up? Lean startup is a methodology for developing businesses and products that aims to shorten product development cycles and rapidly discover if a proposed business model is viable; this is achieved by adopting a combination of business-hypothesis-driven experimentation, iterative product releases, and validated learning. - wikipedia WendesdayThursdayFriday"
}, {
- "id": 23,
+ "id": 25,
"url": "http://localhost:4000/2020/01/retrosprect-of-acl-paper-2020/",
"title": "Retrospect of ACL 2020 paper writing",
"body": "2020/01/29 - 2020 Annual Conference of the Association for Computational Linguistics Why I can’t use ‘Cebuano’ for the research?: Why I had to change target language from ‘Cebuano’ to ‘Tagalog’?-> No language translator options except google translation. But before knowing that I already consult my friend, whose mother tongue is English. So I had to aplogize her, but couldn’t tell her why suddenly I changed my plan. -> I realized there are many languages even can’t be researched at all. . -> Getting accustomed to discrimination makes misunderstanding, sometimes. At my country, we couldn’t use music streaming service, because of legal problem. But at that moment, I thought it was discrimination, which is done by music company. "
}, {
- "id": 24,
+ "id": 26,
"url": "http://localhost:4000/2020/01/Git-Merge/",
"title": "Why am I not listed as a contributor?!",
"body": "2020/01/10 - From the end of last year, big changes have witnessed in NLP research. Embracing an unprecedented growth, I started to study new exciting results and advances. In doing so, I noticed I’m not listed as contributor of repo which my PR accessed. How did I come to a repository?: When I’m stuck, I would prefer to code, than to go deep in theory. (It must be so. . too much to understand 🤒)It was BERT released by Google AI I felt keenly the necessity of implementing, because not only couldn’t understand the way they figured out positional encoding formula, but how it actually works. What does it mean to “scale” dot product in Attention? (Now I know it’s far from my section 😂) Figure 1. Scaled Dot Product. Adopted from tensorflow blogWhat was the code error?: For implement code in paper, I read the papers Transformer and BERT, structured the model, and refered the others’ code. Meanwhile, I found out a small error in tokenization process, which was changing a token into [MASK], enabled bidirectional representation. I’ve made PR, and got merged. But I was not in contributors. Why?: Figure 2. Merged Pull request Adopted from graykode projectActually I happened to know there can be couple of reasons github doesn’t include my name as contributor. Well, if contributors tab has more than 100 people, in which case it shows you up only if you are in the top 100 contributors because displaying too many contributors can make webpages down. Somethimes, however, it doesn’t that problem. Why not? Two possibilities are there. First, According to Joel-Glovier, if repository maintainer merged-as-a-rebase PR will end up showing as maintainer’s commit. But maintainer shouldn’t normally do this. Second, if you happend to commit using a different git email that what is in your GitHub profile, it will not be attached to your Github user, and “doesn’t show up” as you. Reference: Michał Chromiak’s blog Github: why are my contributions are not showing on my profile atlassian-gitfetch"
}, {
- "id": 25,
- "url": "http://localhost:4000/2019/12/lesson1-fastai/",
- "title": "Fine Grained Classification",
- "body": "2019/12/31 - Finally you can solve the mystery behind this weird drawing. . through this course. juptyer notebook magic: %reload_ext autoreload%autoreload 2%matplotlib inlinethis is special directives to jupyter notebook, not python code. And it is called ‘magics’ (but i think jeremy is magicion) If somebody changes underlying library code while I’m running this, please reload it automatically If somebody asks to plot something, then please plot it here in this Jupyter NotebookDon’t hesitate to import start~ Digging into untar_data, path. ls: Union[pathlib. Path, str]: typed programming language? -> maybe i think disclaim the type beforehand for sure. Q. like assert? path. ls()this is some module that fast. ai made because os. listdir(‘path’) is unconvinient. Python3 pathlib library!: pathlib "
- }, {
- "id": 26,
+ "id": 27,
"url": "http://localhost:4000/2019/12/jeremy-howard/",
"title": "Jeremy Howard",
"body": "2019/12/15 - This is journey to find out ‘who am I trying to be?’: How he impacted me? The person who made me start Computer Vision again. He emphasized the importance of studying NLP and Computer together to understand the deep-learning. He didn’t order it to study, but always he pursuade me with reasonable way. “It’s not just something I can throw away. NLP and computer vision a few weeks apart and that’s going to force your brain to realize like ‘oh I have to remember this’” He made me admit my failure in deep-learning. I started to objectify where am I. What should I do when I’m frustrated. “Keep going. You’re not expected to remember everything. Yet. You’re not expected to understand everything. Yet. You’re not expected to know why everything works. Yet. ” His articles are numerous, below. What is torch. nn Really? High Performance Numeric Programming with Swift: Explorations and Reflections C++11, random distributions, and Swift And especially, I like this book. Designing great data products Great predictive modeling is an important part of the solution, but it no longer stands on its own; as products become more sophisticated, it disappears into the plumbing. Designing great data products And he is also famous for words. Here are some. we’re going to try and use that to really understand what’s going on. So to warn you, none of it is rocket science but a lot of its going to look really new. So don’t expect to get it the first time but expect to listen and jump into the notebook try a few things test things out look particularly at like tensor shapes and inputs and outputs to check your understanding then go back and listen again. But and kind of try it, a few times, because you will get there right, it’s just that there’s going to be a lot of new concepts because we haven’t done that much stuff in pure Pytorch. Lesson 6: Deep Learning 2019 "
}, {
- "id": 27,
+ "id": 28,
"url": "http://localhost:4000/2019/11/julia-evans/",
"title": "Julia Evans",
"body": "2019/11/20 - This is journey to find out ‘who am I trying to be?’: The women who surprised me in many ways. First, she approached me to teaching some concepts drawing cartoons. It was at Hackers news, which was hightest ranks. Personally I have the use of not to reading title, so and cartoon was so cute and clear. I naturally gonna understood mechanism and astonished by her explaination ability. Her value, which she was taught by many people so want to do same things, moved me. Volume of her knowledge, that just reading post title is a deal of work, amazed me. "
}, {
- "id": 28,
+ "id": 29,
"url": "http://localhost:4000/2019/11/coc-retropective/",
"title": "Retrospective on Pycon 2019 Korea (CoC Committee)",
"body": "2019/11/05 - When I was volunteer, it seems like busy and hectic to managing that crowded conference. In my experience, to get things moving, it needs hierarchy. But it didn’t. Organizers emphasized our responsibility, and if I passed each other’s burden, It could be my burden next time. In solidarity of the obligation, we finished conference well. And after participating PyCon Korea 2018 as volunteer, I’ve joined PyCon Korea Organizer last year. <Figure 1> First meeting of PyCon 2019 Korea Organizers It’s been a while since PyCon 2019 finished. It’s held on Aug 15 - 18, at Coex Grand Balloom <Figure 2> Ongoing session, speaking on news comment processing <Figure 3> Sponsor Booth iin Coex Hall <Figure 4> After PyCon 2019, with all of volunteer, organizer, speakers 😍 🥰 Serving as part of the coc TF, I spent large fraction of last year doing CoC job. here’s the path what we’ve been grappled with to grasp a solution. First half: Before the conference Toward Diverse Community: Formally we’ve been reusing and modifying PyCon US CoC, but we needed fit in Korean and I was part of that to revise code of conduct. Except ‘That’ Diversity, Because it is ‘Harassment’: Specific point was harassment, and the others were not. process of finding the points. How can we settle this point?Second half: During the conference Handling the potential Harassment: Disjunction of policy and real-time situation: This ‘PyCon 2019 Korea retrospective series’ would be devided into 3 Episodes. “Retrospective on Pycon 2019 Korea (CoC Committee)” “Retrospective on Pycon 2019 Korea (Program Chair)” (20 Nov, To Be Update) “Maintaining participation while still making timely decisions” (29 Nov, To Be Update)"
}, {
- "id": 29,
+ "id": 30,
"url": "http://localhost:4000/2019/11/elif-shafak/",
"title": "Elif Shafak",
"body": "2019/11/05 - This is journey to find out ‘who am I trying to be?’: For creative-minded people, Istanbul is a treasure. ’ Photo © Chris Boland, licensed under CC BY-NC-ND 2. 0 it suddenly felt like what I was trying to convey was more complicated and detailed than what the circumstances allowed me to say. And I did what I usually do in similar situations: I stammered, I shut down, and I stopped talking. I stopped talking because the truth was complicated, even though I knew, deep within, that one should never, ever remain silent for fear of complexity. <Figure 1> Elif Shafak Photo credit: www. elifsafak. com. tr I want to talk about emotions and the need to boost our emotional intelligence. I think it’s a pity that mainstream political theory pays very little attention to emotions. Oftentimes, analysts and experts are so busy with data and metrics that they seem to forget those things in life that are difficult to measure and perhaps impossible to cluster under statistical models. But I think this is a mistake, for two main reasons. We are emotional beings. I think it’s going to be one of our biggest intellectual challenges, because our political systems are replete with emotions. In country after country, we have seen illiberal politicians exploiting these emotions. And yet within the academia and among the intelligentsia, we are yet to take emotions seriously. I think we should. 1 2 Reference: British Council Worldwide ↩ Ted Talk ↩ "
}, {
- "id": 30,
+ "id": 31,
"url": "http://localhost:4000/2019/01/dps-week1/",
"title": "Digital Product School week 1",
"body": "2019/01/11 - The 1th week retropect at Digital Product School [This week’s schedule] CONTENT: Welcome to Digital Product School! Trip to Spitzingsee Welcome to Design Office Specifying our goal of product Welcome to Digital Product School!: Trip to Spitzingsee: At the first day of Digital Product School, we had a off-site with all of batch 9 people. All the costs were managed by dps. At the beautiful mountain, we settled the team, and got my team goal. Basically, there are two kind of team in DPS. (1) Wild team - the team has fixed topic(2) Company team - the team which has specific stakeholders, and also topic defined by that stakeholders The Core-team will fix what team you will join in DPS for 3 months based on ymy professionals, they announce it at off-site. [My team for 3 months at DPS] And we decide on my batch #9 theme song. How? Each team draw for songs and pitch ‘why this song should be batch #9 theme song’The result? Imagine dragon - Believer (I didn’t know at the moment, this song would be stamped in my memory) We have a workshop for getting to know each other. For example, we share 1) what do I expect from 3 months of dps, 2) when I feel happy in my life time, 3) what I worked for last week, 4) what was my last project and 5) what plays important role in my life My team's board Cero Welcome to Design Office: At first day of design office, we had workshop, which celebrates my day in dps also discuss specific rule, menifesto and stakeholders We get sticker and attach it in map depends on my nationality Now time to get to know my team’s stakeholders. What they want for us? What they expect from us? How free my team are on the topic?To be honest, it is endless tug-of-war. We should discuss with my stakeholders, endlessly, and find out solution which can meet interest of users, stakeholders and my team. Basically, my team’s main stakeholder is ADAC, but BMW, City of munich and Nokia will also participate as my team’s stakeholders. Specifying our goal of product: "
@@ -331,7 +336,7 @@
- fast.ai-v3 ,
+ fastai-v3 ,
@@ -391,12 +396,15 @@
+