In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "235df9f1",
   "metadata": {},
   "source": [
    "# Fetching the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2b44f34",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import fetch_openml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c2c34ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0faa2af6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08c2fca3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "315f7611",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0a4b0ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81e0c93d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pin the dataset version and request pandas containers explicitly:\n",
    "# later cells call .to_numpy() on the features, so they need a DataFrame.\n",
    "mnist = fetch_openml('mnist_784', version=1, as_frame=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6700364",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the feature table and the label column out of the fetched bundle.\n",
    "x, y = mnist.data, mnist.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "611d0d0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick one sample row (position 36001) as a flat pixel vector.\n",
    "some_digit = x.iloc[36001].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa6df032",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fold the flat pixel vector into a 28x28 grid so it can be drawn as an image.\n",
    "some_digit_image = some_digit.reshape((28, 28))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be29be7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw, hide the axes, and show within a single cell. With the inline backend\n",
    "# each cell renders and closes its own figure, so calling plt.axis(\"off\") in a\n",
    "# later cell would act on a new, empty figure instead of the digit image.\n",
    "plt.imshow(some_digit_image, cmap=matplotlib.cm.binary, interpolation='nearest')\n",
    "plt.axis(\"off\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e378d2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows [0, 60000) are used for training and rows [60000, end) for testing,\n",
    "# so the two sets do not share any samples.\n",
    "x_train, x_test = x[:60000], x[60000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9bc1d120",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the labels at the same boundary so they stay aligned with the features.\n",
    "y_train, y_test = y[:60000], y[60000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19a0dd78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeded generator so the shuffle order is reproducible across kernel restarts;\n",
    "# the permutation length follows the training set instead of a hard-coded size.\n",
    "shuffle_index = np.random.default_rng(42).permutation(len(x_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eefbaaae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reorder features and labels with the same permutation. shuffle_index holds\n",
    "# row positions, so positional indexing (.iloc) is used on the pandas objects.\n",
    "x_train, y_train = x_train.iloc[shuffle_index], y_train.iloc[shuffle_index]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "498c21cf",
   "metadata": {},
   "source": [
    "# Creating a 2-detector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8288e1e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the training labels to 8-bit integers.\n",
    "y_train = y_train.astype('int8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11fd2cab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the test labels to 8-bit integers as well.\n",
    "y_test = y_test.astype('int8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37c1b315",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The labels were cast to int8 above, so compare against the integer 2;\n",
    "# comparing against the string '2' would mark every sample as False.\n",
    "y_train_2 = (y_train == 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc8c7e46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same integer comparison for the test labels.\n",
    "y_test_2 = (y_test == 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c44116e5",
   "metadata": {},
   "source": [
    "# Train a logistic regression classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11caf1d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Logistic regression with a stopping tolerance of 0.1.\n",
    "clf = LogisticRegression(\n",
    "    tol=0.1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c84fbb18",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit on the shuffled training features against the boolean is-it-a-2 targets.\n",
    "clf.fit(X=x_train, y=y_train_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2adf415",
   "metadata": {},
   "outputs": [],
   "source": [
    "# predict() expects a 2-D batch, so present the single digit as one row.\n",
    "example = clf.predict(some_digit.reshape(1, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07c2515e",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(example)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a50dbccb",
   "metadata": {},
   "source": [
    "# Cross Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "22ce3a76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3-fold cross-validated accuracy of the 2-detector on the training set.\n",
    "cv_scores = cross_val_score(\n",
    "    clf,\n",
    "    x_train,\n",
    "    y_train_2,\n",
    "    cv=3,\n",
    "    scoring=\"accuracy\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e13407dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report the average accuracy over the folds.\n",
    "print(cv_scores.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6f8e437",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
