diff --git a/nonteach.ipynb b/nonteach.ipynb
new file mode 100644
index 0000000..9d8620c
--- /dev/null
+++ b/nonteach.ipynb
@@ -0,0 +1,131758 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a4ca745c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "f85d56c1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "from sklearn.metrics import r2_score\n",
+ "from sklearn.metrics import mean_absolute_error\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.metrics import roc_auc_score\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.manifold import TSNE\n",
+ "from sklearn.cluster import KMeans"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "ae5b0649",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "%config InlineBackend.figure_format = 'svg'\n",
+ "plt.style.use('fivethirtyeight')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d162abc4",
+ "metadata": {},
+ "source": [
+ "# Задание №1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b87e434e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.datasets import fetch_california_housing\n",
+ "housing = fetch_california_housing()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9fbb3c82",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = housing.data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "857bdd9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "feature_names = housing.feature_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c6518a25",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target = housing.target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "3360482e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " AveOccup | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 8.3252 | \n",
+ " 41.0 | \n",
+ " 6.984127 | \n",
+ " 1.023810 | \n",
+ " 322.0 | \n",
+ " 2.555556 | \n",
+ " 37.88 | \n",
+ " -122.23 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 8.3014 | \n",
+ " 21.0 | \n",
+ " 6.238137 | \n",
+ " 0.971880 | \n",
+ " 2401.0 | \n",
+ " 2.109842 | \n",
+ " 37.86 | \n",
+ " -122.22 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 7.2574 | \n",
+ " 52.0 | \n",
+ " 8.288136 | \n",
+ " 1.073446 | \n",
+ " 496.0 | \n",
+ " 2.802260 | \n",
+ " 37.85 | \n",
+ " -122.24 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5.6431 | \n",
+ " 52.0 | \n",
+ " 5.817352 | \n",
+ " 1.073059 | \n",
+ " 558.0 | \n",
+ " 2.547945 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3.8462 | \n",
+ " 52.0 | \n",
+ " 6.281853 | \n",
+ " 1.081081 | \n",
+ " 565.0 | \n",
+ " 2.181467 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 4.0368 | \n",
+ " 52.0 | \n",
+ " 4.761658 | \n",
+ " 1.103627 | \n",
+ " 413.0 | \n",
+ " 2.139896 | \n",
+ " 37.85 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 3.6591 | \n",
+ " 52.0 | \n",
+ " 4.931907 | \n",
+ " 0.951362 | \n",
+ " 1094.0 | \n",
+ " 2.128405 | \n",
+ " 37.84 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 3.1200 | \n",
+ " 52.0 | \n",
+ " 4.797527 | \n",
+ " 1.061824 | \n",
+ " 1157.0 | \n",
+ " 1.788253 | \n",
+ " 37.84 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2.0804 | \n",
+ " 42.0 | \n",
+ " 4.294118 | \n",
+ " 1.117647 | \n",
+ " 1206.0 | \n",
+ " 2.026891 | \n",
+ " 37.84 | \n",
+ " -122.26 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 3.6912 | \n",
+ " 52.0 | \n",
+ " 4.970588 | \n",
+ " 0.990196 | \n",
+ " 1551.0 | \n",
+ " 2.172269 | \n",
+ " 37.84 | \n",
+ " -122.25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
+ "0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
+ "1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
+ "2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
+ "3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
+ "4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
+ "5 4.0368 52.0 4.761658 1.103627 413.0 2.139896 37.85 \n",
+ "6 3.6591 52.0 4.931907 0.951362 1094.0 2.128405 37.84 \n",
+ "7 3.1200 52.0 4.797527 1.061824 1157.0 1.788253 37.84 \n",
+ "8 2.0804 42.0 4.294118 1.117647 1206.0 2.026891 37.84 \n",
+ "9 3.6912 52.0 4.970588 0.990196 1551.0 2.172269 37.84 \n",
+ "\n",
+ " Longitude \n",
+ "0 -122.23 \n",
+ "1 -122.22 \n",
+ "2 -122.24 \n",
+ "3 -122.25 \n",
+ "4 -122.25 \n",
+ "5 -122.25 \n",
+ "6 -122.25 \n",
+ "7 -122.25 \n",
+ "8 -122.26 \n",
+ "9 -122.25 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Feature matrix as a DataFrame labelled with the dataset's own column names.\n",
+ "X = pd.DataFrame(data=data, columns=feature_names)\n",
+ "X.head(n=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "bea881c0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 4.526 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3.585 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3.521 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3.413 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3.422 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " price\n",
+ "0 4.526\n",
+ "1 3.585\n",
+ "2 3.521\n",
+ "3 3.413\n",
+ "4 3.422"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Target values as a one-column DataFrame named \"price\".\n",
+ "Y = pd.DataFrame(data=target, columns=[\"price\"])\n",
+ "Y.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "65bd4ebc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, Y, test_size=0.2, random_state=42\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "3ebed40d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler = StandardScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "17940ac4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The scaler is fitted on the training split only and then reused for the test split.\n",
+ "# The original row index is passed through so the scaled frames stay label-aligned with\n",
+ "# X_train / y_train and X_test / y_test instead of being renumbered from 0.\n",
+ "X_train_scaled = pd.DataFrame(\n",
+ "    scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index\n",
+ ")\n",
+ "X_test_scaled = pd.DataFrame(\n",
+ "    scaler.transform(X_test), columns=X_test.columns, index=X_test.index\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "11f08ee0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " AveOccup | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " -0.326196 | \n",
+ " 0.348490 | \n",
+ " -0.174916 | \n",
+ " -0.208365 | \n",
+ " 0.768276 | \n",
+ " 0.051376 | \n",
+ " -1.372811 | \n",
+ " 1.272587 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.035843 | \n",
+ " 1.618118 | \n",
+ " -0.402835 | \n",
+ " -0.128530 | \n",
+ " -0.098901 | \n",
+ " -0.117362 | \n",
+ " -0.876696 | \n",
+ " 0.709162 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.144701 | \n",
+ " -1.952710 | \n",
+ " 0.088216 | \n",
+ " -0.257538 | \n",
+ " -0.449818 | \n",
+ " -0.032280 | \n",
+ " -0.460146 | \n",
+ " -0.447603 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -1.017864 | \n",
+ " 0.586545 | \n",
+ " -0.600015 | \n",
+ " -0.145156 | \n",
+ " -0.007434 | \n",
+ " 0.077507 | \n",
+ " -1.382172 | \n",
+ " 1.232698 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -0.171488 | \n",
+ " 1.142008 | \n",
+ " 0.349007 | \n",
+ " 0.086624 | \n",
+ " -0.485877 | \n",
+ " -0.068832 | \n",
+ " 0.532084 | \n",
+ " -0.108551 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1.439919 | \n",
+ " -0.683082 | \n",
+ " 0.354757 | \n",
+ " -0.203013 | \n",
+ " 1.115675 | \n",
+ " 0.023111 | \n",
+ " 0.831625 | \n",
+ " -1.135679 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " -0.715013 | \n",
+ " -0.048268 | \n",
+ " -0.456530 | \n",
+ " -0.050652 | \n",
+ " -0.062842 | \n",
+ " 0.034142 | \n",
+ " -0.750327 | \n",
+ " 0.803897 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 2.157529 | \n",
+ " 0.665897 | \n",
+ " 0.258670 | \n",
+ " -0.328365 | \n",
+ " -0.375941 | \n",
+ " -0.060568 | \n",
+ " 1.060961 | \n",
+ " -1.469745 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " -1.250241 | \n",
+ " -1.873359 | \n",
+ " -0.760128 | \n",
+ " -0.186074 | \n",
+ " -0.534249 | \n",
+ " -0.040033 | \n",
+ " -0.708204 | \n",
+ " 0.838800 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 1.336938 | \n",
+ " -1.873359 | \n",
+ " 0.542474 | \n",
+ " -0.152530 | \n",
+ " 7.036405 | \n",
+ " 0.025712 | \n",
+ " -0.759688 | \n",
+ " 0.893646 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
+ "0 -0.326196 0.348490 -0.174916 -0.208365 0.768276 0.051376 -1.372811 \n",
+ "1 -0.035843 1.618118 -0.402835 -0.128530 -0.098901 -0.117362 -0.876696 \n",
+ "2 0.144701 -1.952710 0.088216 -0.257538 -0.449818 -0.032280 -0.460146 \n",
+ "3 -1.017864 0.586545 -0.600015 -0.145156 -0.007434 0.077507 -1.382172 \n",
+ "4 -0.171488 1.142008 0.349007 0.086624 -0.485877 -0.068832 0.532084 \n",
+ "5 1.439919 -0.683082 0.354757 -0.203013 1.115675 0.023111 0.831625 \n",
+ "6 -0.715013 -0.048268 -0.456530 -0.050652 -0.062842 0.034142 -0.750327 \n",
+ "7 2.157529 0.665897 0.258670 -0.328365 -0.375941 -0.060568 1.060961 \n",
+ "8 -1.250241 -1.873359 -0.760128 -0.186074 -0.534249 -0.040033 -0.708204 \n",
+ "9 1.336938 -1.873359 0.542474 -0.152530 7.036405 0.025712 -0.759688 \n",
+ "\n",
+ " Longitude \n",
+ "0 1.272587 \n",
+ "1 0.709162 \n",
+ "2 -0.447603 \n",
+ "3 1.232698 \n",
+ "4 -0.108551 \n",
+ "5 -1.135679 \n",
+ "6 0.803897 \n",
+ "7 -1.469745 \n",
+ "8 0.838800 \n",
+ "9 0.893646 "
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_train_scaled.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "58048984",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# n_components=2 is the TSNE default; it is spelled out here so the 2-D target is explicit.\n",
+ "tsne = TSNE(n_components=2, learning_rate=250, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "5945bacf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train_tsne = tsne.fit_transform(X_train_scaled)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "ef8b283e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Plot the two t-SNE components of the training split against each other.\n",
+ "plt.scatter(*X_train_tsne.T)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bb293aeb",
+ "metadata": {},
+ "source": [
+ "# Задание №2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da5b8870",
+ "metadata": {},
+ "source": [
+ "Я тут немного посамовольничал, выполнение задания чуть ниже."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "74c45c10",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Elbow-curve data: KMeans inertia on the scaled training split for every k in range(3, 200).\n",
+ "inertia_arr = [\n",
+ "    KMeans(n_clusters=k, random_state=42, max_iter=100, n_init=10).fit(X_train_scaled).inertia_\n",
+ "    for k in range(3, 200)\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "1470bac2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Inertia against the number of clusters; the range is passed directly instead of being\n",
+ "# copied into a list first.\n",
+ "plt.plot(range(3, 200), inertia_arr)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c16d65e8",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "21312e20",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "23 1778\n",
+ "0 1643\n",
+ "6 1506\n",
+ "24 1286\n",
+ "14 1233\n",
+ "18 1159\n",
+ "11 1133\n",
+ "1 1119\n",
+ "22 1051\n",
+ "4 957\n",
+ "8 772\n",
+ "20 661\n",
+ "16 615\n",
+ "19 534\n",
+ "7 504\n",
+ "17 200\n",
+ "5 152\n",
+ "9 135\n",
+ "2 51\n",
+ "13 9\n",
+ "15 8\n",
+ "21 2\n",
+ "12 2\n",
+ "3 1\n",
+ "10 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Exploratory run with 25 clusters. n_init is set explicitly because the hint asked for it.\n",
+ "kmeans = KMeans(n_clusters=25, random_state=42, max_iter=100, n_init=10)\n",
+ "labels_train = kmeans.fit_predict(X_train_scaled)\n",
+ "# Cluster sizes; Series.value_counts() replaces the deprecated top-level pd.value_counts().\n",
+ "pd.Series(labels_train).value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "de2c33a6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Training-split t-SNE embedding, coloured by the cluster label of each row.\n",
+ "plt.scatter(*X_train_tsne.T, c=labels_train)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76149635",
+ "metadata": {},
+ "source": [
+ "Выполнение задания"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "id": "df6e21e8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 9510\n",
+ "1 6937\n",
+ "2 65\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The assignment itself: 3 clusters. n_init is set explicitly because the hint asked for it.\n",
+ "kmeans = KMeans(n_clusters=3, random_state=42, max_iter=100, n_init=10)\n",
+ "labels_train = kmeans.fit_predict(X_train_scaled)\n",
+ "# Cluster sizes; Series.value_counts() replaces the deprecated top-level pd.value_counts().\n",
+ "pd.Series(labels_train).value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "ebb8f010",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Training-split t-SNE embedding, coloured by the cluster label of each row.\n",
+ "plt.scatter(*X_train_tsne.T, c=labels_train)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "7f26a92e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "price 2.142137\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean price of the training rows assigned to cluster 0.\n",
+ "y_train.loc[labels_train == 0].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "8424c832",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "price 1.979968\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean price of the training rows assigned to cluster 1.\n",
+ "y_train.loc[labels_train == 1].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "a8816ffe",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "price 1.618831\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean price of the training rows assigned to cluster 2.\n",
+ "y_train.loc[labels_train == 2].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "d0c0a487",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " AveOccup | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 14196 | \n",
+ " 3.2596 | \n",
+ " 33.0 | \n",
+ " 5.017657 | \n",
+ " 1.006421 | \n",
+ " 2300.0 | \n",
+ " 3.691814 | \n",
+ " 32.71 | \n",
+ " -117.03 | \n",
+ "
\n",
+ " \n",
+ " | 8267 | \n",
+ " 3.8125 | \n",
+ " 49.0 | \n",
+ " 4.473545 | \n",
+ " 1.041005 | \n",
+ " 1314.0 | \n",
+ " 1.738095 | \n",
+ " 33.77 | \n",
+ " -118.16 | \n",
+ "
\n",
+ " \n",
+ " | 17445 | \n",
+ " 4.1563 | \n",
+ " 4.0 | \n",
+ " 5.645833 | \n",
+ " 0.985119 | \n",
+ " 915.0 | \n",
+ " 2.723214 | \n",
+ " 34.66 | \n",
+ " -120.48 | \n",
+ "
\n",
+ " \n",
+ " | 14265 | \n",
+ " 1.9425 | \n",
+ " 36.0 | \n",
+ " 4.002817 | \n",
+ " 1.033803 | \n",
+ " 1418.0 | \n",
+ " 3.994366 | \n",
+ " 32.69 | \n",
+ " -117.11 | \n",
+ "
\n",
+ " \n",
+ " | 2271 | \n",
+ " 3.5542 | \n",
+ " 43.0 | \n",
+ " 6.268421 | \n",
+ " 1.134211 | \n",
+ " 874.0 | \n",
+ " 2.300000 | \n",
+ " 36.78 | \n",
+ " -119.80 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 11284 | \n",
+ " 6.3700 | \n",
+ " 35.0 | \n",
+ " 6.129032 | \n",
+ " 0.926267 | \n",
+ " 658.0 | \n",
+ " 3.032258 | \n",
+ " 33.78 | \n",
+ " -117.96 | \n",
+ "
\n",
+ " \n",
+ " | 11964 | \n",
+ " 3.0500 | \n",
+ " 33.0 | \n",
+ " 6.868597 | \n",
+ " 1.269488 | \n",
+ " 1753.0 | \n",
+ " 3.904232 | \n",
+ " 34.02 | \n",
+ " -117.43 | \n",
+ "
\n",
+ " \n",
+ " | 5390 | \n",
+ " 2.9344 | \n",
+ " 36.0 | \n",
+ " 3.986717 | \n",
+ " 1.079696 | \n",
+ " 1756.0 | \n",
+ " 3.332068 | \n",
+ " 34.03 | \n",
+ " -118.38 | \n",
+ "
\n",
+ " \n",
+ " | 860 | \n",
+ " 5.7192 | \n",
+ " 15.0 | \n",
+ " 6.395349 | \n",
+ " 1.067979 | \n",
+ " 1777.0 | \n",
+ " 3.178891 | \n",
+ " 37.58 | \n",
+ " -121.96 | \n",
+ "
\n",
+ " \n",
+ " | 15795 | \n",
+ " 2.5755 | \n",
+ " 52.0 | \n",
+ " 3.402576 | \n",
+ " 1.058776 | \n",
+ " 2619.0 | \n",
+ " 2.108696 | \n",
+ " 37.77 | \n",
+ " -122.42 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
16512 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
+ "14196 3.2596 33.0 5.017657 1.006421 2300.0 3.691814 32.71 \n",
+ "8267 3.8125 49.0 4.473545 1.041005 1314.0 1.738095 33.77 \n",
+ "17445 4.1563 4.0 5.645833 0.985119 915.0 2.723214 34.66 \n",
+ "14265 1.9425 36.0 4.002817 1.033803 1418.0 3.994366 32.69 \n",
+ "2271 3.5542 43.0 6.268421 1.134211 874.0 2.300000 36.78 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "11284 6.3700 35.0 6.129032 0.926267 658.0 3.032258 33.78 \n",
+ "11964 3.0500 33.0 6.868597 1.269488 1753.0 3.904232 34.02 \n",
+ "5390 2.9344 36.0 3.986717 1.079696 1756.0 3.332068 34.03 \n",
+ "860 5.7192 15.0 6.395349 1.067979 1777.0 3.178891 37.58 \n",
+ "15795 2.5755 52.0 3.402576 1.058776 2619.0 2.108696 37.77 \n",
+ "\n",
+ " Longitude \n",
+ "14196 -117.03 \n",
+ "8267 -118.16 \n",
+ "17445 -120.48 \n",
+ "14265 -117.11 \n",
+ "2271 -119.80 \n",
+ "... ... \n",
+ "11284 -117.96 \n",
+ "11964 -117.43 \n",
+ "5390 -118.38 \n",
+ "860 -121.96 \n",
+ "15795 -122.42 \n",
+ "\n",
+ "[16512 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "id": "2817be16",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "28.473080967402733"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# This dataset has no CRIM feature, so the mean house age per cluster is computed instead.\n",
+ "X_train.loc[labels_train == 0, 'HouseAge'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "id": "ff919650",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "28.890442554418335"
+ ]
+ },
+ "execution_count": 84,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean HouseAge of the training rows assigned to cluster 1.\n",
+ "X_train.loc[labels_train == 1, 'HouseAge'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "e203a38b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "18.276923076923076"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mean HouseAge of the training rows assigned to cluster 2.\n",
+ "X_train.loc[labels_train == 2, 'HouseAge'].mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff98801c",
+ "metadata": {},
+ "source": [
+ "# Задание №3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "04c14f54",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.base import clone\n",
+ "\n",
+ "# TSNE offers no transform() for unseen rows, so the test split gets its own independent fit;\n",
+ "# its coordinates are not comparable with X_train_tsne. An unfitted copy with the same\n",
+ "# parameters is used so the estimator fitted on the training split is not overwritten.\n",
+ "X_test_tsne = clone(tsne).fit_transform(X_test_scaled)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "id": "8a548475",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " test | \n",
+ " train | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.586725 | \n",
+ " 0.575945 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.409641 | \n",
+ " 0.420119 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.003634 | \n",
+ " 0.003937 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " test train\n",
+ "0 0.586725 0.575945\n",
+ "1 0.409641 0.420119\n",
+ "2 0.003634 0.003937"
+ ]
+ },
+ "execution_count": 139,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Assign test rows to the clusters learned on the training split.\n",
+ "labels_test = kmeans.predict(X_test_scaled)\n",
+ "# Share of rows per cluster in each split, side by side. Series.value_counts() replaces the\n",
+ "# deprecated top-level pd.value_counts().\n",
+ "cnt = pd.concat(\n",
+ "    [\n",
+ "        pd.Series(labels_test).value_counts(normalize=True),\n",
+ "        pd.Series(labels_train).value_counts(normalize=True),\n",
+ "    ],\n",
+ "    axis=1,\n",
+ ")\n",
+ "cnt.columns = ['test', 'train']\n",
+ "cnt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "id": "8c5d0818",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Test-split t-SNE embedding, coloured by the predicted cluster label of each row.\n",
+ "plt.scatter(*X_test_tsne.T, c=labels_test)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "id": "c806f9f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение Price по группам:\n",
+ "1-я группа 2.131\n",
+ "2-я группа 1.950\n",
+ "3-я группа 1.607\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the mean price for each of the three clusters on the test split.\n",
+ "price_rows = [\n",
+ "    f'{k + 1}-я группа {y_test[labels_test == k].mean().values[0]:.3f}'\n",
+ "    for k in range(3)\n",
+ "]\n",
+ "print('\\n'.join(['Среднее значение Price по группам:', *price_rows]) + '\\n')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "id": "45bed323",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение HouseAge по группам:\n",
+ "1-я группа 28.278\n",
+ "2-я группа 29.535\n",
+ "3-я группа 20.400\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the mean HouseAge for each of the three clusters on the test split.\n",
+ "house_age_test = X_test['HouseAge']\n",
+ "age_rows = [\n",
+ "    f'{k + 1}-я группа {house_age_test[labels_test == k].mean():.3f}'\n",
+ "    for k in range(3)\n",
+ "]\n",
+ "print('\\n'.join(['Среднее значение HouseAge по группам:', *age_rows]) + '\\n')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}