From 6185d6f73601cc5854c605fc937432ba72f37271 Mon Sep 17 00:00:00 2001 From: cookedbrick Date: Thu, 12 Jan 2023 00:07:48 +0300 Subject: [PATCH] sdf --- numpy_#1.ipynb | 280 ++++++++++++++++++ pandas#1.ipynb | 771 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1051 insertions(+) create mode 100644 numpy_#1.ipynb create mode 100644 pandas#1.ipynb diff --git a/numpy_#1.ipynb b/numpy_#1.ipynb new file mode 100644 index 0000000..6960062 --- /dev/null +++ b/numpy_#1.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1426cd93", + "metadata": {}, + "source": [ + "#### numpy task 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4c6dff0e", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3124b9a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 6]\n", + " [ 2 8]\n", + " [ 3 11]\n", + " [ 3 10]\n", + " [ 1 7]]\n" + ] + } + ], + "source": [ + "a = np.array([[1,6],\n", + " [2,8],\n", + " [3,11],\n", + " [3,10],\n", + " [1,7]])\n", + "print(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0c26a132", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2. 
8.4]\n" + ] + } + ], + "source": [ + "mean_a = np.mean(a, axis = 0)\n", + "print(mean_a)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f548d9aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 2)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.shape" + ] + }, + { + "cell_type": "markdown", + "id": "08a460ef", + "metadata": {}, + "source": [ + "#### numpy task 2" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bb310327", + "metadata": {}, + "outputs": [], + "source": [ + "a_centered = a - mean_a" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2cc48f6a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-1. , -2.4],\n", + " [ 0. , -0.4],\n", + " [ 1. , 2.6],\n", + " [ 1. , 1.6],\n", + " [-1. , -1.4]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_centered" + ] + }, + { + "cell_type": "markdown", + "id": "06c9cf90", + "metadata": {}, + "source": [ + "#### numpy task 3" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "d1d847ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-2.4, -0.4, 2.6, 1.6, -1.4])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_centered.T[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "6c638ade", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-1.],\n", + " [ 0.],\n", + " [ 1.],\n", + " [ 1.],\n", + " [-1.]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_centered[:,:1]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "f2962a5f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0" + ] + }, + "execution_count": 
48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a_centered_sp = np.dot(a_centered.T[1], a_centered.T[0]) \n", + "a_centered_sp / (a_centered.shape[0] - 1)" + ] + }, + { + "cell_type": "markdown", + "id": "5437a025", + "metadata": {}, + "source": [ + "#### numpy task 4" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "23547665", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.cov(a.T)[0, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "72bf00b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1. , 2. ],\n", + " [2. , 4.3]])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.cov(a.T)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pandas#1.ipynb b/pandas#1.ipynb new file mode 100644 index 0000000..8670396 --- /dev/null +++ b/pandas#1.ipynb @@ -0,0 +1,771 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f0803aaf", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "6f495e40", + "metadata": {}, + "source": [ + "# pandas task 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "083c8a0b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
author_idauthor_name
01Тургенев
12Чехов
23Островский
\n", + "
" + ], + "text/plain": [ + " author_id author_name\n", + "0 1 Тургенев\n", + "1 2 Чехов\n", + "2 3 Островский" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors = pd.DataFrame(\n", + " {\n", + " 'author_id':[1,2,3],\n", + " 'author_name':['Тургенев', 'Чехов', 'Островский']\n", + " }\n", + ")\n", + "authors" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "044d1217", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
author_idbook_titleprice
01Отцы и дети450
11Рудин300
21Дворянское гнездо350
32Толстый и тонкий500
42Дама с собачкой450
53Гроза370
63Таланты и поклонники290
\n", + "
" + ], + "text/plain": [ + " author_id book_title price\n", + "0 1 Отцы и дети 450\n", + "1 1 Рудин 300\n", + "2 1 Дворянское гнездо 350\n", + "3 2 Толстый и тонкий 500\n", + "4 2 Дама с собачкой 450\n", + "5 3 Гроза 370\n", + "6 3 Таланты и поклонники 290" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "book = pd.DataFrame(\n", + " {\n", + " 'author_id':[1, 1, 1, 2, 2, 3, 3],\n", + " 'book_title':['Отцы и дети', 'Рудин', 'Дворянское гнездо',\n", + " 'Толстый и тонкий', 'Дама с собачкой', 'Гроза', 'Таланты и поклонники'],\n", + " 'price':[450, 300, 350, 500, 450, 370, 290]\n", + " }\n", + ")\n", + "book" + ] + }, + { + "cell_type": "markdown", + "id": "ee979488", + "metadata": {}, + "source": [ + "# pandas task 2" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b8758f3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
author_idauthor_namebook_titleprice
01ТургеневОтцы и дети450
11ТургеневРудин300
21ТургеневДворянское гнездо350
32ЧеховТолстый и тонкий500
42ЧеховДама с собачкой450
53ОстровскийГроза370
63ОстровскийТаланты и поклонники290
\n", + "
" + ], + "text/plain": [ + " author_id author_name book_title price\n", + "0 1 Тургенев Отцы и дети 450\n", + "1 1 Тургенев Рудин 300\n", + "2 1 Тургенев Дворянское гнездо 350\n", + "3 2 Чехов Толстый и тонкий 500\n", + "4 2 Чехов Дама с собачкой 450\n", + "5 3 Островский Гроза 370\n", + "6 3 Островский Таланты и поклонники 290" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors_price = pd.merge(authors, book, on = 'author_id', how = 'left')\n", + "authors_price" + ] + }, + { + "cell_type": "markdown", + "id": "0598d426", + "metadata": {}, + "source": [ + "# pandas task 3" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "3ce3408c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
author_idauthor_namebook_titleprice
32ЧеховТолстый и тонкий500
01ТургеневОтцы и дети450
42ЧеховДама с собачкой450
53ОстровскийГроза370
21ТургеневДворянское гнездо350
\n", + "
" + ], + "text/plain": [ + " author_id author_name book_title price\n", + "3 2 Чехов Толстый и тонкий 500\n", + "0 1 Тургенев Отцы и дети 450\n", + "4 2 Чехов Дама с собачкой 450\n", + "5 3 Островский Гроза 370\n", + "2 1 Тургенев Дворянское гнездо 350" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top5 = authors_price.nlargest(5, 'price')\n", + "top5" + ] + }, + { + "cell_type": "markdown", + "id": "c9274648", + "metadata": {}, + "source": [ + "# pandas task 4" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "085622d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
price
max_pricemin_pricemean_price
author_name
Островский370290330.000000
Тургенев450300366.666667
Чехов500450475.000000
\n", + "
" + ], + "text/plain": [ + " price \n", + " max_price min_price mean_price\n", + "author_name \n", + "Островский 370 290 330.000000\n", + "Тургенев 450 300 366.666667\n", + "Чехов 500 450 475.000000" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors_stat = authors_price.groupby('author_name').agg({'price':['max','min','mean']})\n", + "authors_stat = authors_stat.rename(columns={'min':'min_price', 'max':'max_price', 'mean':'mean_price'})\n", + "authors_stat\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "f5b0897a", + "metadata": {}, + "source": [ + "# pandas task 5" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "1a8e9191", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
author_idauthor_namebook_titlepricecover
01ТургеневОтцы и дети450твердая
11ТургеневРудин300мягкая
21ТургеневДворянское гнездо350мягкая
32ЧеховТолстый и тонкий500твердая
42ЧеховДама с собачкой450твердая
53ОстровскийГроза370мягкая
63ОстровскийТаланты и поклонники290мягкая
\n", + "
" + ], + "text/plain": [ + " author_id author_name book_title price cover\n", + "0 1 Тургенев Отцы и дети 450 твердая\n", + "1 1 Тургенев Рудин 300 мягкая\n", + "2 1 Тургенев Дворянское гнездо 350 мягкая\n", + "3 2 Чехов Толстый и тонкий 500 твердая\n", + "4 2 Чехов Дама с собачкой 450 твердая\n", + "5 3 Островский Гроза 370 мягкая\n", + "6 3 Островский Таланты и поклонники 290 мягкая" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authors_price['cover'] = ['твердая', 'мягкая', 'мягкая', 'твердая', 'твердая', 'мягкая', 'мягкая']\n", + "authors_price" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "7495a705", + "metadata": {}, + "outputs": [], + "source": [ + "?pd.pivot_table" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "edc99efe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coverтвердаямягкая
author_name
Тургенев450.0650.0
Чехов950.00.0
Островский0.0660.0
\n", + "
" + ], + "text/plain": [ + "cover твердая мягкая\n", + "author_name \n", + "Тургенев 450.0 650.0\n", + "Чехов 950.0 0.0\n", + "Островский 0.0 660.0" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "book_info = pd.pivot_table(authors_price, values='price', index=['author_name'], columns=['cover'], aggfunc=np.sum, sort = False)\n", + "book_info['мягкая'] = book_info['мягкая'].fillna(0)\n", + "book_info['твердая'] = book_info['твердая'].fillna(0)\n", + "book_info\n" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "887d0f05", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "book_info.to_pickle('book_info.pkl')\n", + "book_info2 = pd.read_pickle('book_info.pkl')\n", + "book_info.equals(book_info2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}