In [105]:
import numpy as np
import pandas as pd

## Загрузка данных

##### requests

In [106]:
import requests
import zipfile, io

In [109]:
# Скачивание архива с интернет-ресурса
r = requests.get('https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip')
z = zipfile.ZipFile(io.BytesIO(r.content))

# Просмотр архива
z.namelist() 

['bank-full.csv', 'bank-names.txt', 'bank.csv']

##### pd.read_csv()

In [111]:
# Сохранение 'bank-full.csv' в DataFrame
df = pd.read_csv(z.open(str(z.namelist()[0])), sep=';')
df.sample(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
29012,49,blue-collar,single,secondary,no,-367,yes,no,cellular,2,feb,98,1,207,1,failure,no
34244,67,retired,married,unknown,no,696,no,no,telephone,4,may,77,2,-1,0,unknown,no
7748,45,technician,single,unknown,no,7108,yes,yes,unknown,30,may,1212,3,-1,0,unknown,no
37262,47,admin.,married,secondary,no,411,yes,no,cellular,13,may,157,1,-1,0,unknown,no
37721,25,student,single,secondary,no,963,yes,no,telephone,14,may,230,4,301,1,failure,no


## Основные структуры

1. **Series** - Одномерный объект индексированных данных (Контейнер для скаляров)
2. **DataFrame** - Двумерный объект-таблица (Контейнер для объектов Series)
3. **Index** - Индекс. Ключевой или порядковый определитель данных

#### Index

#### Series

#### DataFrame

In [3]:
data = (np.random.random((100, 3)) *10).round(2)
data2 = np.random.choice((0, 1), size=100).reshape((-1, 1))
data3 = np.hstack([data, data2])
df = pd.DataFrame(data3)

## Индексация

##### Срезы

##### set_index

Устанавливает новые индексы датафрейму, к примеру ротирует столбец ДФа в его индексы.

In [256]:
df2 = df.set_index(keys='balance')
df2.head(3)

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,yes,yes,unknown,5,may,76,1,-1,0,unknown,0


##### Индексатор loc

Обращение к строкам по их действительному отображаемому(ключевому) индексу

In [278]:
# balance выставлен в качестве индаксации, индексы дублируются и не упорядочены
df2.head(3)

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,yes,yes,unknown,5,may,76,1,-1,0,unknown,0


In [324]:
# Если указанный индекс один в датафрейме- возвращается строка развернутая в объект Series 
df2.loc[4562]

age                  62
job          management
marital         married
education     secondary
default              no
                ...    
campaign              1
pdays                -1
previous              0
poutcome        unknown
y                     1
Name: 4562, Length: 16, dtype: object

In [285]:
# Если указанный индекс в датафрейме один, но передать списком из одного элемента [[]] - вернется одна строка датафрейма а не объект Series
df.loc[[4562]]

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
4562,33,services,married,unknown,no,89,yes,no,unknown,20,may,341,1,-1,0,unknown,0


In [294]:
# Если указанный индекс не один в датафрейме - возвращается объект DataFrame строк с этим индексом
df2.loc[29]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
29,58,blue-collar,divorced,unknown,no,yes,no,unknown,6,may,253,1,-1,0,unknown,0
29,39,blue-collar,single,secondary,no,yes,no,unknown,7,may,214,2,-1,0,unknown,0
29,32,blue-collar,single,secondary,no,no,no,unknown,16,may,132,13,-1,0,unknown,0
29,45,technician,divorced,secondary,no,no,no,unknown,12,jun,385,1,-1,0,unknown,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,34,blue-collar,married,secondary,no,yes,no,cellular,15,may,285,5,-1,0,unknown,1
29,52,blue-collar,divorced,primary,no,yes,no,cellular,15,may,223,2,298,1,failure,0
29,36,admin.,married,tertiary,no,no,no,cellular,12,aug,576,1,-1,0,unknown,1
29,20,student,single,secondary,no,no,no,cellular,14,sep,85,2,-1,0,unknown,0


In [298]:
# При передаче списка - вернутся все соответствующие найденные по индексу строки
df2.loc[[4562, 29, 2]]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
4562,62,management,married,secondary,no,yes,no,cellular,3,mar,168,1,-1,0,unknown,1
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
29,58,blue-collar,divorced,unknown,no,yes,no,unknown,6,may,253,1,-1,0,unknown,0
29,39,blue-collar,single,secondary,no,yes,no,unknown,7,may,214,2,-1,0,unknown,0
29,32,blue-collar,single,secondary,no,no,no,unknown,16,may,132,13,-1,0,unknown,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,62,housemaid,married,secondary,no,no,no,cellular,3,nov,322,1,-1,0,unknown,1
2,29,admin.,single,secondary,no,no,no,cellular,4,nov,85,1,142,1,failure,0
2,63,housemaid,married,secondary,no,no,no,cellular,4,feb,45,2,93,1,success,0
2,60,retired,divorced,tertiary,no,no,no,cellular,2,mar,228,4,390,2,other,1


In [322]:
# Срез по ключевыым индексам
# Срез возможен только если используются НЕ дублирующие друг друга индексы
df2.loc[4562:4575]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
4562,62,management,married,secondary,no,yes,no,cellular,3,mar,168,1,-1,0,unknown,1
10180,58,management,divorced,secondary,no,no,no,cellular,3,mar,180,2,-1,0,unknown,1
0,46,management,married,tertiary,no,no,no,cellular,3,mar,305,13,-1,0,unknown,1
281,56,retired,married,tertiary,no,no,no,cellular,3,mar,96,2,-1,0,unknown,1
523,49,technician,married,tertiary,no,yes,no,cellular,3,mar,152,2,-1,0,unknown,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
-579,27,blue-collar,single,primary,no,yes,no,cellular,18,may,220,1,367,3,failure,0
95,43,blue-collar,married,secondary,no,yes,no,cellular,18,may,296,1,-1,0,unknown,0
251,26,student,single,tertiary,no,yes,no,cellular,18,may,12,5,-1,0,unknown,0
471,37,admin.,divorced,unknown,no,yes,no,cellular,18,may,200,1,-1,0,unknown,0


In [323]:
# При передаче нескольких индексов списком - вернутся все соответствующие порядку индексы
df2.loc[[4562, 23, 29, 2]].loc[4562:29]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
4562,62,management,married,secondary,no,yes,no,cellular,3,mar,168,1,-1,0,unknown,1
23,32,blue-collar,single,primary,no,yes,yes,unknown,5,may,160,1,-1,0,unknown,0
23,55,blue-collar,married,primary,no,yes,no,unknown,5,may,291,1,-1,0,unknown,0
23,36,blue-collar,married,primary,no,yes,no,unknown,6,may,400,2,-1,0,unknown,0
23,40,services,single,secondary,no,yes,no,unknown,8,may,179,5,-1,0,unknown,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,34,blue-collar,married,secondary,no,yes,no,cellular,15,may,285,5,-1,0,unknown,1
29,52,blue-collar,divorced,primary,no,yes,no,cellular,15,may,223,2,298,1,failure,0
29,36,admin.,married,tertiary,no,no,no,cellular,12,aug,576,1,-1,0,unknown,1
29,20,student,single,secondary,no,no,no,cellular,14,sep,85,2,-1,0,unknown,0


##### Индексатор iloc

Обращение к строкам по их неявному исходному изначальному-скрытому(порядковому) индексу.
В исходной порядковой индексации индексы себя не дублируют

In [326]:
df2.head()

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,yes,yes,unknown,5,may,76,1,-1,0,unknown,0
1506,47,blue-collar,married,unknown,no,yes,no,unknown,5,may,92,1,-1,0,unknown,0
1,33,unknown,single,unknown,no,no,no,unknown,5,may,198,1,-1,0,unknown,0


In [329]:
# При передаче одного индекса - возвращается развернутая в объект Series строка датафрейма
df2.iloc[0]

age                  58
job          management
marital         married
education      tertiary
default              no
                ...    
campaign              1
pdays                -1
previous              0
poutcome        unknown
y                     0
Name: 2143, Length: 16, dtype: object

In [331]:
# При передаче списком из одного элемента [[]] - возвращается строка датафрейма
df2.iloc[[0]]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0


In [334]:
# Срез по исходным индексам
df2.iloc[5:10]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
231,35,management,married,tertiary,no,yes,no,unknown,5,may,139,1,-1,0,unknown,0
447,28,management,single,tertiary,no,yes,yes,unknown,5,may,217,1,-1,0,unknown,0
2,42,entrepreneur,divorced,tertiary,yes,yes,no,unknown,5,may,380,1,-1,0,unknown,0
121,58,retired,married,primary,no,yes,no,unknown,5,may,50,1,-1,0,unknown,0
593,43,technician,single,secondary,no,yes,no,unknown,5,may,55,1,-1,0,unknown,0


In [336]:
# Передача списка индексов
df2.iloc[[1, 0, 10, 1000]]

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
270,41,admin.,divorced,secondary,no,yes,no,unknown,5,may,222,1,-1,0,unknown,0
0,47,admin.,married,unknown,no,yes,no,unknown,7,may,164,1,-1,0,unknown,0


##### reset_index

Извлекает ключевой столбец и возвращает порядковую индексацию датафрейма\
`inplace` - применяет изменения к датафрейму

In [337]:
df2.head()

Unnamed: 0_level_0,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,yes,yes,unknown,5,may,76,1,-1,0,unknown,0
1506,47,blue-collar,married,unknown,no,yes,no,unknown,5,may,92,1,-1,0,unknown,0
1,33,unknown,single,unknown,no,no,no,unknown,5,may,198,1,-1,0,unknown,0


In [340]:
df2.reset_index(inplace=True)

In [341]:
df2.head()

Unnamed: 0,balance,age,job,marital,education,default,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,2143,58,management,married,tertiary,no,yes,no,unknown,5,may,261,1,-1,0,unknown,0
1,29,44,technician,single,secondary,no,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,2,33,entrepreneur,married,secondary,no,yes,yes,unknown,5,may,76,1,-1,0,unknown,0
3,1506,47,blue-collar,married,unknown,no,yes,no,unknown,5,may,92,1,-1,0,unknown,0
4,1,33,unknown,single,unknown,no,no,no,unknown,5,may,198,1,-1,0,unknown,0


## Функции

##### Selection funcs

`df.head()` - Вывод n первых строк\
`df.sample()` - Вывод n случайных строк\
`df.tail()` - Вывод n последних строк\

In [173]:
df.head(3)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,0


In [174]:
df.tail(2)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
45209,57,blue-collar,married,secondary,no,668,no,no,telephone,17,nov,508,4,-1,0,unknown,0
45210,37,entrepreneur,married,secondary,no,2971,no,no,cellular,17,nov,361,2,188,11,other,0


In [119]:
df.sample(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
37276,35,services,single,secondary,no,-639,yes,yes,cellular,13,may,427,1,-1,0,unknown,no
41845,61,technician,married,primary,no,6384,no,no,cellular,16,oct,55,2,-1,0,unknown,no
35025,35,management,married,tertiary,no,272,yes,no,cellular,6,may,251,4,289,2,failure,no
36592,33,entrepreneur,married,secondary,no,1034,yes,no,cellular,12,may,23,6,295,6,failure,no
34164,31,management,single,tertiary,no,354,no,no,cellular,30,apr,502,1,9,2,success,yes


##### pd.apply()

Применяет производную функцию к каждому элементу датафрейма

In [124]:
df['y'] = df['y'].apply(lambda x: 1 if x == 'yes' else 0)

In [132]:
df['y'].sample(5)

40444    1
35455    0
26992    0
31818    0
5339     0
Name: y, dtype: int64

##### pd.nsmallest

Возвращает n наименьших значений по столбцу

In [161]:
df.nsmallest(columns='age', n=3)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
40736,18,student,single,primary,no,1944,no,no,telephone,10,aug,122,3,-1,0,unknown,0
40744,18,student,single,unknown,no,108,no,no,cellular,10,aug,167,1,-1,0,unknown,1
40887,18,student,single,primary,no,608,no,no,cellular,12,aug,267,1,-1,0,unknown,1


In [169]:
df['age'].nsmallest(3, keep='last') # keep = параметр выбора среди дубликатов

44644    18
43637    18
42954    18
Name: age, dtype: int64

In [170]:
df['age'].nsmallest(3, keep='first') # keep = параметр выбора среди дубликатов

40736    18
40744    18
40887    18
Name: age, dtype: int64

##### pd.nlargest

Возвращает n наибольших значений по столбцу

In [171]:
df['balance'].nlargest(3)

39989    102127
26227     98417
42558     81204
Name: balance, dtype: int64

In [172]:
df.nlargest(1, columns='balance')

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
39989,51,management,single,tertiary,no,102127,no,no,cellular,3,jun,90,1,-1,0,unknown,0


##### pd.sort_values()

Сортировка датафрейма\
`by` По какому столбцу(Можно передать несколько столбцов для разрешения дубликатов)\
`ascending` - Возростанию/убыванию\
`kind` - Алгоритм сортировки

In [186]:
df.sort_values(by='age', ascending=False, kind='QuickSort').head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
41663,95,retired,married,secondary,no,0,no,no,telephone,1,oct,215,1,-1,0,unknown,0
33699,95,retired,divorced,primary,no,2282,no,no,telephone,21,apr,207,17,-1,0,unknown,1
31233,94,retired,divorced,secondary,no,1234,no,no,cellular,3,mar,212,1,-1,0,unknown,0
44420,93,retired,married,unknown,no,775,no,no,cellular,4,aug,476,2,13,9,success,1
44262,93,retired,married,unknown,no,775,no,no,cellular,22,jul,860,2,177,7,success,1


##### pd.value_counts()
Возвращает объект Series - количество уникальных значений\
`dropna` - Включить/Исключить дубликаты

In [197]:
df['marital'].value_counts()

marital
married     27214
single      12790
divorced     5207
Name: count, dtype: int64

In [198]:
df.value_counts(subset='marital', dropna=False)

marital
married     27214
single      12790
divorced     5207
Name: count, dtype: int64

##### pd.drop()

##### pd.dropna()

##### pd.fillna()

##### pd.query()

Вывод датафрейма в соответствии с запросом по булевому условию

In [215]:
df.head(3)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,0
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,0


In [214]:
df.query('housing == "yes" & education == "tertiary"').head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,0
5,35,management,married,tertiary,no,231,yes,no,unknown,5,may,139,1,-1,0,unknown,0
6,28,management,single,tertiary,no,447,yes,yes,unknown,5,may,217,1,-1,0,unknown,0
7,42,entrepreneur,divorced,tertiary,yes,2,yes,no,unknown,5,may,380,1,-1,0,unknown,0
21,56,management,married,tertiary,no,779,yes,no,unknown,5,may,164,1,-1,0,unknown,0


##### pd.concat()

##### pd.append()

##### pd.merge()

##### pd.groupby()

##### pd.join()

##### pd.pivot_table()

##### pd.crosstab()

## Настройка Pandas

In [71]:
# Функция возвращается описание параметра
pd.describe_option("max_rows")

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]
styler.render.max_rows : int, optional
    The maximum number of rows that will be rendered. May still be reduced to
    satisfy ``max_elements``, which takes precedence.
    [default: None] [currently: None]


Изменение параметров отображения глобально влияют на все последующие ячейки **Jupyter** \
`pd.get_option()` - Получить значение параметра \
`pd.set_option()` - Установить значение параметра \
Эквиволентно прямому изменению атрибута `pd.options.display.max_rows`

Изменение параметров для единичного случая - Менеджер контекста 

In [98]:
pd.options.display.max_rows # Максимальное количество выводимы строк
pd.options.display.max_columns # Максимальное количество выводимых столбцов
pd.options.display.precision # Точность(Количество знаков после запятой)
pd.options.display.max_colwidth # Максимальная ширина столбца
pd.options.display.chop_threshold # Порог округления до нуля

In [103]:
# Параметры контекста
pd.option_context

pandas._config.config.option_context

## Выгрузка данных

##### pd.to_()