# Модуль pandas
Библиотека для анализа и обработки данных.

Документация [здесь](https://pandas.pydata.org/docs/reference/index.html).

In [102]:
import numpy as np
import pandas as pd

## Series (ряд) - одномерный ndarray с именоваными данными (метки)
Документация [здесь](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html).

### Создание
pandas.Series(data=None, index=None, dtype=None, name=None, ...)

data - Словарь, список, массив или скалярное значение.

index - Список меток. Длина списка равна длине data.

dtype - Тип данных numpy.dtype.

name - Имя ряда.

#### из списка

In [103]:
sr = pd.Series([1, 2, 3, 4, 5])
print(sr)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [104]:
sr = pd.Series([1, 2, 3, 4, 5], dtype=np.float, name="Series")
print(sr)

AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [105]:
sr = pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'])
print(sr)

a    1
b    2
c    3
d    4
e    5
dtype: int64


#### из словаря

In [106]:
d = {'a':1, 'b':2, 'c':3, 'd':4, 'e':5}
sr = pd.Series(d)
print(sr)

a    1
b    2
c    3
d    4
e    5
dtype: int64


#### из массива ndarray

In [107]:
arr = np.array([1, 2, 3, 4, 5])
sr = pd.Series(arr)
print(sr)

0    1
1    2
2    3
3    4
4    5
dtype: int32


In [108]:
arr = np.array([1, 2, 3, 4, 5])
names = ["Index " + str(i) for i in range(len(arr))]
sr = pd.Series(arr, names)
print(sr)

Index 0    1
Index 1    2
Index 2    3
Index 3    4
Index 4    5
dtype: int32


#### из скалярной величины

In [109]:
names = ["Index " + str(i) for i in range(5)]
sr = pd.Series(33, names)
print(sr)

Index 0    33
Index 1    33
Index 2    33
Index 3    33
Index 4    33
dtype: int64


### Обращение к элементам

In [110]:
sr = pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'])
print(sr)

a    1
b    2
c    3
d    4
e    5
dtype: int64


#### по индексу

In [111]:
sr[0]

1

#### по метке

In [112]:
sr['b']

2

#### через срезы

In [113]:
sr[2:]

c    3
d    4
e    5
dtype: int64

In [114]:
sr[:-3]

a    1
b    2
dtype: int64

#### через условие

In [115]:
sr[sr > 2]

c    3
d    4
e    5
dtype: int64

In [116]:
sr[sr == 2]

b    2
dtype: int64

### Присваивание значений

In [117]:
sr[sr > 2] = 1
print(sr)

a    1
b    2
c    1
d    1
e    1
dtype: int64


In [118]:
sr[:2] = 4
print(sr)

a    4
b    4
c    1
d    1
e    1
dtype: int64


In [119]:
sr[:2] = [4, 3]
print(sr)

a    4
b    3
c    1
d    1
e    1
dtype: int64


### Атрибуты

In [120]:
sr = pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'], name='test')
print(sr)

a    1
b    2
c    3
d    4
e    5
Name: test, dtype: int64


#### obj.name - возвращает название ряда

In [121]:
sr.name

'test'

#### obj.axes - название осей

In [122]:
sr.axes

[Index(['a', 'b', 'c', 'd', 'e'], dtype='object')]

#### obj.dtype - тип элементов

In [123]:
sr.dtype

dtype('int64')

#### obj.at - доступ по метке к одному элементу

In [124]:
sr.at['a']

1

#### obj.iat - доступ по индексу к одному элементу

In [125]:
sr.iat[0]

1

#### obj.loc - доступ по меткам к группе элеменов

In [126]:
sr.loc[['a', 'b']]

a    1
b    2
Name: test, dtype: int64

In [127]:
sr.loc[np.array(['a', 'b'])]

a    1
b    2
Name: test, dtype: int64

#### obj.iloc - доступ по индексам к группе элеменов

In [128]:
sr.iloc[[0, 1]]

a    1
b    2
Name: test, dtype: int64

In [129]:
sr.iloc[np.array([0, 1])]

a    1
b    2
Name: test, dtype: int64

#### obj.index - индексы

In [130]:
sr.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

#### obj.values - значения

In [131]:
sr.values

array([1, 2, 3, 4, 5], dtype=int64)

#### obj.hasnans - истина, если есть  хоть один NaN

In [132]:
sr.hasnans

False

In [133]:
sr = pd.Series([1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e'], name='test')
print(sr)

a    1
b    2
c    3
d    4
e    5
Name: test, dtype: int64


#### obj.is_monotonic - истина, если значения монотонно возрастают

In [134]:
sr.is_monotonic

AttributeError: 'Series' object has no attribute 'is_monotonic'

#### obj.is_monotonic_increasing - псевдоним для is_monotonic

In [135]:
sr.is_monotonic_increasing

True

#### obj.is_monotonic_decreasing - истина, если значения монотонно убывают

In [136]:
sr.is_monotonic_decreasing

False

#### obj.is_unique - истина, если значения уникальны

In [137]:
sr.is_unique

True

#### obj.nbytes - объем всех данных

In [138]:
sr.nbytes

40

#### obj.ndim - количество измерений (всегда 1)

In [139]:
sr.ndim

1

#### obj.shape - форма

In [140]:
sr.shape

(5,)

#### obj.size - количество элементов

In [39]:
sr.size

5

In [40]:
len(sr)

5

## DataFrame - двумерный ndarray с именоваными данными (метки)
Документация [здесь](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html).

### Создание
DataFrame(data=None, index=None, columns=None, dtype=None, ...)

data - Словарь, список, массив.

index - Список меток строк. (0, 1, 2, ..., m) если не указано.

columns - Список меток колонок. (0, 1, 2, ..., n) если не указано.

dtype - Тип данных numpy.dtype.

#### из списка

In [41]:
df = pd.DataFrame([1, 2, 3, 4, 5])
print(df)

   0
0  1
1  2
2  3
3  4
4  5


In [42]:
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]])
print(df)

   0  1
0  1  2
1  3  4
2  5  6


In [43]:
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=np.float)
print(df)

AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [44]:
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], ['a', 'b', 'c'])
print(df)

   0  1
a  1  2
b  3  4
c  5  6


In [45]:
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['d', 'e'])
print(df)

   d  e
0  1  2
1  3  4
2  5  6


In [46]:
df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], ['a', 'b', 'c'], ['d', 'e'])
print(df)

   d  e
a  1  2
b  3  4
c  5  6


#### из словаря

In [47]:
d = {'color' : ['red', 'green', 'blue'],
     'object' : ['hat', 'day', 'origin'],
     'price' : [1, 5, 9.7]}
df = pd.DataFrame(d)
print(df)

   color  object  price
0    red     hat    1.0
1  green     day    5.0
2   blue  origin    9.7


#### из массива ndarray

In [48]:
arr = np.arange(12).reshape((3, 4))
df = pd.DataFrame(arr, columns=list("abcd"), index=list("efg"))
print(df)

   a  b   c   d
e  0  1   2   3
f  4  5   6   7
g  8  9  10  11


In [49]:
arr = np.random.uniform(0, 1, 10).reshape(5, 2)
df = pd.DataFrame(arr, index=["Ind " + str(i) for i in range(5)], columns=["Col " + str(i) for i in range(2)])
print(df)

          Col 0     Col 1
Ind 0  0.672688  0.551612
Ind 1  0.684593  0.011952
Ind 2  0.637697  0.988610
Ind 3  0.152314  0.696908
Ind 4  0.529425  0.151685


### Обращение к элементам

In [50]:
m = 6
n = 4
arr = np.array(np.random.randint(0, 100, size=m * n).reshape(m, n))
df = pd.DataFrame(arr, index=['r' + str(i) for i in range(m)], columns=[chr(i + 97) for i in range(n)])
print(df)

     a   b   c   d
r0  75  37  46  32
r1  26  99  61  15
r2  47  23  34  97
r3  54  19  41  81
r4  80  63  49  23
r5  14  73  36   7


#### столбец по метке

In [51]:
df['a']

r0    75
r1    26
r2    47
r3    54
r4    80
r5    14
Name: a, dtype: int32

#### строки через срезы

In [52]:
df[2:]

Unnamed: 0,a,b,c,d
r2,47,23,34,97
r3,54,19,41,81
r4,80,63,49,23
r5,14,73,36,7


In [53]:
df[:-3]

Unnamed: 0,a,b,c,d
r0,75,37,46,32
r1,26,99,61,15
r2,47,23,34,97


#### через условие

In [54]:
df

Unnamed: 0,a,b,c,d
r0,75,37,46,32
r1,26,99,61,15
r2,47,23,34,97
r3,54,19,41,81
r4,80,63,49,23
r5,14,73,36,7


In [55]:
print(df)

     a   b   c   d
r0  75  37  46  32
r1  26  99  61  15
r2  47  23  34  97
r3  54  19  41  81
r4  80  63  49  23
r5  14  73  36   7


In [56]:
df[df['a'] > 70]

Unnamed: 0,a,b,c,d
r0,75,37,46,32
r4,80,63,49,23


In [57]:
df[(df['a'] > 70) & (df['b'] <= 50)]

Unnamed: 0,a,b,c,d
r0,75,37,46,32


### Атрибуты

#### obj.axes - название осей

In [58]:
df.axes

[Index(['r0', 'r1', 'r2', 'r3', 'r4', 'r5'], dtype='object'),
 Index(['a', 'b', 'c', 'd'], dtype='object')]

#### obj.index - индексы

In [59]:
df.index

Index(['r0', 'r1', 'r2', 'r3', 'r4', 'r5'], dtype='object')

#### obj.columns - колонки

In [60]:
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

#### obj.values - значения

In [61]:
df.values

array([[75, 37, 46, 32],
       [26, 99, 61, 15],
       [47, 23, 34, 97],
       [54, 19, 41, 81],
       [80, 63, 49, 23],
       [14, 73, 36,  7]])

#### obj.at - доступ по меткам к одному элементу

In [62]:
df.at['r2', 'a']

47

#### obj.iat - доступ по индексу к одному элементу

In [63]:
df.iat[2, 0]

47

#### obj.loc - доступ по меткам к группе элеменов

In [64]:
df.loc['r1']

a    26
b    99
c    61
d    15
Name: r1, dtype: int32

In [65]:
df.loc[np.array(['r1', 'r2'])]

Unnamed: 0,a,b,c,d
r1,26,99,61,15
r2,47,23,34,97


#### obj.iloc - доступ по индексам к группе элеменов

In [66]:
sr.iloc[[0, 1]]

a    1
b    2
Name: test, dtype: int64

In [67]:
sr.iloc[np.array([0, 1])]

a    1
b    2
Name: test, dtype: int64

#### obj.dtypes - тип элементов

In [68]:
df.dtypes

a    int32
b    int32
c    int32
d    int32
dtype: object

#### obj.ndim - количество измерений

In [69]:
df.ndim

2

#### obj.shape - форма

In [70]:
df.shape

(6, 4)

#### obj.size - количество элементов

In [71]:
df.size

24

In [72]:
len(df)

6

### С методами разберемся на примерах

#### загрузка из csv файла

In [73]:
df = pd.read_csv('titanic.csv', sep=',', index_col='PassengerId')

In [74]:
df

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


#### просмотр первых строк

In [75]:
df.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [76]:
df.head(10)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


#### просмотр последних строк

In [77]:
df.tail(10)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22.0,0,0,7552,10.5167,,S
884,0,2,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5,,S
885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [78]:
df.shape[0]

891

#### подсчет заполненных значений

In [79]:
df.count()

Survived    891
Pclass      891
Name        891
Sex         891
Age         714
SibSp       891
Parch       891
Ticket      891
Fare        891
Cabin       204
Embarked    889
dtype: int64

#### подсчет количества в разрезе значений (умерших/количество выживших)

In [80]:
df['Survived'].value_counts()

Survived
0    549
1    342
Name: count, dtype: int64

#### подсчет среднего/медианного (средний/медианный возраст)

In [81]:
df['Age'].mean()

29.69911764705882

In [82]:
df['Age'].median()

28.0

#### поиск максимума (максимальная стоимость билета)

In [83]:
df['Fare'].max()

512.3292

In [84]:
df['Fare'].sum()

28693.9493

#### корреляция (братья и сестры (SibSp) / дети и родители (Parch))

In [85]:
df['SibSp'].corr(df['Parch']) * 100

41.48376986201567

#### удаление пропусков

In [86]:
df.head(10)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [87]:
new_df = df.dropna()
new_df.head(10)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0,D56,S
24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S
28,0,1,"Fortune, Mr. Charles Alexander",male,19.0,3,2,19950,263.0,C23 C25 C27,S
53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C
55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C


In [88]:
new_df.count()

Survived    183
Pclass      183
Name        183
Sex         183
Age         183
SibSp       183
Parch       183
Ticket      183
Fare        183
Cabin       183
Embarked    183
dtype: int64

#### исключить строки с NaN в определенном столбце (исключение пассажиров с неуказанной каютой)

In [89]:
new_df = df[df['Cabin'].notna()]
new_df.head(10)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0,D56,S
24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S
28,0,1,"Fortune, Mr. Charles Alexander",male,19.0,3,2,19950,263.0,C23 C25 C27,S
32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C
53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C


#### запрос с помощью логического выражения (выжившие женщины)

In [90]:
new_df = df.query('Sex == "female" & Survived == 1') 
print(new_df["Age"].mean())
new_df.head()

28.84771573604061


Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [91]:
df["Name"].str.contains("Miss.")

PassengerId
1      False
2      False
3       True
4      False
5      False
       ...  
887    False
888     True
889     True
890    False
891    False
Name: Name, Length: 891, dtype: bool

#### почситаем средний возраст выживших и незамужних женщин

In [92]:
new_df = df.query('Sex == "female" & Survived == 1')
new_df = new_df[new_df["Name"].str.contains("Miss.")]
new_df.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
23,1,3,"McGowan, Miss. Anna ""Annie""",female,15.0,0,0,330923,8.0292,,Q
29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q


In [93]:
new_df.count()

Survived    127
Pclass      127
Name        127
Sex         127
Age         105
SibSp       127
Parch       127
Ticket      127
Fare        127
Cabin        44
Embarked    126
dtype: int64

In [94]:
new_df["Age"].mean()

22.914285714285715

In [97]:

data = [['red', 150], ['orange', 200], ['brown', 100], ['green', 170]]
index = ['tomato', 'carrot', 'potatoes', 'cucumber']
columns = ['color', 'weight']
df = pd.DataFrame(data, index=index, columns=columns)
print(df)

           color  weight
tomato       red     150
carrot    orange     200
potatoes   brown     100
cucumber   green     170


In [98]:
df = pd.DataFrame({'index': ['tomato', 'carrot', 'potatoes', 'cucumber'], 'color': ['red', 'orange', 'brown', 'green'], 'weight': [150, 200, 100, 170]})
print(df)

      index   color  weight
0    tomato     red     150
1    carrot  orange     200
2  potatoes   brown     100
3  cucumber   green     170


In [99]:
df = pd.DataFrame({'tomato': ['red', 150], 'carrot':['orange', 200], 'potatoes':['brown', 100], 'cucumber':['green', 170]})
print(df)

  tomato  carrot potatoes cucumber
0    red  orange    brown    green
1    150     200      100      170


In [149]:
data = [['red', ], ['orange', ], ['brown', ], ['green', ]]
index = ['tomato', 'carrot', 'potatoes', 'cucumber']
columns = ['color', 'weight']
df = pd.DataFrame(data, index=index, columns=columns)
sr[::3]
print(df)

ValueError: 2 columns passed, passed data had 1 columns