### Основы

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('data/StudentsPerformance.csv')

In [3]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [4]:
df.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [7]:
df.dtypes

gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object

In [8]:
df.shape

(1000, 8)

In [9]:
df.groupby('gender').aggregate({'writing score' : 'mean'})

Unnamed: 0_level_0,writing score
gender,Unnamed: 1_level_1
female,72.467181
male,63.311203


In [10]:
df.size

8000

In [11]:
df.iloc[0:5, 0:3]

Unnamed: 0,gender,race/ethnicity,parental level of education
0,female,group B,bachelor's degree
1,female,group C,some college
2,female,group B,master's degree
3,male,group A,associate's degree
4,male,group C,some college


In [13]:
df.iloc[[0, 3, 5, -1], [0, 7, -1]]

Unnamed: 0,gender,writing score,writing score.1
0,female,74,74
3,male,44,44
5,female,78,78
999,female,86,86


In [18]:
df_with_names = df.iloc[[0, 3, 4, 7, 8]]

In [21]:
df_with_names.index = ['Cersei', 'Tywin', 'Gregor', 'Joffrey', 'Ilyn Payne']
df_with_names

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
Cersei,female,group B,bachelor's degree,standard,none,72,72,74
Tywin,male,group A,associate's degree,free/reduced,none,47,57,44
Gregor,male,group C,some college,standard,none,76,78,75
Joffrey,male,group B,some college,free/reduced,none,40,43,39
Ilyn Payne,male,group D,high school,free/reduced,completed,64,64,67


In [28]:
type(df_with_names.loc[['Cersei', 'Gregor'], ['race/ethnicity', 'lunch']])

pandas.core.frame.DataFrame

In [27]:
type(df_with_names.iloc[:, 0])

pandas.core.series.Series

In [37]:
series1 = pd.Series([1, 2, 3], index = ['Cersei', 'Gregor', 'Tywin'])
series2 = pd.Series([4, 5, 6, 7], index = ['Cersei', 'Gregor', 'Tywin', 'Joffrey'])

In [38]:
pd.DataFrame({'col_name1' : series1, 'col_name2' : series2})

Unnamed: 0,col_name1,col_name2
Cersei,1.0,4
Gregor,2.0,5
Joffrey,,7
Tywin,3.0,6


In [41]:
df_with_names['gender'].shape

(5,)

In [42]:
df_with_names[['gender']].shape

(5, 1)

In [52]:
df.index

RangeIndex(start=0, stop=1000, step=1)

In [62]:
df = pd.read_csv('data/titanic.csv')
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

### Фильтрация данных

In [288]:
df = pd.read_csv('data/StudentsPerformance.csv')
df.loc[df.gender == 'female', ['gender', 'writing score']]

Unnamed: 0,gender,writing score
0,female,74
1,female,88
2,female,93
5,female,78
6,female,92
...,...,...
993,female,74
995,female,95
997,female,65
998,female,77


In [76]:
mean_writing_score = df['writing score'].mean()

In [77]:
df.loc[df['writing score'] > mean_writing_score]

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
4,male,group C,some college,standard,none,76,78,75
5,female,group B,associate's degree,standard,none,71,83,78
...,...,...,...,...,...,...,...,...
992,female,group D,associate's degree,free/reduced,none,55,76,76
993,female,group D,bachelor's degree,free/reduced,none,62,72,74
995,female,group E,master's degree,standard,completed,88,99,95
998,female,group D,some college,standard,completed,68,78,77


In [80]:
# дурной тон задавать query, как query, нужно более подробно
query = (df['writing score'] > mean_writing_score) & (df['gender'] == 'female')
query

0       True
1       True
2       True
3      False
4      False
       ...  
995     True
996    False
997    False
998     True
999     True
Length: 1000, dtype: bool

In [82]:
result = df[query]
result

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
5,female,group B,associate's degree,standard,none,71,83,78
6,female,group B,some college,standard,completed,88,95,92
...,...,...,...,...,...,...,...,...
992,female,group D,associate's degree,free/reduced,none,55,76,76
993,female,group D,bachelor's degree,free/reduced,none,62,72,74
995,female,group E,master's degree,standard,completed,88,99,95
998,female,group D,some college,standard,completed,68,78,77


**Задание:**

У какой доли студентов из датасэта в колонке lunch указано free/reduced?

In [93]:
df[df['lunch'] == 'free/reduced'].shape[0]/df.shape[0]

0.355

**Задание:**

Как различается среднее и дисперсия оценок по предметам у групп студентов со стандартным или урезанным ланчем?

In [118]:
df.loc[df['lunch'] == 'standard'].describe()

Unnamed: 0,math score,reading score,writing score
count,645.0,645.0,645.0
mean,70.034109,71.654264,70.823256
std,13.653501,13.830602,14.339487
min,19.0,26.0,22.0
25%,61.0,63.0,62.0
50%,69.0,72.0,72.0
75%,80.0,82.0,81.0
max,100.0,100.0,100.0


In [119]:
df.loc[df['lunch'] == 'free/reduced'].describe()

Unnamed: 0,math score,reading score,writing score
count,355.0,355.0,355.0
mean,58.921127,64.653521,63.022535
std,15.159956,14.895339,15.433823
min,0.0,17.0,10.0
25%,49.0,56.0,53.0
50%,60.0,65.0,64.0
75%,69.0,75.0,74.0
max,100.0,100.0,100.0


---

In [115]:
mean_marks_standart_lunch = df.loc[df['lunch'] == 'standard', ['math score', 'reading score', 'writing score']].mean()
dispersion_marks_standart_lunch = df.loc[df['lunch'] == 'standard', ['math score', 'reading score', 'writing score']].var()
mean_marks_reduced_lunch = df.loc[df['lunch'] == 'free/reduced', ['math score', 'reading score', 'writing score']].mean()
dispersion_marks_reduced_lunch = df.loc[df['lunch'] == 'free/reduced', ['math score', 'reading score', 'writing score']].var()
print(f'''
mean_marks_standart_lunch: 
{mean_marks_standart_lunch}

dispersion_marks_standart_lunch: 
{dispersion_marks_standart_lunch}

mean_marks_reduced_lunch: 
{mean_marks_reduced_lunch}

dispersion_marks_reduced_lunch: 
{dispersion_marks_reduced_lunch}
''')


mean_marks_standart_lunch: 
math score       70.034109
reading score    71.654264
writing score    70.823256
dtype: float64

dispersion_marks_standart_lunch: 
math score       186.418089
reading score    191.285560
writing score    205.620887
dtype: float64

mean_marks_reduced_lunch: 
math score       58.921127
reading score    64.653521
writing score    63.022535
dtype: float64

dispersion_marks_reduced_lunch: 
math score       229.824270
reading score    221.871139
writing score    238.202881
dtype: float64



In [121]:
#qf.query()
df

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95
996,male,group C,high school,free/reduced,none,62,55,55
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77


In [300]:
df = df \
    .rename(columns = 
            {'test preparation course' : 'test_preparation_course',
            'math score' : 'math_score',
            'reading score' : 'reading_score',
            'writing score' : 'writing_score'})
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [301]:
df.math_score

0      72
1      69
2      90
3      47
4      76
       ..
995    88
996    62
997    59
998    68
999    77
Name: math_score, Length: 1000, dtype: int64

In [302]:
reading_score_query = 95

In [303]:
df.query("writing_score > 75 & gender == 'female' & reading_score > @reading_score_query")

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score
106,female,group D,master's degree,standard,none,87,100,100
114,female,group E,bachelor's degree,standard,completed,99,100,100
165,female,group C,bachelor's degree,standard,completed,96,100,100
179,female,group D,some high school,standard,completed,97,100,100
373,female,group D,some college,standard,completed,82,97,96
403,female,group D,high school,standard,completed,88,99,100
458,female,group E,bachelor's degree,standard,none,100,100,100
514,female,group B,master's degree,free/reduced,completed,77,97,94
546,female,group A,some high school,standard,completed,92,100,97
566,female,group E,bachelor's degree,free/reduced,completed,92,100,100


In [304]:
list(df)

['gender',
 'race/ethnicity',
 'parental level of education',
 'lunch',
 'test_preparation_course',
 'math_score',
 'reading_score',
 'writing_score']

In [305]:
score_columns = [i for i in list(df) if 'score' in i]
score_columns

['math_score', 'reading_score', 'writing_score']

In [306]:
df[score_columns].head()

Unnamed: 0,math_score,reading_score,writing_score
0,72,72,74
1,69,90,88
2,90,95,93
3,47,57,44
4,76,78,75


In [307]:
df.filter(like = 'score')

Unnamed: 0,math_score,reading_score,writing_score
0,72,72,74
1,69,90,88
2,90,95,93
3,47,57,44
4,76,78,75
...,...,...,...
995,88,99,95
996,62,55,55
997,59,71,65
998,68,78,77


In [308]:
df.filter(like = '2', axis = 0)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score
2,female,group B,master's degree,standard,none,90,95,93
12,female,group B,high school,standard,none,65,81,73
20,male,group D,high school,standard,none,66,69,63
21,female,group B,some college,free/reduced,completed,65,75,70
22,male,group D,some college,standard,none,44,54,53
...,...,...,...,...,...,...,...,...
952,female,group E,some high school,free/reduced,none,74,74,72
962,female,group E,associate's degree,standard,none,100,100,100
972,female,group A,high school,free/reduced,completed,53,50,60
982,male,group B,some high school,standard,completed,79,85,86


### Группировка и агрегация

In [309]:
df.groupby('gender').mean()

Unnamed: 0_level_0,math_score,reading_score,writing_score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,63.633205,72.608108,72.467181
male,68.728216,65.473029,63.311203


In [310]:
mean_scores = df.groupby(['gender', 'race/ethnicity']) \
    .aggregate({'math_score':'mean', 'reading_score':'mean'}) \
    .rename(columns = {'math_score':'mean_math_score', 'reading_score':'mean_reading_score'})
mean_scores

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_math_score,mean_reading_score
gender,race/ethnicity,Unnamed: 2_level_1,Unnamed: 3_level_1
female,group A,58.527778,69.0
female,group B,61.403846,71.076923
female,group C,62.033333,71.944444
female,group D,65.248062,74.046512
female,group E,70.811594,75.84058
male,group A,63.735849,61.735849
male,group B,65.930233,62.848837
male,group C,67.611511,65.42446
male,group D,69.413534,66.135338
male,group E,76.746479,70.295775


In [311]:
mean_scores.loc[[('female', 'group A'), ('female', 'group B')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_math_score,mean_reading_score
gender,race/ethnicity,Unnamed: 2_level_1,Unnamed: 3_level_1
female,group A,58.527778,69.0
female,group B,61.403846,71.076923


In [312]:
mean_scores = df.groupby(['gender', 'race/ethnicity'], as_index=False) \
    .aggregate({'math_score':'mean', 'reading_score':'mean'}) \
    .rename(columns = {'math_score':'mean_math_score', 'reading_score':'mean_reading_score'})
mean_scores

Unnamed: 0,gender,race/ethnicity,mean_math_score,mean_reading_score
0,female,group A,58.527778,69.0
1,female,group B,61.403846,71.076923
2,female,group C,62.033333,71.944444
3,female,group D,65.248062,74.046512
4,female,group E,70.811594,75.84058
5,male,group A,63.735849,61.735849
6,male,group B,65.930233,62.848837
7,male,group C,67.611511,65.42446
8,male,group D,69.413534,66.135338
9,male,group E,76.746479,70.295775


In [313]:
df.math_score.nunique()

81

In [314]:
df.groupby(['gender', 'race/ethnicity']).math_score.unique()

gender  race/ethnicity
female  group A           [50, 55, 41, 58, 51, 44, 71, 38, 49, 59, 47, 7...
        group B           [72, 90, 71, 88, 38, 65, 18, 63, 53, 47, 79, 5...
        group C           [69, 54, 67, 58, 66, 71, 33, 0, 60, 39, 73, 76...
        group D           [62, 69, 74, 50, 75, 57, 59, 58, 61, 71, 73, 6...
        group E           [56, 50, 82, 62, 63, 99, 42, 66, 75, 81, 83, 4...
male    group A           [47, 78, 73, 39, 62, 80, 50, 54, 57, 77, 72, 6...
        group B           [40, 69, 57, 59, 65, 67, 61, 44, 49, 79, 60, 9...
        group C           [76, 58, 88, 46, 70, 55, 82, 53, 61, 49, 27, 7...
        group D           [64, 40, 66, 44, 74, 88, 52, 58, 45, 63, 42, 6...
        group E           [97, 81, 53, 77, 79, 30, 72, 66, 83, 87, 70, 1...
Name: math_score, dtype: object

In [317]:
df.sort_values(['gender', 'math_score'], ascending=False) \
    .groupby('gender').head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score
149,male,group E,associate's degree,free/reduced,completed,100,100,93
623,male,group A,some college,standard,completed,100,96,86
625,male,group D,some college,standard,completed,100,97,99
916,male,group E,bachelor's degree,standard,completed,100,100,100
306,male,group E,some college,standard,completed,99,87,81
451,female,group E,some college,standard,none,100,92,97
458,female,group E,bachelor's degree,standard,none,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
114,female,group E,bachelor's degree,standard,completed,99,100,100
263,female,group E,high school,standard,none,99,93,90


In [184]:
df['total_score'] = df.math_score + df.reading_score + df.writing_score
df

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score
0,female,group B,bachelor's degree,standard,none,72,72,74,218
1,female,group C,some college,standard,completed,69,90,88,247
2,female,group B,master's degree,standard,none,90,95,93,278
3,male,group A,associate's degree,free/reduced,none,47,57,44,148
4,male,group C,some college,standard,none,76,78,75,229
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,282
996,male,group C,high school,free/reduced,none,62,55,55,172
997,female,group C,high school,free/reduced,completed,59,71,65,195
998,female,group D,some college,standard,completed,68,78,77,223


In [186]:
df = df.assign(total_score_log = np.log(df.total_score))
df

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score,total_score_log
0,female,group B,bachelor's degree,standard,none,72,72,74,218,5.384495
1,female,group C,some college,standard,completed,69,90,88,247,5.509388
2,female,group B,master's degree,standard,none,90,95,93,278,5.627621
3,male,group A,associate's degree,free/reduced,none,47,57,44,148,4.997212
4,male,group C,some college,standard,none,76,78,75,229,5.433722
...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,282,5.641907
996,male,group C,high school,free/reduced,none,62,55,55,172,5.147494
997,female,group C,high school,free/reduced,completed,59,71,65,195,5.273000
998,female,group D,some college,standard,completed,68,78,77,223,5.407172


In [191]:
df.drop(['total_score', 'lunch'], axis = 1)

Unnamed: 0,gender,race/ethnicity,parental level of education,test_preparation_course,math_score,reading_score,writing_score,total_score_log
0,female,group B,bachelor's degree,none,72,72,74,5.384495
1,female,group C,some college,completed,69,90,88,5.509388
2,female,group B,master's degree,none,90,95,93,5.627621
3,male,group A,associate's degree,none,47,57,44,4.997212
4,male,group C,some college,none,76,78,75,5.433722
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,completed,88,99,95,5.641907
996,male,group C,high school,none,62,55,55,5.147494
997,female,group C,high school,completed,59,71,65,5.273000
998,female,group D,some college,completed,68,78,77,5.407172


In [192]:
df

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score,total_score_log
0,female,group B,bachelor's degree,standard,none,72,72,74,218,5.384495
1,female,group C,some college,standard,completed,69,90,88,247,5.509388
2,female,group B,master's degree,standard,none,90,95,93,278,5.627621
3,male,group A,associate's degree,free/reduced,none,47,57,44,148,4.997212
4,male,group C,some college,standard,none,76,78,75,229,5.433722
...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,282,5.641907
996,male,group C,high school,free/reduced,none,62,55,55,172,5.147494
997,female,group C,high school,free/reduced,completed,59,71,65,195,5.273000
998,female,group D,some college,standard,completed,68,78,77,223,5.407172


**Задание:**

Пересчитаем число ног у героев игры Dota2! Сгруппируйте героев из датасэта по числу их ног (колонка legs).

In [225]:
df = pd.read_csv('data/dota_hero_stats.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"['Carry', 'Escape', 'Nuker']"
1,1,Melee,2,2,Axe,npc_dota_hero_axe,str,"['Initiator', 'Durable', 'Disabler', 'Jungler']"
2,2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"['Support', 'Disabler', 'Nuker', 'Durable']"
3,3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"['Carry', 'Disabler', 'Jungler', 'Nuker', 'Ini..."
4,4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"['Support', 'Disabler', 'Nuker', 'Jungler']"


In [226]:
df.legs.value_counts()

2    95
0    11
4     7
6     3
8     1
Name: legs, dtype: int64

In [227]:
df.groupby('legs').count()

Unnamed: 0_level_0,Unnamed: 0,attack_type,id,localized_name,name,primary_attr,roles
legs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,11,11,11,11,11,11,11
2,95,95,95,95,95,95,95
4,7,7,7,7,7,7,7
6,3,3,3,3,3,3,3
8,1,1,1,1,1,1,1


In [228]:
df = pd.read_csv('data/accountancy.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Executor,Type,Salary
0,0,Pupa,D,63
1,1,Pupa,A,158
2,2,Pupa,D,194
3,3,Pupa,E,109
4,4,Loopa,E,184


**Задание:**

К нам поступили данные из бухгалтерии о заработках Лупы и Пупы за разные задачи! Посмотрите у кого из них больше средний заработок в различных категориях (колонка Type) и заполните таблицу, указывая исполнителя с большим заработком в каждой из категорий.

In [229]:
df.groupby(['Type', 'Executor'])['Salary'].mean()

Type  Executor
A     Loopa        58.000000
      Pupa        160.833333
B     Loopa       145.166667
      Pupa         77.000000
C     Loopa       154.333333
      Pupa         74.500000
D     Loopa       137.714286
      Pupa        146.500000
E     Loopa       164.000000
      Pupa        131.200000
F     Loopa       238.000000
      Pupa        136.250000
Name: Salary, dtype: float64

In [230]:
df.groupby(['Type', 'Executor']).Salary.mean()

Type  Executor
A     Loopa        58.000000
      Pupa        160.833333
B     Loopa       145.166667
      Pupa         77.000000
C     Loopa       154.333333
      Pupa         74.500000
D     Loopa       137.714286
      Pupa        146.500000
E     Loopa       164.000000
      Pupa        131.200000
F     Loopa       238.000000
      Pupa        136.250000
Name: Salary, dtype: float64

**Задача:**

Продолжим исследование героев Dota2. Сгруппируйте по колонкам attack_type и primary_attr и выберите самый распространённый набор характеристик.

In [318]:
df = pd.read_csv('data/dota_hero_stats.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"['Carry', 'Escape', 'Nuker']"
1,1,Melee,2,2,Axe,npc_dota_hero_axe,str,"['Initiator', 'Durable', 'Disabler', 'Jungler']"
2,2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"['Support', 'Disabler', 'Nuker', 'Durable']"
3,3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"['Carry', 'Disabler', 'Jungler', 'Nuker', 'Ini..."
4,4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"['Support', 'Disabler', 'Nuker', 'Jungler']"


In [345]:
df.groupby(['attack_type', 'primary_attr']).count().sort_values('id', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,id,legs,localized_name,name,roles
attack_type,primary_attr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Ranged,int,40,40,40,40,40,40
Melee,str,35,35,35,35,35,35
Melee,agi,19,19,19,19,19,19
Ranged,agi,18,18,18,18,18,18
Ranged,str,3,3,3,3,3,3
Melee,int,2,2,2,2,2,2


**Задание:**

Пользуясь предыдущими данными, укажите через пробел (без запятых) чему равны минимальная, средняя и максимальная концентрации аланина (alanin) среди видов рода Fucus. Округлите до 2-ого знака, десятичным разделителем является точка.

In [349]:
df = pd.read_csv('data/algae.csv')
df.head()

Unnamed: 0,species,genus,group,sucrose,alanin,citrate,glucose,oleic_acid
0,Fucus_vesiculosus,Fucus,brown,3.001472,3.711498,5.004262,2.548459,6.405165
1,Saccharina_japonica,Saccharina,brown,6.73107,1.255251,5.621499,6.013219,4.1567
2,Fucus_serratus,Fucus,brown,3.27687,0.346431,1.216767,3.623225,0.304573
3,Fucus_distichus,Fucus,brown,6.786996,6.641303,6.423606,2.272724,3.393203
4,Cladophora_fracta,Cladophora,green,3.86147,1.64845,6.940588,2.316955,2.528886


In [397]:
df.loc[df['genus'] == 'Fucus'].alanin.round(decimals = 2)

0    3.71
2    0.35
3    6.64
Name: alanin, dtype: float64

In [402]:
round(df.loc[df['genus'] == 'Fucus'].alanin.mean(), ndigits = 2)

3.57

In [394]:
df.query("genus == 'Fucus'").alanin.round(decimals = 2)

0    3.71
2    0.35
3    6.64
Name: alanin, dtype: float64

---

In [403]:
df

Unnamed: 0,species,genus,group,sucrose,alanin,citrate,glucose,oleic_acid
0,Fucus_vesiculosus,Fucus,brown,3.001472,3.711498,5.004262,2.548459,6.405165
1,Saccharina_japonica,Saccharina,brown,6.73107,1.255251,5.621499,6.013219,4.1567
2,Fucus_serratus,Fucus,brown,3.27687,0.346431,1.216767,3.623225,0.304573
3,Fucus_distichus,Fucus,brown,6.786996,6.641303,6.423606,2.272724,3.393203
4,Cladophora_fracta,Cladophora,green,3.86147,1.64845,6.940588,2.316955,2.528886
5,Cladophora_compacta,Cladophora,green,5.712284,3.461692,3.082826,3.343707,1.432514
6,Cladophora_gracilis,Cladophora,green,2.452623,6.881024,5.84152,2.740165,2.829016
7,Palmaria_palmata,Palmaria,red,0.70458,3.17644,5.573905,3.24209,2.245538
8,Saccharina_latissima,Saccharina,brown,1.636122,5.793163,1.07192,3.947968,4.817804
9,Ascophyllum_nodosum,Ascophyllum,brown,6.825467,0.875429,5.253527,3.414961,2.432526


In [408]:
df_by_group = df.groupby('group')
df_by_group.count()

Unnamed: 0_level_0,species,genus,sucrose,alanin,citrate,glucose,oleic_acid
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
brown,6,6,6,6,6,6,6
green,3,3,3,3,3,3,3
red,1,1,1,1,1,1,1


In [413]:
# Размах
df_by_group.sucrose.max() - df_by_group.sucrose.min()

group
brown    5.189345
green    3.259662
red      0.000000
Name: sucrose, dtype: float64

In [415]:
df_by_group.citrate.var()

group
brown    5.468942
green    3.950113
red           NaN
Name: citrate, dtype: float64

### Визуализация