# Pandas data analysis

In [71]:
import pandas as pd
import numpy as np

In [72]:
student_data = pd.read_csv('https://stepik.org/media/attachments/course/4852/StudentsPerformance.csv')

In [73]:
student_data.tail(10)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
990,male,group E,high school,free/reduced,completed,86,81,75
991,female,group B,some high school,standard,completed,65,82,78
992,female,group D,associate's degree,free/reduced,none,55,76,76
993,female,group D,bachelor's degree,free/reduced,none,62,72,74
994,male,group A,high school,standard,none,63,63,62
995,female,group E,master's degree,standard,completed,88,99,95
996,male,group C,high school,free/reduced,none,62,55,55
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77
999,female,group D,some college,free/reduced,none,77,86,86


In [74]:
student_data.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [75]:
student_data.dtypes

gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object

In [76]:
student_data.shape

(1000, 8)

In [8]:
student_data.groupby('gender').aggregate({'writing score': 'mean'})

Unnamed: 0_level_0,writing score
gender,Unnamed: 1_level_1
female,72.467181
male,63.311203


## Tip 1: Take some data by index location

In [9]:
student_data.iloc[0:5, 0:3]

Unnamed: 0,gender,race/ethnicity,parental level of education
0,female,group B,bachelor's degree
1,female,group C,some college
2,female,group B,master's degree
3,male,group A,associate's degree
4,male,group C,some college


## Tip 2: Take some data by index of data(labels)

In [10]:
student_data_with_name = student_data.iloc[[0,3,4,7,8]]
student_data_with_name.index = ['A', 'B', 'C', 'D', 'E']
student_data_with_name

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
A,female,group B,bachelor's degree,standard,none,72,72,74
B,male,group A,associate's degree,free/reduced,none,47,57,44
C,male,group C,some college,standard,none,76,78,75
D,male,group B,some college,free/reduced,none,40,43,39
E,male,group D,high school,free/reduced,completed,64,64,67


In [11]:
student_data_with_name.loc[["A", "E"], ['gender', 'reading score']]

Unnamed: 0,gender,reading score
A,female,72
E,male,64


## Tip 3: Transform data 

In [12]:
student_data_with_name.iloc[:, 0]

A    female
B      male
C      male
D      male
E      male
Name: gender, dtype: object

In [13]:
my_seria_1 = pd.Series([1,2,3], index=['A', 'B', "C"])
my_seria_2 = pd.Series([4,5,6], index=['A', 'B', "D"])

In [14]:
pd.DataFrame({'col_name_1':my_seria_1, 'col_name_2':my_seria_2})

Unnamed: 0,col_name_1,col_name_2
A,1.0,4.0
B,2.0,5.0
C,3.0,
D,,6.0


In [15]:
student_data_with_name['gender']

A    female
B      male
C      male
D      male
E      male
Name: gender, dtype: object

In [16]:
student_data_with_name[['gender']]

Unnamed: 0,gender
A,female
B,male
C,male
D,male
E,male


## Stepic tasks

In [17]:
data = pd.read_csv('https://stepik.org/media/attachments/course/4852/titanic.csv')

In [18]:
data.shape

(891, 12)

In [19]:
data.dtypes.value_counts()

int64      5
object     5
float64    2
dtype: int64

## Tip 4: Filter

In [22]:
data = pd.read_csv('https://stepik.org/media/attachments/course/4852/StudentsPerformance.csv')
data.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [23]:
data.loc[data.gender == 'female', ['gender', 'lunch']]

Unnamed: 0,gender,lunch
0,female,standard
1,female,standard
2,female,standard
5,female,standard
6,female,standard
...,...,...
993,female,free/reduced
995,female,standard
997,female,free/reduced
998,female,standard


In [24]:
ws_mean = data['writing score'].mean()

In [25]:
data.loc[data['writing score'] > ws_mean]

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
4,male,group C,some college,standard,none,76,78,75
5,female,group B,associate's degree,standard,none,71,83,78
...,...,...,...,...,...,...,...,...
992,female,group D,associate's degree,free/reduced,none,55,76,76
993,female,group D,bachelor's degree,free/reduced,none,62,72,74
995,female,group E,master's degree,standard,completed,88,99,95
998,female,group D,some college,standard,completed,68,78,77


In [26]:
data[(data['writing score'] > ws_mean) & (data['gender'] == 'male')]

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
4,male,group C,some college,standard,none,76,78,75
13,male,group A,some college,standard,completed,78,72,70
16,male,group C,high school,standard,none,88,89,86
24,male,group D,bachelor's degree,free/reduced,completed,74,71,80
25,male,group A,master's degree,free/reduced,none,73,74,72
...,...,...,...,...,...,...,...,...
971,male,group C,some high school,standard,completed,78,72,69
981,male,group D,some high school,standard,none,81,78,78
982,male,group B,some high school,standard,completed,79,85,86
987,male,group E,some high school,standard,completed,81,75,76


In [27]:
data[data['lunch'] == 'free/reduced'].size / data.size

0.355

In [28]:
data['lunch'].value_counts(normalize=True)

standard        0.645
free/reduced    0.355
Name: lunch, dtype: float64

In [29]:
data[data['lunch'] == 'free/reduced'][['math score', 'reading score', 'writing score']].mean()

math score       58.921127
reading score    64.653521
writing score    63.022535
dtype: float64

In [30]:
data[data['lunch'] == 'free/reduced'][['math score', 'reading score', 'writing score']].var()

math score       229.824270
reading score    221.871139
writing score    238.202881
dtype: float64

In [31]:
data[data['lunch'] == 'standard'][['math score', 'reading score', 'writing score']].mean()

math score       70.034109
reading score    71.654264
writing score    70.823256
dtype: float64

In [32]:
data[data['lunch'] == 'standard'][['math score', 'reading score', 'writing score']].var()

math score       186.418089
reading score    191.285560
writing score    205.620887
dtype: float64

In [33]:
data[data['lunch'] == 'standard'].describe() - data[data['lunch'] == 'free/reduced'].describe()

Unnamed: 0,math score,reading score,writing score
count,290.0,290.0,290.0
mean,11.112982,7.000742,7.800721
std,-1.506455,-1.064737,-1.094336
min,19.0,9.0,12.0
25%,12.0,7.0,9.0
50%,9.0,7.0,8.0
75%,11.0,7.0,7.0
max,0.0,0.0,0.0


In [35]:
data.groupby('lunch').aggregate({'math score':['count', 'mean', 'std'], 'reading score':['count', 'mean', 'std'], 'writing score' :['count', 'mean', 'std']})

Unnamed: 0_level_0,math score,math score,math score,reading score,reading score,reading score,writing score,writing score,writing score
Unnamed: 0_level_1,count,mean,std,count,mean,std,count,mean,std
lunch,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
free/reduced,355,58.921127,15.159956,355,64.653521,14.895339,355,63.022535,15.433823
standard,645,70.034109,13.653501,645,71.654264,13.830602,645,70.823256,14.339487


## Tip 5: Query

In [36]:
data = data.rename(columns = {
    'parental level of education': 'parental_level_of_education',
    'math score': 'math_score',
    'reading score': 'reading_score',
    'writing score': 'writing_score',
    'test preparation course': 'test_preparation_course',
})

In [37]:
data.head()

Unnamed: 0,gender,race/ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [39]:
w_q = 70
data.query("writing_score > @w_q").head()

Unnamed: 0,gender,race/ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
4,male,group C,some college,standard,none,76,78,75
5,female,group B,associate's degree,standard,none,71,83,78


## Tip 6: Labels filter

In [40]:
data[list(filter(lambda x: 'score' in x, list(data)))].head()

Unnamed: 0,math_score,reading_score,writing_score
0,72,72,74
1,69,90,88
2,90,95,93
3,47,57,44
4,76,78,75


In [41]:
data.filter(like='score').head()

Unnamed: 0,math_score,reading_score,writing_score
0,72,72,74
1,69,90,88
2,90,95,93
3,47,57,44
4,76,78,75


## Tip 7: Groupby

In [43]:
data = pd.read_csv('https://stepik.org/media/attachments/course/4852/StudentsPerformance.csv')
data.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [45]:
data.groupby('gender').aggregate({'math score': 'mean'})

Unnamed: 0_level_0,math score
gender,Unnamed: 1_level_1
female,63.633205
male,68.728216


In [46]:
data = data.rename(columns = {
    'parental level of education': 'parental_level_of_education',
    'math score': 'math_score',
    'reading score': 'reading_score',
    'writing score': 'writing_score',
    'test preparation course': 'test_preparation_course',
})

In [47]:
data.groupby('gender', as_index=False) \
	.aggregate({'math_score': 'mean'}) \
	.rename(columns={'math_score': 'mean_math_score'})

Unnamed: 0,gender,mean_math_score
0,female,63.633205
1,male,68.728216


In [48]:
data.groupby(['gender', 'race/ethnicity'], as_index=False) \
	.aggregate({'math_score': 'mean'}) \
	.rename(columns={'math_score': 'mean_math_score'})

Unnamed: 0,gender,race/ethnicity,mean_math_score
0,female,group A,58.527778
1,female,group B,61.403846
2,female,group C,62.033333
3,female,group D,65.248062
4,female,group E,70.811594
5,male,group A,63.735849
6,male,group B,65.930233
7,male,group C,67.611511
8,male,group D,69.413534
9,male,group E,76.746479


In [49]:
data.groupby(['gender', 'race/ethnicity']).math_score.nunique()

gender  race/ethnicity
female  group A           29
        group B           51
        group C           59
        group D           53
        group E           44
male    group A           38
        group B           43
        group C           56
        group D           49
        group E           38
Name: math_score, dtype: int64

In [50]:
data.sort_values(['gender', 'math_score'], ascending=False) \
	.groupby('gender').head()

Unnamed: 0,gender,race/ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
149,male,group E,associate's degree,free/reduced,completed,100,100,93
623,male,group A,some college,standard,completed,100,96,86
625,male,group D,some college,standard,completed,100,97,99
916,male,group E,bachelor's degree,standard,completed,100,100,100
306,male,group E,some college,standard,completed,99,87,81
451,female,group E,some college,standard,none,100,92,97
458,female,group E,bachelor's degree,standard,none,100,100,100
962,female,group E,associate's degree,standard,none,100,100,100
114,female,group E,bachelor's degree,standard,completed,99,100,100
263,female,group E,high school,standard,none,99,93,90


## Tip 8: Create new column

In [51]:
data['total_score'] = data.math_score + data.reading_score + data.writing_score

In [52]:
data = data.assign(total_score_log = np.log(data.total_score))

In [53]:
data.head()

Unnamed: 0,gender,race/ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score,total_score_log
0,female,group B,bachelor's degree,standard,none,72,72,74,218,5.384495
1,female,group C,some college,standard,completed,69,90,88,247,5.509388
2,female,group B,master's degree,standard,none,90,95,93,278,5.627621
3,male,group A,associate's degree,free/reduced,none,47,57,44,148,4.997212
4,male,group C,some college,standard,none,76,78,75,229,5.433722


In [54]:
data.drop(['total_score'], axis=1)

Unnamed: 0,gender,race/ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score_log
0,female,group B,bachelor's degree,standard,none,72,72,74,5.384495
1,female,group C,some college,standard,completed,69,90,88,5.509388
2,female,group B,master's degree,standard,none,90,95,93,5.627621
3,male,group A,associate's degree,free/reduced,none,47,57,44,4.997212
4,male,group C,some college,standard,none,76,78,75,5.433722
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,5.641907
996,male,group C,high school,free/reduced,none,62,55,55,5.147494
997,female,group C,high school,free/reduced,completed,59,71,65,5.273000
998,female,group D,some college,standard,completed,68,78,77,5.407172


### Stepic task 1

In [55]:
data_dota = pd.read_csv('https://stepik.org/media/attachments/course/4852/dota_hero_stats.csv')
data_dota.head()

Unnamed: 0.1,Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"['Carry', 'Escape', 'Nuker']"
1,1,Melee,2,2,Axe,npc_dota_hero_axe,str,"['Initiator', 'Durable', 'Disabler', 'Jungler']"
2,2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"['Support', 'Disabler', 'Nuker', 'Durable']"
3,3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"['Carry', 'Disabler', 'Jungler', 'Nuker', 'Ini..."
4,4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"['Support', 'Disabler', 'Nuker', 'Jungler']"


In [56]:
data_dota.groupby('legs').describe()

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,id,id,id,id,id,id,id,id
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
legs,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
0,11.0,46.0,32.227318,9.0,20.0,38.0,76.0,92.0,11.0,47.727273,32.554849,10.0,21.5,40.0,78.0,94.0
2,95.0,58.789474,34.307091,0.0,29.5,58.0,86.5,116.0,95.0,60.842105,34.994928,1.0,31.5,60.0,88.5,129.0
4,7.0,61.714286,31.721256,2.0,53.0,61.0,84.0,95.0,7.0,63.571429,32.035694,3.0,55.0,63.0,86.0,97.0
6,3.0,68.0,46.67976,15.0,50.5,86.0,94.5,103.0,3.0,69.666667,47.247575,16.0,52.0,88.0,96.5,105.0
8,1.0,59.0,,59.0,59.0,59.0,59.0,59.0,1.0,61.0,,61.0,61.0,61.0,61.0,61.0


### Stepic task 2

In [57]:
data_lp = pd.read_csv('https://stepik.org/media/attachments/course/4852/accountancy.csv')
data_lp.head()

Unnamed: 0.1,Unnamed: 0,Executor,Type,Salary
0,0,Pupa,D,63
1,1,Pupa,A,158
2,2,Pupa,D,194
3,3,Pupa,E,109
4,4,Loopa,E,184


In [58]:
data_lp.groupby(['Executor', 'Type'])['Salary'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
Executor,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Loopa,A,1.0,58.0,,58.0,58.0,58.0,58.0,58.0
Loopa,B,6.0,145.166667,76.851589,50.0,83.75,156.5,188.75,249.0
Loopa,C,6.0,154.333333,76.81059,54.0,97.0,160.0,210.25,249.0
Loopa,D,7.0,137.714286,35.761778,78.0,119.0,146.0,160.5,181.0
Loopa,E,6.0,164.0,54.680892,83.0,130.5,170.0,210.25,220.0
Loopa,F,2.0,238.0,8.485281,232.0,235.0,238.0,241.0,244.0
Pupa,A,6.0,160.833333,67.008706,55.0,125.75,172.0,209.25,234.0
Pupa,B,1.0,77.0,,77.0,77.0,77.0,77.0,77.0
Pupa,C,2.0,74.5,27.577164,55.0,64.75,74.5,84.25,94.0
Pupa,D,4.0,146.5,59.769,63.0,123.75,164.5,187.25,194.0


### Stepic task 3

In [60]:
data_dota.head()

Unnamed: 0.1,Unnamed: 0,attack_type,id,legs,localized_name,name,primary_attr,roles
0,0,Melee,1,2,Anti-Mage,npc_dota_hero_antimage,agi,"['Carry', 'Escape', 'Nuker']"
1,1,Melee,2,2,Axe,npc_dota_hero_axe,str,"['Initiator', 'Durable', 'Disabler', 'Jungler']"
2,2,Ranged,3,4,Bane,npc_dota_hero_bane,int,"['Support', 'Disabler', 'Nuker', 'Durable']"
3,3,Melee,4,2,Bloodseeker,npc_dota_hero_bloodseeker,agi,"['Carry', 'Disabler', 'Jungler', 'Nuker', 'Ini..."
4,4,Ranged,5,2,Crystal Maiden,npc_dota_hero_crystal_maiden,int,"['Support', 'Disabler', 'Nuker', 'Jungler']"


In [61]:
data_dota.groupby(['attack_type', 'primary_attr'], as_index=False).aggregate({'id':'count'}).sort_values('id', ascending=False)

Unnamed: 0,attack_type,primary_attr,id
4,Ranged,int,40
2,Melee,str,35
0,Melee,agi,19
3,Ranged,agi,18
5,Ranged,str,3
1,Melee,int,2


### Stepic task 4

In [62]:
concentrations = pd.read_csv('http://stepik.org/media/attachments/course/4852/algae.csv')
concentrations.head()

Unnamed: 0,species,genus,group,sucrose,alanin,citrate,glucose,oleic_acid
0,Fucus_vesiculosus,Fucus,brown,3.001472,3.711498,5.004262,2.548459,6.405165
1,Saccharina_japonica,Saccharina,brown,6.73107,1.255251,5.621499,6.013219,4.1567
2,Fucus_serratus,Fucus,brown,3.27687,0.346431,1.216767,3.623225,0.304573
3,Fucus_distichus,Fucus,brown,6.786996,6.641303,6.423606,2.272724,3.393203
4,Cladophora_fracta,Cladophora,green,3.86147,1.64845,6.940588,2.316955,2.528886


In [63]:
concentrations.groupby('genus').describe()

Unnamed: 0_level_0,sucrose,sucrose,sucrose,sucrose,sucrose,sucrose,sucrose,sucrose,alanin,alanin,...,glucose,glucose,oleic_acid,oleic_acid,oleic_acid,oleic_acid,oleic_acid,oleic_acid,oleic_acid,oleic_acid
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
genus,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Ascophyllum,1.0,6.825467,,6.825467,6.825467,6.825467,6.825467,6.825467,1.0,0.875429,...,3.414961,3.414961,1.0,2.432526,,2.432526,2.432526,2.432526,2.432526,2.432526
Cladophora,3.0,4.008792,1.634817,2.452623,3.157047,3.86147,4.786877,5.712284,3.0,3.997055,...,3.041936,3.343707,3.0,2.263472,0.735111,1.432514,1.9807,2.528886,2.678951,2.829016
Fucus,3.0,4.355112,2.11057,3.001472,3.139171,3.27687,5.031933,6.786996,3.0,3.566411,...,3.085842,3.623225,3.0,3.367647,3.050376,0.304573,1.848888,3.393203,4.899184,6.405165
Palmaria,1.0,0.70458,,0.70458,0.70458,0.70458,0.70458,0.70458,1.0,3.17644,...,3.24209,3.24209,1.0,2.245538,,2.245538,2.245538,2.245538,2.245538,2.245538
Saccharina,2.0,4.183596,3.602672,1.636122,2.909859,4.183596,5.457333,6.73107,2.0,3.524207,...,5.496907,6.013219,2.0,4.487252,0.467471,4.1567,4.321976,4.487252,4.652528,4.817804


In [64]:
concentrations.groupby('genus').mean(numeric_only=True)

Unnamed: 0_level_0,sucrose,alanin,citrate,glucose,oleic_acid
genus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ascophyllum,6.825467,0.875429,5.253527,3.414961,2.432526
Cladophora,4.008792,3.997055,5.288311,2.800276,2.263472
Fucus,4.355112,3.566411,4.214878,2.814803,3.367647
Palmaria,0.70458,3.17644,5.573905,3.24209,2.245538
Saccharina,4.183596,3.524207,3.34671,4.980594,4.487252


### Stepic task 5

In [65]:
d = concentrations.groupby('genus').aggregate({'alanin':['min', 'mean', 'max']})
d

Unnamed: 0_level_0,alanin,alanin,alanin
Unnamed: 0_level_1,min,mean,max
genus,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Ascophyllum,0.875429,0.875429,0.875429
Cladophora,1.64845,3.997055,6.881024
Fucus,0.346431,3.566411,6.641303
Palmaria,3.17644,3.17644,3.17644
Saccharina,1.255251,3.524207,5.793163


In [66]:
da = d['alanin']
list(map(lambda x: round(x, 2), list(da.iloc[2])))

[0.35, 3.57, 6.64]

In [67]:
np.around(concentrations.query("genus == 'Fucus'").alanin.describe().loc[['min', 'mean', 'max']].values, decimals=2)

array([0.35, 3.57, 6.64])

### Stepic task 6

In [68]:
data_last = pd.read_csv('http://stepik.org/media/attachments/course/4852/algae.csv')
data_last.head()

Unnamed: 0,species,genus,group,sucrose,alanin,citrate,glucose,oleic_acid
0,Fucus_vesiculosus,Fucus,brown,3.001472,3.711498,5.004262,2.548459,6.405165
1,Saccharina_japonica,Saccharina,brown,6.73107,1.255251,5.621499,6.013219,4.1567
2,Fucus_serratus,Fucus,brown,3.27687,0.346431,1.216767,3.623225,0.304573
3,Fucus_distichus,Fucus,brown,6.786996,6.641303,6.423606,2.272724,3.393203
4,Cladophora_fracta,Cladophora,green,3.86147,1.64845,6.940588,2.316955,2.528886


In [69]:
data_last.groupby('group').aggregate({'sucrose': ['max', 'min']})

Unnamed: 0_level_0,sucrose,sucrose
Unnamed: 0_level_1,max,min
group,Unnamed: 1_level_2,Unnamed: 2_level_2
brown,6.825467,1.636122
green,5.712284,2.452623
red,0.70458,0.70458


In [70]:
data_last.groupby('group').aggregate({'citrate': ['std']})

Unnamed: 0_level_0,citrate
Unnamed: 0_level_1,std
group,Unnamed: 1_level_2
brown,2.338577
green,1.987489
red,
