In [9]:
import pandas as pd 

# Create a DataFrame from a dict

In [11]:
# Build a small demo frame. The scalar 18 given for 'age' is broadcast to
# every row; the row count comes from the list-valued columns.
df = pd.DataFrame(
    {
        'age': 18,
        'name': ['SOW', 'TRAORE', 'FAINKE'],
        'cardio': [60, 70, 80],
    }
)
df

Unnamed: 0,age,name,cardio
0,18,SOW,60
1,18,TRAORE,70
2,18,FAINKE,80


# Indexing df 

In [3]:
# Selecting a single column by its label returns a Series.
df['name']

0       SOW
1    TRAORE
2    FAINKE
Name: name, dtype: object

In [5]:
# Positional slicing: every row, columns at positions 0 and 1 ('age', 'name').
df.iloc[:, :2]

Unnamed: 0,age,name
0,18,SOW
1,18,TRAORE
2,18,FAINKE


In [7]:
# The slice 2:3 selects only the column at position 2 ('cardio'); the result
# is still a DataFrame, as the tabular output shows.
df.iloc[:, 2:3]

Unnamed: 0,cardio
0,60
1,70
2,80


In [8]:
# Label-based selection: all rows of the 'cardio' column, returned as a Series.
df.loc[:, 'cardio']

0    60
1    70
2    80
Name: cardio, dtype: int64

# Create Series

In [12]:
# A standalone one-dimensional Series; no index is given, so pandas assigns
# the default integer labels 0, 1, 2.
s = pd.Series(data=[60, 70, 80])

In [16]:
# print() shows the plain-text repr: index labels, values, then the dtype.
print(s)

0    60
1    70
2    80
dtype: int64


In [18]:
# Extract the Series data as a NumPy array. to_numpy() is the accessor the
# pandas documentation recommends over the older .values attribute; for this
# int64 Series the resulting array is the same.
s.to_numpy()

array([60, 70, 80])

# Modifying a DataFrame


In [14]:
# .loc slices by label: '1:' targets every row from label 1 to the end of the
# index, and only the 'age' column is overwritten.
df.loc[1:, 'age'] = 16

In [15]:
# Display the frame to check the update: rows 1 and 2 now hold age 16.
df 

Unnamed: 0,age,name,cardio
0,18,SOW,60
1,16,TRAORE,70
2,16,FAINKE,80


# Boolean indexing

In [21]:
# Boolean indexing: the comparison df['cardio'] > 60 yields a True/False mask
# per row, and indexing with that mask keeps only the rows where it is True.
df[df['cardio']>60]

Unnamed: 0,age,name,cardio
1,16,TRAORE,70
2,16,FAINKE,80


In [22]:
# Strictly greater than 16: only row 0 (age 18) passes the filter.
df[df['age']>16]

Unnamed: 0,age,name,cardio
0,18,SOW,60


In [25]:
# Strictly less than 18: rows 1 and 2 (age 16) pass the filter.
df[df['age']<18]

Unnamed: 0,age,name,cardio
1,16,TRAORE,70
2,16,FAINKE,80


In [30]:
import numpy as np
# 3x3 boolean matrix with True on the diagonal only.
target_el = np.array([[True, False, False], [False, True, False], [False, False, True]])
# NOTE(review): the recorded output of this cell is the whole frame, not the
# three diagonal cells, so df[...] does not appear to mask element by element
# when given a 2-D NumPy array. If element-wise selection was intended,
# df.where(target_el) looks like the right call -- TODO confirm intent.
df[target_el]

Unnamed: 0,age,name,cardio
0,18,SOW,60
1,16,TRAORE,70
2,16,FAINKE,80


In [35]:
# Names of the rows whose cardio exceeds 60. The row filter and the column
# label go through a single .loc call instead of two chained [] lookups: this
# skips the intermediate filtered frame and is the form that stays correct if
# the expression is ever reused on the left-hand side of an assignment.
df.loc[df['cardio'] > 60, 'name']

1    TRAORE
2    FAINKE
Name: name, dtype: object

In [36]:
# A list of labels selects several columns at once, in the order listed
# ('name' before 'age'), independent of their order in df.
df.loc[:, ['name', 'age']]

Unnamed: 0,name,age
0,SOW,18
1,TRAORE,16
2,FAINKE,16


In [38]:
# Assigning a scalar to a column sets that value on every row.
df['age'] = 22
df 

Unnamed: 0,age,name,cardio
0,22,SOW,60
1,22,TRAORE,70
2,22,FAINKE,80


In [41]:
# Label slices in .loc include both endpoints, so 1:2 updates rows 1 and 2.
df.loc[1:2, 'age'] = 24
df 

Unnamed: 0,age,name,cardio
0,22,SOW,60
1,24,TRAORE,70
2,24,FAINKE,80


In [43]:
# 'love' is not yet a column, so this assignment appends it; every row
# receives None, which is why the new column displays as empty.
df.loc[:, 'love'] = None 
df 

Unnamed: 0,age,name,cardio,love
0,22,SOW,60,
1,24,TRAORE,70,
2,24,FAINKE,80,


# Groupby() method

In [44]:
# Toy data set: one entry per country; population is expressed in millions.
# NOTE(review): 'Papua New Guinea' is labelled 'Asia' here although it is
# usually classed under Oceania -- left unchanged so the recorded group
# statistics stay valid; TODO confirm with the author.
data = {
    'country': [
        'Canada', 'South Africa', 'Tanzania',
        'Papua New Guinea', 'Namibia', 'Mexico',
        'India', 'Malaysia', 'USA',
    ],
    'population': [
        37.59, 58.56, 58.01,
        8.78, 2.49, 127.6,
        1366, 31.95, 328.2,
    ],
    'continent': [
        'North America', 'Africa', 'Africa',
        'Asia', 'Africa', 'North America',
        'Asia', 'Asia', 'North America',
    ],
}
df_pop = pd.DataFrame(data)
df_pop

Unnamed: 0,country,population,continent
0,Canada,37.59,North America
1,South Africa,58.56,Africa
2,Tanzania,58.01,Africa
3,Papua New Guinea,8.78,Asia
4,Namibia,2.49,Africa
5,Mexico,127.6,North America
6,India,1366.0,Asia
7,Malaysia,31.95,Asia
8,USA,328.2,North America


In [46]:
# Mean population (in millions) across all rows of df_pop. The column is
# looked up with brackets rather than as an attribute.
df_pop['population'].mean()

224.35333333333335

In [47]:
# Mean of every numeric column per continent. numeric_only=True is passed
# explicitly because df_pop also holds the string column 'country': pandas 2.x
# no longer drops such columns silently and raises a TypeError instead.
df_pop.groupby(['continent']).mean(numeric_only=True)

Unnamed: 0_level_0,population
continent,Unnamed: 1_level_1
Africa,39.686667
Asia,468.91
North America,164.463333


In [48]:
# Restrict the aggregation to 'population' before averaging; the result is a
# Series indexed by continent.
df_pop.groupby('continent')['population'].mean()

continent
Africa            39.686667
Asia             468.910000
North America    164.463333
Name: population, dtype: float64

# Methods to execute on the group

In [49]:
# Total population per continent. numeric_only=True keeps the result to the
# numeric column: on pandas 2.x a plain .sum() also "sums" the 'country'
# strings of each group by concatenating them into an extra column.
df_pop.groupby(['continent']).sum(numeric_only=True)

Unnamed: 0_level_0,population
continent,Unnamed: 1_level_1
Africa,119.06
Asia,1406.73
North America,493.39


In [53]:
# max() is evaluated column by column within each group, so the 'country' and
# 'population' values in one output row need not come from the same record:
# for Africa the output pairs 'Tanzania' (the greatest country string) with
# 58.56, which is South Africa's population.
df_pop.groupby(['continent']).max()

Unnamed: 0_level_0,country,population
continent,Unnamed: 1_level_1,Unnamed: 2_level_1
Africa,Tanzania,58.56
Asia,Papua New Guinea,1366.0
North America,USA,328.2


In [54]:
# describe() returns count, mean, std, min, the quartiles and max of
# 'population' for each continent in a single table.
df_pop.groupby(['continent']).describe()

Unnamed: 0_level_0,population,population,population,population,population,population,population,population
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
continent,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Africa,3.0,39.686667,32.214432,2.49,30.25,58.01,58.285,58.56
Asia,3.0,468.91,776.989101,8.78,20.365,31.95,698.975,1366.0
North America,3.0,164.463333,148.770703,37.59,82.595,127.6,227.9,328.2


# Computing multiple aggregations per team

In [57]:
# Ten players, five per team, each with a position, an age and a height.
data_player = {
    'Team': ['Blues'] * 5 + ['Reds'] * 5,
    'Position': [
        'Non Forward', 'Forward', 'Non Forward', 'Non Forward', 'Forward',
        'Non Forward', 'Forward', 'Non Forward', 'Forward', 'Forward',
    ],
    'Age': [23, 19, 31, 25, 27, 18, 41, 28, 23, 24],
    'Height': [1.98, 2.12, 1.97, 2.01, 2.21, 1.99, 2.05, 2.01, 2.12, 2.14],
}

df_player = pd.DataFrame(data_player)
df_player

Unnamed: 0,Team,Position,Age,Height
0,Blues,Non Forward,23,1.98
1,Blues,Forward,19,2.12
2,Blues,Non Forward,31,1.97
3,Blues,Non Forward,25,2.01
4,Blues,Forward,27,2.21
5,Reds,Non Forward,18,1.99
6,Reds,Forward,41,2.05
7,Reds,Non Forward,28,2.01
8,Reds,Forward,23,2.12
9,Reds,Forward,24,2.14


In [58]:
# Median, mean and standard deviation of the numeric columns per team. The
# numeric columns are selected explicitly: 'Position' holds strings, and
# pandas 2.x raises a TypeError when asked for its median/mean/std instead of
# silently dropping the column as older versions did.
df_player.groupby('Team')[['Age', 'Height']].agg(['median', 'mean', 'std'])


Unnamed: 0_level_0,Age,Age,Age,Height,Height,Height
Unnamed: 0_level_1,median,mean,std,median,mean,std
Team,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Blues,25.0,25.0,4.472136,2.01,2.058,0.103779
Reds,24.0,26.8,8.700575,2.05,2.062,0.066106


In [61]:
# A dict passed to agg() maps each column to its own aggregation(s): two
# statistics for 'Age', one for 'Height'.
df_player.groupby('Team').agg({'Age': ['median', 'mean'], 'Height': 'std'})

Unnamed: 0_level_0,Age,Age,Height
Unnamed: 0_level_1,median,mean,std
Team,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Blues,25.0,25.0,0.103779
Reds,24.0,26.8,0.066106


In [63]:
# Grouping by two keys produces one output row per (Team, Position) pair.
df_player.groupby(['Team', 'Position']).agg({'Age': ['median', 'mean'], 'Height': 'max'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Height
Unnamed: 0_level_1,Unnamed: 1_level_1,median,mean,max
Team,Position,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Blues,Forward,23.0,23.0,2.21
Blues,Non Forward,25.0,26.333333,2.01
Reds,Forward,24.0,29.333333,2.14
Reds,Non Forward,23.0,23.0,2.01


In [64]:
# With 'Team' and 'Position' used as group keys, the columns left to average
# are 'Age' and 'Height'; the output has one row per (Team, Position) pair.
df_player.groupby(['Team', 'Position']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Height
Team,Position,Unnamed: 2_level_1,Unnamed: 3_level_1
Blues,Forward,23.0,2.165
Blues,Non Forward,26.333333,1.986667
Reds,Forward,29.333333,2.103333
Reds,Non Forward,23.0,2.0
