<h1 style="color:cadetblue; font-size:2em;">Pivoting DataFrames</h1>

In [1]:
import pandas as pd

# Load the first trials dataset and show it as plain text: four rows, one per
# (treatment, gender) pair, with columns id, treatment, gender and response.
trials = pd.read_csv('datasets/trials_01.csv')
print(trials)

   id treatment gender  response
0   1         A      F         5
1   2         A      M         3
2   3         B      F         8
3   4         B      M         9


In [2]:
# Reshaping by pivoting: one row per treatment, one column per gender,
# and each cell holds the response recorded for that pair.
trials.pivot(
    index='treatment',
    columns='gender',
    values='response',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


In [3]:
# Pivoting multiple columns: with no `values` argument every remaining column
# (here id and response) is pivoted, giving two-level column labels.
trials.pivot(
    index='treatment',
    columns='gender',
)

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


<h1 style="color:cadetblue; font-size:2em;">Stacking & unstacking DataFrames</h1>

In [17]:
# Reload the trials data so this section starts from the flat frame with the
# default integer row index.
trials = pd.read_csv('datasets/trials_01.csv')
print(trials)

   id treatment gender  response
0   1         A      F         5
1   2         A      M         3
2   3         B      F         8
3   4         B      M         9


In [18]:
# Move treatment and gender out of the columns into a two-level row index.
# NOTE: this rebinds `trials`; re-running this cell without first re-running
# the read_csv cell is expected to fail, because those columns no longer exist.
trials = trials.set_index(['treatment', 'gender'])
print(trials)

                  id  response
treatment gender              
A         F        1         5
          M        2         3
B         F        3         8
          M        4         9


In [19]:
# Show the multi-indexed frame again as the starting point for unstacking.
print(trials)

                  id  response
treatment gender              
A         F        1         5
          M        2         3
B         F        3         8
          M        4         9


In [20]:
# Unstack by level name: the gender index level becomes the inner column level,
# leaving one row per treatment.
trials.unstack(level='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [21]:
# unstack() returned a new frame; `trials` itself still has its two-level index.
print(trials)

                  id  response
treatment gender              
A         F        1         5
          M        2         3
B         F        3         8
          M        4         9


In [22]:
# Same reshape, selecting the level by position: level 1 is the second index
# level (gender), so the result matches unstack(level='gender').
trials.unstack(level=1)

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [24]:
# Keep the unstacked (wide) result under its own name so it can be stacked back.
trials_by_gender = trials.unstack(level='gender')
trials_by_gender

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [25]:
# stack() reverses the unstack: the gender column level moves back into the
# row index, giving one row per (treatment, gender) pair again.
trials_by_gender.stack(level='gender')

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [26]:
# Store the re-stacked frame; the cells below reorder and sort its index levels.
stacked = trials_by_gender.stack(level='gender')
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [27]:
# Swapping levels: exchange the two row-index levels so gender becomes the
# outer level and treatment the inner one. Row order is left as it was.
swapped = stacked.swaplevel('treatment', 'gender')
print(swapped)

                  id  response
gender treatment              
F      A           1         5
M      A           2         3
F      B           3         8
M      B           4         9


In [28]:
# Sorting rows: order by the (gender, treatment) index so that all rows for
# one gender are adjacent.
sorted_trials = swapped.sort_index()
print(sorted_trials)

                  id  response
gender treatment              
F      A           1         5
       B           3         8
M      A           2         3
       B           4         9


<h1 style="color:cadetblue; font-size:2em;">Melting DataFrames</h1>

In [37]:
import pandas as pd
# Reload the flat trials data (default integer index) for the melting section.
trials = pd.read_csv('datasets/trials_01.csv')
print(trials)

   id treatment gender  response
0   1         A      F         5
1   2         A      M         3
2   3         B      F         8
3   4         B      M         9


In [30]:
# Pivot to the wide layout (one column per gender) for comparison with the
# melt examples that follow.
trials.pivot(
    index='treatment',
    columns='gender',
    values='response',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


In [32]:
# Load a dataset that is already in wide form: one row per treatment, with the
# values held in separate F and M columns.
new_trials = pd.read_csv('datasets/trials_02.csv')
print(new_trials)

  treatment  F  M
0         A  5  3
1         B  8  9


In [33]:
# With no arguments every column, including treatment, is melted into
# variable/value pairs.
new_trials.melt()

Unnamed: 0,variable,value
0,treatment,A
1,treatment,B
2,F,5
3,F,8
4,M,3
5,M,9


In [34]:
# Specifying id_vars: treatment is kept as an identifier column and only the
# remaining columns (F and M) are melted.
new_trials.melt(id_vars=['treatment'])

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


In [35]:
# Specifying value_vars: name the columns to melt explicitly instead of
# relying on "everything that is not an id_var".
new_trials.melt(
    id_vars=['treatment'],
    value_vars=['F', 'M'],
)

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


In [36]:
# Specifying var_name and value_name: label the melted columns 'gender' and
# 'response' instead of the default 'variable' and 'value'.
new_trials.melt(
    id_vars=['treatment'],
    var_name='gender',
    value_name='response',
)

Unnamed: 0,treatment,gender,response
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


<h1 style="color:cadetblue; font-size:2em;">Pivot tables</h1>

In [38]:
import pandas as pd
# Load a larger dataset in which (treatment, gender) pairs repeat
# (for example, two rows for treatment A / gender F).
more_trials = pd.read_csv('datasets/trials_03.csv')
print(more_trials)

   id treatment gender  response
0   1         A      F         5
1   2         A      M         3
2   3         A      M         8
3   4         A      F         9
4   5         B      F         1
5   6         B      M         8
6   7         B      F         4
7   8         B      F         6


In [39]:
# pivot() cannot reshape this frame because (treatment, gender) pairs repeat,
# so pandas raises ValueError. The error is the point of this cell: catch it
# and print it so that "Restart & Run All" can continue to the pivot_table
# cells instead of halting here.
try:
    more_trials.pivot(index='treatment', columns='gender', values='response')
except ValueError as err:
    # Same "ExceptionName: message" form that an uncaught error would end with.
    print(f'{type(err).__name__}: {err}')

ValueError: Index contains duplicate entries, cannot reshape

In [40]:
# pivot_table() handles repeated (treatment, gender) pairs by aggregating them.
# No aggfunc is given here; the result shows averages
# (A/F: responses 5 and 9 -> 7.0).
more_trials.pivot_table(
    index='treatment',
    columns='gender',
    values='response',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,7.0,5.5
B,3.666667,8.0


In [41]:
# Count the responses in each (treatment, gender) cell instead of averaging them.
more_trials.pivot_table(
    index='treatment',
    columns='gender',
    values='response',
    aggfunc='count',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2,2
B,3,1
