### Pivoting DataFrames

In [1]:
import pandas as pd
# Load the long-format trials data: one row per (treatment, gender) pair,
# with `id` and `response` as the measured columns.
trials = pd.read_csv('trials_01.csv')
trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,B,F,8
3,4,B,M,9


In [2]:
# Long -> wide: one row per treatment, one column per gender,
# each cell filled with the matching `response`.
trials.pivot(
    values='response',
    index='treatment',
    columns='gender',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,5,3
B,8,9


In [3]:
# With `values` omitted, every remaining column (`id` and `response`) is
# pivoted, so the result carries two-level column labels: (column, gender).
trials.pivot(index='treatment', columns='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [4]:
# Load per-(weekday, city) traffic: visitor and signup counts.
users = pd.read_csv('users.csv')
users

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [5]:
# Wide table of visitor counts: weekdays down the rows, cities across the columns.
visitors_pivot = users.pivot(
    values='visitors',
    index='weekday',
    columns='city',
)
visitors_pivot

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,326,456
Sun,139,237


In [6]:
# Same reshape as the visitors table, but filling the cells from `signups`.
signups_pivot = users.pivot(
    values='signups',
    index='weekday',
    columns='city',
)
signups_pivot

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,3,5
Sun,7,12


In [7]:
# Omitting `values` pivots both `visitors` and `signups` at once; the columns
# become a two-level (measure, city) index.
pivot = users.pivot(index='weekday', columns='city')
pivot

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


### Stacking and unstacking DataFrames

In [8]:
# Starting point for stack/unstack: the flat, long-format trials frame.
trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,B,F,8
3,4,B,M,9


In [9]:
# Move `treatment` and `gender` into a two-level row index.
# The frame is built from the CSV rather than from `trials` itself so this
# cell is idempotent: `trials = trials.set_index(...)` raises KeyError when
# re-run, because the two columns have already been moved into the index.
# (Assumes 'trials_01.csv' is the same file loaded at the top of the notebook.)
trials = pd.read_csv('trials_01.csv').set_index(['treatment', 'gender'])
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [10]:
# Unstack the `gender` index level into the columns: rows are now keyed by
# treatment only, and each original column splits into F / M sub-columns.
trials.unstack(level='gender')

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [11]:
# `unstack` returned a new frame; `trials` still has its (treatment, gender) index.
trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [12]:
# Index levels can also be addressed by position: level 1 is `gender`,
# so this yields the same table as unstacking by name.
trials.unstack(level=1)

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [13]:
# Keep the unstacked (wide) frame so it can be stacked back below.
trials_by_gender = trials.unstack(level='gender')
trials_by_gender

Unnamed: 0_level_0,id,id,response,response
gender,F,M,F,M
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,1,2,5,3
B,3,4,8,9


In [14]:
# Inverse of the unstack above: fold the `gender` column level back into the
# row index, restoring the (treatment, gender) layout.
stacked = trials_by_gender.stack('gender')
stacked

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
treatment,gender,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,1,5
A,M,2,3
B,F,3,8
B,M,4,9


In [15]:
# Exchange the two row-index levels so `gender` becomes the outer level.
# Only the labels are swapped; the row order is left as it was.
swapped = stacked.swaplevel('treatment', 'gender')
swapped

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
M,A,2,3
F,B,3,8
M,B,4,9


In [16]:
# Sort on the new (gender, treatment) index so rows with the same gender are adjacent.
sorted_trials = swapped.sort_index()
sorted_trials

Unnamed: 0_level_0,Unnamed: 1_level_0,id,response
gender,treatment,Unnamed: 2_level_1,Unnamed: 3_level_1
F,A,1,5
F,B,3,8
M,A,2,3
M,B,4,9


In [17]:
# Index the traffic data by (city, weekday) and sort it.
# Built in one chain from the CSV instead of rebinding `users` twice: the
# original `users = users.set_index(...)` raises KeyError when re-run, because
# `city` and `weekday` are no longer columns after the first run.
# (Assumes 'users.csv' is the same file loaded earlier in the notebook.)
users = (
    pd.read_csv('users.csv')
    .set_index(['city', 'weekday'])
    .sort_index()
)
users

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


In [18]:
# Spread the `weekday` index level across the columns: one row per city,
# with Mon / Sun sub-columns under each measure.
byweekday = users.unstack(level='weekday')
byweekday

Unnamed: 0_level_0,visitors,visitors,signups,signups
weekday,Mon,Sun,Mon,Sun
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Austin,326,139,3,7
Dallas,456,237,5,12


In [19]:
# Stacking `weekday` back into the rows recovers the (city, weekday) frame.
byweekday.stack(level='weekday')

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


In [20]:
# Unstack the other level instead: one row per weekday, cities across the columns.
bycity = users.unstack(level='city')
bycity

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


In [21]:
# Stack `city` back into the rows. The stacked level becomes the innermost
# index level, so the index order is now (weekday, city) rather than the
# original (city, weekday).
newusers = bycity.stack(level='city')
newusers

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
weekday,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,Austin,326,3
Mon,Dallas,456,5
Sun,Austin,139,7
Sun,Dallas,237,12


In [22]:
# Put the index levels back in (city, weekday) order; rows are not re-sorted.
# `reorder_levels` with explicit names is used instead of `swaplevel(0, 1)`
# because this cell rebinds `newusers`: a positional swap flips the levels
# back on every re-run, while naming the target order is idempotent.
newusers = newusers.reorder_levels(['city', 'weekday'])
newusers

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Dallas,Mon,456,5
Austin,Sun,139,7
Dallas,Sun,237,12


In [23]:
# Sort by (city, weekday) so the frame matches the row order of `users`.
newusers = newusers.sort_index()
newusers

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


### Melting DataFrames

In [24]:
# Load the wide-format trials data: one row per treatment, with the
# responses split across `F` and `M` columns.
new_trials = pd.read_csv('trials_02.csv')
new_trials

Unnamed: 0,treatment,F,M
0,A,5,3
1,B,8,9


In [25]:
# With no id_vars, every column (including `treatment`) is melted into
# generic variable / value pairs.
new_trials.melt()

Unnamed: 0,variable,value
0,treatment,A
1,treatment,B
2,F,5
3,F,8
4,M,3
5,M,9


In [26]:
# Keep `treatment` as an identifier column; only the remaining columns
# (F and M) are melted into variable / value.
new_trials.melt(id_vars=['treatment'])

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


In [27]:
# Name the columns to melt explicitly. Here they are all the non-id columns,
# so the result matches the id_vars-only call.
new_trials.melt(
    id_vars=['treatment'],
    value_vars=['F', 'M'],
)

Unnamed: 0,treatment,variable,value
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


In [28]:
# Replace the generic variable / value headers with meaningful names,
# recovering the long (treatment, gender, response) layout.
new_trials.melt(
    id_vars=['treatment'],
    var_name='gender',
    value_name='response',
)

Unnamed: 0,treatment,gender,response
0,A,F,5
1,B,F,8
2,A,M,3
3,B,M,9


In [29]:
# Reload the flat users table: `users` was re-indexed by (city, weekday) in
# the stacking section, and the melt examples need the original columns.
users = pd.read_csv('users.csv')
users

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [30]:
# Wide visitors table (weekday rows x city columns), used below as melt input.
visitors_by_city_weekday = users.pivot(
    values='visitors',
    index='weekday',
    columns='city',
)
visitors_by_city_weekday

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,326,456
Sun,139,237


In [31]:
# Turn the `weekday` index back into an ordinary column so melt can use it
# as an id variable.
# NOTE(review): this rebinds its own input, so the cell is presumably not
# safe to re-run without re-running the pivot cell first.
visitors_by_city_weekday = visitors_by_city_weekday.reset_index()
visitors_by_city_weekday

city,weekday,Austin,Dallas
0,Mon,326,456
1,Sun,139,237


In [32]:
# Undo the pivot: one row per (weekday, city) with its visitor count.
# var_name is not passed; the variable column comes out labelled `city`
# because the pivoted frame's column axis already carries that name.
visitors = visitors_by_city_weekday.melt(
    id_vars=['weekday'],
    value_name='visitors',
)
visitors

Unnamed: 0,weekday,city,visitors
0,Mon,Austin,326
1,Sun,Austin,139
2,Mon,Dallas,456
3,Sun,Dallas,237


In [33]:
# Melt both measures at once: `weekday` and `city` stay as identifiers, and
# `visitors` / `signups` are stacked into variable / value rows.
skinny = users.melt(id_vars=['weekday', 'city'])
skinny

Unnamed: 0,weekday,city,variable,value
0,Sun,Austin,visitors,139
1,Sun,Dallas,visitors,237
2,Mon,Austin,visitors,326
3,Mon,Dallas,visitors,456
4,Sun,Austin,signups,7
5,Sun,Dallas,signups,12
6,Mon,Austin,signups,3
7,Mon,Dallas,signups,5


In [34]:
# Indexed copy of users under a new name, so the flat `users` frame stays
# available; rows keep their original (unsorted) order.
users_idx = users.set_index(['city','weekday'])
users_idx

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Sun,139,7
Dallas,Sun,237,12
Austin,Mon,326,3
Dallas,Mon,456,5


In [35]:
# Melt only looks at columns: the (city, weekday) row index is discarded and
# the result is plain variable / value pairs under a fresh integer index.
kv_pairs = users_idx.melt(col_level=0)
kv_pairs

Unnamed: 0,variable,value
0,visitors,139
1,visitors,237
2,visitors,326
3,visitors,456
4,signups,7
5,signups,12
6,signups,3
7,signups,5


### Pivot tables

In [36]:
# Load trials data in which (treatment, gender) pairs repeat. A plain
# `pivot` cannot reshape duplicated pairs; `pivot_table` aggregates them.
more_trials = pd.read_csv('trials_03.csv')
more_trials

Unnamed: 0,id,treatment,gender,response
0,1,A,F,5
1,2,A,M,3
2,3,A,M,8
3,4,A,F,9
4,5,B,F,1
5,6,B,M,8
6,7,B,F,4
7,8,B,F,6


In [37]:
# Duplicate (treatment, gender) pairs are reduced with the default
# aggregation, the mean - e.g. A/F is the average of responses 5 and 9.
more_trials.pivot_table(
    values='response',
    index='treatment',
    columns='gender',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,7.0,5.5
B,3.666667,8.0


In [38]:
# Same layout, but counting how many responses fall in each
# (treatment, gender) cell instead of averaging them.
more_trials.pivot_table(
    values='response',
    index='treatment',
    columns='gender',
    aggfunc='count',
)

gender,F,M
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2,2
B,3,1


In [39]:
# Fresh copy of the flat users table for the pivot_table examples.
users = pd.read_csv('users.csv')
users

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [40]:
# pivot_table over every remaining column (signups, visitors). Each
# (weekday, city) pair occurs once in users.csv, so the aggregated values
# equal the raw ones. Column groups come out in alphabetical order.
by_city_day = users.pivot_table(index='weekday', columns='city')
by_city_day

Unnamed: 0_level_0,signups,signups,visitors,visitors
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,3,5,326,456
Sun,7,12,139,237


In [41]:
# Number of rows per weekday, counted separately for every other column
# (including the non-numeric `city`).
count_by_weekday1 = users.pivot_table(index='weekday', aggfunc='count')
count_by_weekday1

Unnamed: 0_level_0,city,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,2,2,2
Sun,2,2,2


In [42]:
# Total signups and visitors per weekday.
# - `values` lists the numeric measures explicitly so the string `city`
#   column never reaches the aggregator. Recent pandas no longer drops
#   non-numeric columns silently, and summing strings concatenates them.
# - The string 'sum' replaces the Python builtin `sum`; passing the builtin
#   as `aggfunc` is deprecated in recent pandas releases.
signups_and_visitors = users.pivot_table(
    index='weekday',
    values=['signups', 'visitors'],
    aggfunc='sum',
)
signups_and_visitors

Unnamed: 0_level_0,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,8,782
Sun,19,376


In [43]:
# Per-weekday totals plus an `All` row holding the grand totals (margins=True).
# - `values` lists the numeric measures explicitly so the string `city`
#   column is excluded from both the per-weekday sums and the margin row.
# - The string 'sum' replaces the Python builtin `sum`; passing the builtin
#   as `aggfunc` is deprecated in recent pandas releases.
signups_and_visitors_total = users.pivot_table(
    index='weekday',
    values=['signups', 'visitors'],
    aggfunc='sum',
    margins=True,
)
signups_and_visitors_total

Unnamed: 0_level_0,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,8,782
Sun,19,376
All,27,1158
