# Reshaping & Pivoting in Pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a 4x4 frame of the integers 0..15 whose rows and columns both carry
# two-level (hierarchical) labels.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=pd.MultiIndex.from_arrays(
        [["a", "a", "b", "b"], [1, 2, 1, 2]]
    ),
    columns=pd.MultiIndex.from_arrays(
        [["num", "num", "comp", "comp"],
         ["math", "stat", "math", "stat"]]
    ),
)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,num,num,comp,comp
Unnamed: 0_level_1,Unnamed: 1_level_1,math,stat,math,stat
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [3]:
# Give each level of the row and column labels a name, so later cells can
# refer to a level by name instead of by position.
data.index = data.index.set_names(["class", "exam"])
data.columns = data.columns.set_names(["field", "lesson"])
data

Unnamed: 0_level_0,field,num,num,comp,comp
Unnamed: 0_level_1,lesson,math,stat,math,stat
class,exam,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [4]:
# Stack the innermost column level (the default, level=-1) into the row index.
long = data.stack(level=-1)

In [5]:
# Display the stacked result: "lesson" is now the innermost row level.
long

Unnamed: 0_level_0,Unnamed: 1_level_0,field,comp,num
class,exam,lesson,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,math,2,0
a,1,stat,3,1
a,2,math,6,4
a,2,stat,7,5
b,1,math,10,8
b,1,stat,11,9
b,2,math,14,12
b,2,stat,15,13


In [6]:
# Reverse the stack: move the innermost row level (the default, level=-1)
# back onto the columns.
long.unstack(level=-1)

Unnamed: 0_level_0,field,comp,comp,num,num
Unnamed: 0_level_1,lesson,math,stat,math,stat
class,exam,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,2,3,0,1
a,2,6,7,4,5
b,1,10,11,8,9
b,2,14,15,12,13


In [7]:
# Default stack(): the same call that produced `long` above, shown again as
# the baseline for the level-specific variants in the following cells.
data.stack()

Unnamed: 0_level_0,Unnamed: 1_level_0,field,comp,num
class,exam,lesson,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,math,2,0
a,1,stat,3,1
a,2,math,6,4
a,2,stat,7,5
b,1,math,10,8
b,1,stat,11,9
b,2,math,14,12
b,2,stat,15,13


In [8]:
# Stack the outermost column level (position 0, "field") instead of the innermost one.
data.stack(level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,lesson,math,stat
class,exam,field,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,comp,2,3
a,1,num,0,1
a,2,comp,6,7
a,2,num,4,5
b,1,comp,10,11
b,1,num,8,9
b,2,comp,14,15
b,2,num,12,13


In [9]:
# Same operation as stacking level 0, but selecting the column level by its name.
data.stack(level="field")

Unnamed: 0_level_0,Unnamed: 1_level_0,lesson,math,stat
class,exam,field,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,comp,2,3
a,1,num,0,1
a,2,comp,6,7
a,2,num,4,5
b,1,comp,10,11
b,1,num,8,9
b,2,comp,14,15
b,2,num,12,13


In [10]:
# Two small integer series whose labels only partly overlap ("c" and "d" are shared).
s1 = pd.Series(np.arange(4), index=["a", "b", "c", "d"])
s2 = pd.Series(np.arange(6, 9), index=["c", "d", "e"])

In [11]:
# Show both series as plain text, one after the other.
print(s1, s2, sep="\n")

a    0
b    1
c    2
d    3
dtype: int32
c    6
d    7
e    8
dtype: int32


In [12]:
# Concatenate the two series end to end; `keys` adds an outer index level
# that tags each row with the series it came from.
data2 = pd.concat([s1, s2], axis=0, keys=["bir", "iki"])
data2

bir  a    0
     b    1
     c    2
     d    3
iki  c    6
     d    7
     e    8
dtype: int32

In [13]:
# Pivot the inner index level (the default, level=-1) into columns; labels
# missing from one of the series show up as NaN.
data2.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
bir,0.0,1.0,2.0,3.0,
iki,,,6.0,7.0,8.0


In [14]:
# Round-trip: unstack to a wide frame, then stack back. dropna=False keeps
# the NaN entries for labels that one of the original series did not have.
data2.unstack().stack(dropna=False)

bir  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
iki  a    NaN
     b    NaN
     c    6.0
     d    7.0
     e    8.0
dtype: float64

In [15]:
# NOTE(review): this cell is identical to the previous one and produces the
# same output. It may have been meant to show the default stack() (without
# dropna=False) for contrast; confirm with the author.
data2.unstack().stack(dropna=False)

bir  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
iki  a    NaN
     b    NaN
     c    6.0
     d    7.0
     e    8.0
dtype: float64

## Pivoting “Long” to “Wide” Format

In [16]:
# Long-format sample data: one row per (fruit, color) pair with its piece count.
stock = pd.DataFrame(
    {
        "fruit": ["apple", "plum", "grape", "apple", "plum", "grape"],
        "color": ["purple", "yellow", "purple", "yellow", "purple", "yellow"],
        "piece": [3, 4, 5, 6, 1, 2],
    }
)

In [17]:
# Display the long-format frame before pivoting it.
stock

Unnamed: 0,fruit,color,piece
0,apple,purple,3
1,plum,yellow,4
2,grape,purple,5
3,apple,yellow,6
4,plum,purple,1
5,grape,yellow,2


In [18]:
# Long -> wide: one row per fruit, one column per color, cells filled from
# "piece". The arguments are passed by keyword because DataFrame.pivot
# rejects positional arguments in pandas 2.0 and later.
stock.pivot(index="fruit", columns="color", values="piece")

color,purple,yellow
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1
apple,3,6
grape,5,2
plum,1,4


In [19]:
# Add a column of standard-normal draws, one per row. A seeded generator
# keeps the values the same on every "Restart & Run All", so the tables
# shown in later cells are reproducible.
rng = np.random.default_rng(42)
stock["value"] = rng.standard_normal(len(stock))

In [20]:
# Display the frame again, now including the new "value" column.
stock

Unnamed: 0,fruit,color,piece,value
0,apple,purple,3,-0.038716
1,plum,yellow,4,-0.069972
2,grape,purple,5,-1.116665
3,apple,yellow,6,0.374715
4,plum,purple,1,-0.023233
5,grape,yellow,2,0.608953


In [21]:
# Pivot without `values`: every remaining column ("piece" and "value") is
# spread across the colors, giving two-level column labels. The arguments are
# passed by keyword because DataFrame.pivot rejects positional arguments in
# pandas 2.0 and later.
p = stock.pivot(index="fruit", columns="color")
p

Unnamed: 0_level_0,piece,piece,value,value
color,purple,yellow,purple,yellow
fruit,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
apple,3,6,-0.038716,0.374715
grape,5,2,-1.116665,0.608953
plum,1,4,-0.023233,-0.069972


In [22]:
# Select the "value" half of the pivoted frame via the outer column level.
p["value"]

color,purple,yellow
fruit,Unnamed: 1_level_1,Unnamed: 2_level_1
apple,-0.038716,0.374715
grape,-1.116665,0.608953
plum,-0.023233,-0.069972


## Pivoting “Wide” to “Long” Format

In [23]:
# Wide-format sample data: one row per lesson, one score column per student.
# Note: this rebinds `data`, which earlier cells used for the MultiIndex frame.
data = pd.DataFrame(
    dict(
        lesson=["math", "stat", "bio"],
        Sam=[50, 60, 70],
        Kim=[80, 70, 90],
        Tom=[60, 70, 85],
    )
)
data

Unnamed: 0,lesson,Sam,Kim,Tom
0,math,50,80,60
1,stat,60,70,70
2,bio,70,90,85


In [24]:
# Wide -> long: keep "lesson" as the identifier and melt every other column
# into ("variable", "value") pairs.
group = data.melt(id_vars=["lesson"])

In [25]:
# Display the melted frame: one row per (lesson, student) pair.
group

Unnamed: 0,lesson,variable,value
0,math,Sam,50
1,stat,Sam,60
2,bio,Sam,70
3,math,Kim,80
4,stat,Kim,70
5,bio,Kim,90
6,math,Tom,60
7,stat,Tom,70
8,bio,Tom,85


In [26]:
# Pivot the melted frame back to wide form: one row per lesson, one column
# per entry in "variable", cells filled from "value". The arguments are
# passed by keyword because DataFrame.pivot rejects positional arguments in
# pandas 2.0 and later.
data = group.pivot(index="lesson", columns="variable", values="value")
data

variable,Kim,Sam,Tom
lesson,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bio,90,70,85
math,80,50,60
stat,70,60,70


In [27]:
# Turn the "lesson" index back into an ordinary column. The columns axis
# still carries the name "variable" left over from the pivot.
data.reset_index()

variable,lesson,Kim,Sam,Tom
0,bio,90,70,85
1,math,80,50,60
2,stat,70,60,70
