In [1]:
import pandas as pd
import numpy as np

In [2]:
# Long-format sample: four variables (A-D), each with a value on the same
# three consecutive dates.
data = dict(
    value=range(12),
    variable=[label for label in "ABCD" for _ in range(3)],
    date=pd.to_datetime(["2020-01-03", "2020-01-04", "2020-01-05"] * 4),
)

df = pd.DataFrame(data)

In [3]:
# Display the long-format frame (columns: value, variable, date).
df

Unnamed: 0,value,variable,date
0,0,A,2020-01-03
1,1,A,2020-01-04
2,2,A,2020-01-05
3,3,B,2020-01-03
4,4,B,2020-01-04
5,5,B,2020-01-05
6,6,C,2020-01-03
7,7,C,2020-01-04
8,8,C,2020-01-05
9,9,D,2020-01-03


In [7]:
# Long -> wide: one row per date, one column per variable, cells from "value".
df.pivot(columns="variable", index="date", values="value")

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-03,0,3,6,9
2020-01-04,1,4,7,10
2020-01-05,2,5,8,11


In [8]:
# Add a second measure so the pivot below carries two value columns.
df["value2"] = 2 * df["value"]

# No `values` argument: every remaining column (value, value2) is pivoted,
# so the result's columns have two levels.
pivoted = df.pivot(columns="variable", index="date")

In [9]:
# Display the pivoted frame; its columns are keyed by (value name, variable).
pivoted

Unnamed: 0_level_0,value,value,value,value,value2,value2,value2,value2
variable,A,B,C,D,A,B,C,D
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2020-01-03,0,3,6,9,0,6,12,18
2020-01-04,1,4,7,10,2,8,14,20
2020-01-05,2,5,8,11,4,10,16,22


In [16]:
import datetime

In [17]:
# Seeded generator so the random columns D and E come out identical on every
# Restart & Run All (the previous unseeded draws changed on each run).
rng = np.random.default_rng(42)

# 24-row sample frame:
#   A, B, C - repeating categorical labels used as pivot keys
#   D, E    - standard-normal draws used as values to aggregate
#   F       - the 1st of each month of 2013, followed by the 15th of each month
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 6,
        "B": ["A", "B", "C"] * 8,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
        "D": rng.standard_normal(24),
        "E": rng.standard_normal(24),
        "F": [
            datetime.datetime(2013, month, day)
            for day in (1, 15)
            for month in range(1, 13)
        ],
    }
)


df

Unnamed: 0,A,B,C,D,E,F
0,one,A,foo,0.185454,0.900435,2013-01-01
1,one,B,foo,-0.417737,0.087004,2013-02-01
2,two,C,foo,2.147932,-1.205239,2013-03-01
3,three,A,bar,-0.392889,0.084355,2013-04-01
4,one,B,bar,1.26993,1.649917,2013-05-01
5,one,C,bar,-2.081931,0.520503,2013-06-01
6,two,A,foo,0.108188,1.034439,2013-07-01
7,three,B,foo,-1.193387,-1.269833,2013-08-01
8,one,C,foo,0.008564,0.332849,2013-09-01
9,one,A,bar,0.053144,0.184585,2013-10-01


In [None]:
# Aggregate D over (A, B) rows and C columns; no aggfunc is passed, so
# pandas' default aggregation applies.
df.pivot_table(values="D", index=["A", "B"], columns=["C"])

In [22]:
# Summarise D and E per value of A; margins=True appends an "All" row.
table = df.pivot_table(values=["D", "E"], index=["A"], margins=True)

In [23]:
# Move the column labels (D, E) into a new innermost row level; the displayed
# result is a Series indexed by (A, column name).
table.stack(future_stack=True)

A       
one    D   -0.050083
       E    0.373917
three  D   -0.227047
       E    0.026262
two    D    0.253551
       E   -0.020768
All    D   -0.018416
       E    0.188332
dtype: float64

In [24]:
# Level values for a two-level row index: one inner list per level, which is
# the layout MultiIndex.from_arrays takes (despite the variable's name).
tuples = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]


index = pd.MultiIndex.from_arrays(tuples, names=["first", "second"])

# Seeded generator so the 8x2 sample values are the same on every rerun
# (the previous unseeded draws changed each time the cell was executed).
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((8, 2)), index=index, columns=["A", "B"])

# First four rows, selected by position; .iloc makes that explicit.
df2 = df.iloc[:4]

df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.72476,-0.953051
bar,two,-0.490964,1.384155
baz,one,0.681173,0.328557
baz,two,-2.029003,-0.92674


In [25]:
# Stack the column labels (A, B) beneath the (first, second) row index.
stacked = df2.stack(future_stack=True)

In [26]:
# Display the stacked result.
stacked

first  second   
bar    one     A   -0.724760
               B   -0.953051
       two     A   -0.490964
               B    1.384155
baz    one     A    0.681173
               B    0.328557
       two     A   -2.029003
               B   -0.926740
dtype: float64

In [27]:
# Unstack the innermost level (the default), restoring the A/B columns.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.72476,-0.953051
bar,two,-0.490964,1.384155
baz,one,0.681173,0.328557
baz,two,-2.029003,-0.92674


In [28]:
# Unstack the level at position 1 ("second"), so its labels become columns.
stacked.unstack(level=1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.72476,-0.490964
bar,B,-0.953051,1.384155
baz,A,0.681173,-2.029003
baz,B,0.328557,-0.92674


In [29]:
# Small wide-format frame: two identifier columns and two measurements.
cheese = pd.DataFrame(
    dict(
        first=["John", "Mary"],
        last=["Doe", "Bo"],
        height=[5.5, 6.0],
        weight=[130, 150],
    )
)


cheese

Unnamed: 0,first,last,height,weight
0,John,Doe,5.5,130
1,Mary,Bo,6.0,150


In [30]:
# Wide -> long: keep first/last as identifiers, unpivot height and weight.
pd.melt(cheese, id_vars=["first", "last"], value_vars=["height", "weight"])

Unnamed: 0,first,last,variable,value
0,John,Doe,height,5.5
1,Mary,Bo,height,6.0
2,John,Doe,weight,130.0
3,Mary,Bo,weight,150.0


In [31]:
# Without value_vars, every column that is not an id_var is unpivoted.
pd.melt(cheese, id_vars=["first", "last"])

Unnamed: 0,first,last,variable,value
0,John,Doe,height,5.5
1,Mary,Bo,height,6.0
2,John,Doe,weight,130.0
3,Mary,Bo,weight,150.0


In [32]:
# One row per key; each "values" cell holds a two-element list.
keys = [f"panda{n}" for n in range(1, 4)]

values = [
    ["eats", "shoots"],
    ["shoots", "leaves"],
    ["eats", "leaves"],
]

df = pd.DataFrame(dict(keys=keys, values=values))

df

Unnamed: 0,keys,values
0,panda1,"[eats, shoots]"
1,panda2,"[shoots, leaves]"
2,panda3,"[eats, leaves]"


In [33]:
# Expand each list in the "values" column into one row per element; the
# original row label is repeated for every element.
df["values"].explode()

0      eats
0    shoots
1    shoots
1    leaves
2      eats
2    leaves
Name: values, dtype: object

In [34]:
# Frame-level explode: the other columns ("keys") are repeated per element.
df.explode(column="values")

Unnamed: 0,keys,values
0,panda1,eats
0,panda1,shoots
1,panda2,shoots
1,panda2,leaves
2,panda3,eats
2,panda3,leaves


In [47]:
# Two integer factor columns (A, B) plus a value column C with one missing entry.
df = pd.DataFrame(
    dict(
        A=[1, 2, 2, 2, 3],
        B=[3, 3, 4, 4, 4],
        C=[1, 1, np.nan, 1, 1],
    )
)


df

Unnamed: 0,A,B,C
0,1,3,1.0
1,2,3,1.0
2,2,4,
3,2,4,1.0
4,3,4,1.0


In [48]:
# Frequency table: how many rows fall in each (A, B) combination.
pd.crosstab(index=df["A"], columns=df["B"])

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0
2,1,2
3,0,1


In [45]:
# Two categoricals whose declared categories match their observed values.
foo = pd.Categorical(list("abcde"), categories=list("abcde"))

bar = pd.Categorical(list("defgh"), categories=list("defgh"))

# The inputs are unnamed arrays, so the axis names are supplied explicitly.
pd.crosstab(index=foo, columns=bar, rownames=["foo"], colnames=["bar"])

bar,d,e,f,g,h
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
a,1,0,0,0,0
b,0,1,0,0,0
c,0,0,1,0,0
d,0,0,0,1,0
e,0,0,0,0,1


In [49]:
# Same frequency table, expressed as fractions of the total row count.
pd.crosstab(index=df["A"], columns=df["B"], normalize=True)

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.2,0.0
2,0.2,0.4
3,0.0,0.2


In [51]:
# Instead of counting rows, sum column C within each (A, B) cell.
pd.crosstab(index=df["A"], columns=df["B"], values=df["C"], aggfunc="sum")

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,
2,1.0,1.0
3,,1.0


In [52]:
# Sample ages to discretise.
ages = np.array((10, 15, 13, 12, 23, 25, 28, 59, 60))

# An integer `bins` asks for that many equal-width intervals over the data range.
pd.cut(x=ages, bins=3)

[(9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (26.667, 43.333], (43.333, 60.0], (43.333, 60.0]]
Categories (3, interval[float64, right]): [(9.95, 26.667] < (26.667, 43.333] < (43.333, 60.0]]

In [53]:
# Mixed-type Series: strings, a missing value, a float and infinity.
x = pd.Series(data=["A", "A", np.nan, "B", 3.14, np.inf])

# Encode values as integer codes in order of first appearance; the displayed
# output shows the missing entry coded as -1.
x.factorize(sort=False)

(array([ 0,  0, -1,  1,  2,  3]), Index(['A', 'B', 3.14, inf], dtype='object'))