In [1]:
import pandas._testing as tm
import numpy as np
import pandas as pd

In [6]:
# Small wide table: 3 consecutive business days x 4 columns of standard-normal
# draws. Built with public pandas/NumPy calls only; the private
# ``pandas._testing.makeTimeDataFrame`` helper is not part of the public API
# and is missing from recent pandas releases.
frame = pd.DataFrame(
    np.random.randn(3, 4),
    index=pd.date_range("2000-01-03", periods=3, freq="B"),
    columns=list("ABCD"),
)
frame

Unnamed: 0,A,B,C,D
2000-01-03,-1.024163,0.251037,0.479822,0.357758
2000-01-04,-1.786565,0.863099,-1.091532,1.239576
2000-01-05,1.546184,0.242519,-0.948737,-0.500372


In [2]:
def unpivot(frame):
    """Reshape a wide frame into long ("stacked") format.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table; every column is treated as one variable observed at each
        index label.

    Returns
    -------
    pandas.DataFrame
        One row per (index label, column) pair, with columns ``date`` (the
        original index label), ``variable`` (the original column label) and
        ``value``. Rows are ordered column by column.
    """
    n_rows, n_cols = frame.shape
    data = {
        # Column-major ("F") flattening yields every row of the first column,
        # then every row of the second column, and so on.
        "value": frame.to_numpy().ravel("F"),
        # To line up with that order, each column label is repeated n_rows times...
        "variable": np.asarray(frame.columns).repeat(n_rows),
        # ...and the full index is cycled once per column.
        "date": np.tile(np.asarray(frame.index), n_cols),
    }
    return pd.DataFrame(data, columns=["date", "variable", "value"])


# Fresh 3x4 business-day frame built with public APIs only (replaces the
# private ``pandas._testing.makeTimeDataFrame`` helper).
df = unpivot(
    pd.DataFrame(
        np.random.randn(3, 4),
        index=pd.date_range("2000-01-03", periods=3, freq="B"),
        columns=list("ABCD"),
    )
)

In [3]:
df

Unnamed: 0,date,variable,value
0,2000-01-03,A,-0.252551
1,2000-01-04,A,0.512549
2,2000-01-05,A,0.969642
3,2000-01-03,B,-2.013839
4,2000-01-04,B,0.169718
5,2000-01-05,B,0.836208
6,2000-01-03,C,0.279144
7,2000-01-04,C,0.575676
8,2000-01-05,C,0.543042
9,2000-01-03,D,0.23816


In [4]:
df.pivot(index='date', columns='variable', values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-0.252551,-2.013839,0.279144,0.23816
2000-01-04,0.512549,0.169718,0.575676,-1.882149
2000-01-05,0.969642,0.836208,0.543042,0.408645


In [5]:
# Pair each outer label with its inner label: ('bar', 'one'), ('bar', 'two'), ...
tuples = list(zip(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']))
# ``names`` is the documented keyword of ``MultiIndex.from_tuples``.
# NOTE(review): 'fist' looks like a typo for 'first'; it is kept unchanged here
# because the recorded outputs in this notebook display that level name.
index = pd.MultiIndex.from_tuples(tuples, names=['fist', 'second'])
# 8 rows x 2 columns of standard-normal draws (unseeded, so values change per run).
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=list('AB'))

In [6]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
fist,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.147259,-0.017446
bar,two,-1.378097,-0.171903
baz,one,-0.80739,-0.563272
baz,two,-2.552057,0.780392
foo,one,-1.047248,0.088584
foo,two,-0.684953,1.159205
qux,one,0.014452,1.797823
qux,two,0.394805,-0.214345


In [9]:
df2 = df.stack()
df2

fist  second   
bar   one     A   -0.147259
              B   -0.017446
      two     A   -1.378097
              B   -0.171903
baz   one     A   -0.807390
              B   -0.563272
      two     A   -2.552057
              B    0.780392
foo   one     A   -1.047248
              B    0.088584
      two     A   -0.684953
              B    1.159205
qux   one     A    0.014452
              B    1.797823
      two     A    0.394805
              B   -0.214345
dtype: float64

In [10]:
df2.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
fist,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.147259,-0.017446
bar,two,-1.378097,-0.171903
baz,one,-0.80739,-0.563272
baz,two,-2.552057,0.780392
foo,one,-1.047248,0.088584
foo,two,-0.684953,1.159205
qux,one,0.014452,1.797823
qux,two,0.394805,-0.214345


In [11]:
df2.unstack(1)

Unnamed: 0_level_0,second,one,two
fist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.147259,-1.378097
bar,B,-0.017446,-0.171903
baz,A,-0.80739,-2.552057
baz,B,-0.563272,0.780392
foo,A,-1.047248,-0.684953
foo,B,0.088584,1.159205
qux,A,0.014452,0.394805
qux,B,1.797823,-0.214345


In [12]:
df2.unstack(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
fist,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.147259,-0.017446
bar,two,-1.378097,-0.171903
baz,one,-0.80739,-0.563272
baz,two,-2.552057,0.780392
foo,one,-1.047248,0.088584
foo,two,-0.684953,1.159205
qux,one,0.014452,1.797823
qux,two,0.394805,-0.214345


In [13]:
df2.unstack(0)

Unnamed: 0_level_0,fist,bar,baz,foo,qux
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,A,-0.147259,-0.80739,-1.047248,0.014452
one,B,-0.017446,-0.563272,0.088584,1.797823
two,A,-1.378097,-2.552057,-0.684953,0.394805
two,B,-0.171903,0.780392,1.159205,-0.214345


In [25]:
# Two-person toy table: identifier columns (first, last) plus two measurements.
cheese = pd.DataFrame(
    dict(
        first=['John', 'Mar'],
        last=['Dow', 'Bo'],
        height=[5.5, 6.0],
        weight=[130, 150],
    )
)
cheese

Unnamed: 0,first,last,height,weight
0,John,Dow,5.5,130
1,Mar,Bo,6.0,150


In [27]:
cheese.melt(id_vars=['first', 'last'])

Unnamed: 0,first,last,variable,value
0,John,Dow,height,5.5
1,Mar,Bo,height,6.0
2,John,Dow,weight,130.0
3,Mar,Bo,weight,150.0


In [28]:
cheese.melt(id_vars=['first', 'last'], var_name='quantity')

Unnamed: 0,first,last,quantity,value
0,John,Dow,height,5.5
1,Mar,Bo,height,6.0
2,John,Dow,weight,130.0
3,Mar,Bo,weight,150.0


In [29]:
# Same two people as `cheese`, with an extra identifier column (location).
cheese1 = pd.DataFrame(
    dict(
        first=['John', 'Mar'],
        last=['Dow', 'Bo'],
        location=['London', 'Paris'],
        height=[5.5, 6.0],
        weight=[130, 150],
    )
)
cheese1

Unnamed: 0,first,last,location,height,weight
0,John,Dow,London,5.5,130
1,Mar,Bo,Paris,6.0,150


In [30]:
cheese1.melt(id_vars=['first', 'last', 'location'], var_name='var')

Unnamed: 0,first,last,location,var,value
0,John,Dow,London,height,5.5
1,Mar,Bo,Paris,height,6.0
2,John,Dow,London,weight,130.0
3,Mar,Bo,Paris,weight,150.0


In [38]:
# Use a list, not a set: set iteration order is arbitrary, so the labels could
# come out as B, A and no longer line up with the rows they are attached to.
index = pd.MultiIndex.from_tuples([('person', 'A'), ('person', 'B')])
df = cheese.set_index(index)
df

Unnamed: 0,Unnamed: 1,first,last,height,weight
person,A,John,Dow,5.5,130
person,B,Mar,Bo,6.0,150


In [39]:
df.melt(id_vars=['first', 'last'], ignore_index=False)

Unnamed: 0,Unnamed: 1,first,last,variable,value
person,A,John,Dow,height,5.5
person,B,Mar,Bo,height,6.0
person,A,John,Dow,weight,130.0
person,B,Mar,Bo,weight,150.0


In [46]:
# Row labels: every (first, second) combination, 4 x 2 = 8 rows.
index = pd.MultiIndex.from_product(
    [['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
    names=['first', 'second'],
)
# Column labels: four explicit (exp, animal) pairs, deliberately not sorted.
columns = pd.MultiIndex.from_tuples(
    [('A', 'cat'), ('B', 'dog'), ('B', 'cat'), ('A', 'dog')],
    names=['exp', 'animal'],
)
# Standard-normal draws, one per (row, column) label pair.
df = pd.DataFrame(
    np.random.randn(len(index), len(columns)),
    index=index,
    columns=columns,
)
df

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,0.440186,0.456182,0.245471,-1.152767
bar,two,0.13085,-0.922904,-0.550016,-1.566868
baz,one,0.482003,-0.980593,-0.040883,-0.523137
baz,two,1.032251,-0.961894,0.227934,1.115256
foo,one,-0.456132,-1.371974,1.642311,-0.751516
foo,two,0.542874,-0.62052,-1.133246,-0.088572
qux,one,0.481883,-0.752749,0.573976,1.305406
qux,two,-1.08231,-0.507097,-0.352862,0.575986


In [57]:
df.unstack(1)

exp,A,A,B,B,B,B,A,A
animal,cat,cat,dog,dog,cat,cat,dog,dog
second,one,two,one,two,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
bar,0.440186,0.13085,0.456182,-0.922904,0.245471,-0.550016,-1.152767,-1.566868
baz,0.482003,1.032251,-0.980593,-0.961894,-0.040883,0.227934,-0.523137,1.115256
foo,-0.456132,0.542874,-1.371974,-0.62052,1.642311,-1.133246,-0.751516,-0.088572
qux,0.481883,-1.08231,-0.752749,-0.507097,0.573976,-0.352862,1.305406,0.575986


In [59]:
# Mean over columns sharing the same level-1 column label. ``groupby(axis=1)``
# is deprecated since pandas 2.1, so group the transposed frame and transpose back.
df.T.groupby(level=1).mean().T

Unnamed: 0_level_0,animal,cat,dog
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.342828,-0.348293
bar,two,-0.209583,-1.244886
baz,one,0.22056,-0.751865
baz,two,0.630092,0.076681
foo,one,0.59309,-1.061745
foo,two,-0.295186,-0.354546
qux,one,0.527929,0.276328
qux,two,-0.717586,0.034445


In [61]:
df.stack().mean(1).unstack()

Unnamed: 0_level_0,animal,cat,dog
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.342828,-0.348293
bar,two,-0.209583,-1.244886
baz,one,0.22056,-0.751865
baz,two,0.630092,0.076681
foo,one,0.59309,-1.061745
foo,two,-0.295186,-0.354546
qux,one,0.527929,0.276328
qux,two,-0.717586,0.034445


In [85]:
df.stack().mean(1).groupby(['animal']).mean()

animal
cat    0.136518
dog   -0.421735
dtype: float64

In [100]:
import datetime
# 24-row toy table: three categorical keys (A, B, C), two random measures
# (D, E) and a date column F holding the 1st of each 2013 month followed by
# the 15th of each 2013 month.
df = pd.DataFrame(
    {
        'A': ['one', 'one', 'two', 'three'] * 6,
        'B': ['A', 'B', 'C'] * 8,
        'C': (['foo'] * 3 + ['bar'] * 3) * 4,
        'D': np.random.randn(24),
        'E': np.random.randn(24),
        'F': [
            datetime.datetime(2013, month, day)
            for day in (1, 15)
            for month in range(1, 13)
        ],
    }
)

In [104]:
df.head(4)

Unnamed: 0,A,B,C,D,E,F
0,one,A,foo,-0.733,0.953468,2013-01-01
1,one,B,foo,-0.855326,-1.527126,2013-02-01
2,two,C,foo,-1.93129,-1.175786,2013-03-01
3,three,A,bar,-1.964581,-0.395056,2013-04-01


In [108]:
# Sum D and E for each A (rows) x (B, C) (columns) combination. The string
# alias 'sum' selects the same aggregation as np.sum without the FutureWarning
# pandas >= 2.1 emits for NumPy callables.
pd.pivot_table(df, index=['A'], columns=['B', 'C'], values=['D', 'E'], aggfunc='sum')

Unnamed: 0_level_0,D,D,D,D,D,D,E,E,E,E,E,E
B,A,A,B,B,C,C,A,A,B,B,C,C
C,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo
A,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
one,-0.463774,0.256101,-0.872767,-0.31173,0.205786,0.060311,0.935983,1.344029,-1.657711,-1.534024,0.577159,-1.719386
three,-1.266364,,,0.429624,0.080274,,-0.935639,,,-3.036266,0.983468,
two,,0.22406,-2.555067,,,-0.986996,,1.311266,-0.23045,,,-0.080964


In [110]:
# Method form of the same pivot: sums of D and E per A x (B, C). The 'sum'
# alias avoids the FutureWarning pandas >= 2.1 emits for np.sum.
df.pivot_table(index=['A'], columns=['B', 'C'], values=['D', 'E'], aggfunc='sum')

Unnamed: 0_level_0,D,D,D,D,D,D,E,E,E,E,E,E
B,A,A,B,B,C,C,A,A,B,B,C,C
C,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo,bar,foo
A,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
one,-0.463774,0.256101,-0.872767,-0.31173,0.205786,0.060311,0.935983,1.344029,-1.657711,-1.534024,0.577159,-1.719386
three,-1.266364,,,0.429624,0.080274,,-0.935639,,,-3.036266,0.983468,
two,,0.22406,-2.555067,,,-0.986996,,1.311266,-0.23045,,,-0.080964


In [126]:
# Each label string is bound to a variable of the same name so the factor
# arrays below can be written without quotes.
foo, bar = 'foo', 'bar'
dull, shiny = 'dull', 'shiny'
one, two = 'one', 'two'

# Three object-dtype factor arrays of equal length...
a = np.array([foo, foo, bar, bar, foo, foo], dtype=object)
b = np.array([one, one, two, one, two, one], dtype=object)
c = np.array([dull, dull, shiny, dull, dull, shiny], dtype=object)
# ...and two integer value arrays: 0..5 and 6..11.
d = np.arange(len(a))
e = d + 6

In [131]:
len(a), len(d)

(6, 6)

In [144]:
[1]* 10

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [136]:
# Sum of e for each a (rows) x (b, c) (columns) combination. The 'sum' alias
# avoids the FutureWarning pandas >= 2.1 emits for NumPy callables like np.sum.
pd.crosstab(a, [b, c], values=e, aggfunc='sum')

col_0,one,one,two,two
col_1,dull,shiny,dull,shiny
row_0,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,9.0,,,8.0
foo,13.0,11.0,10.0,


In [116]:
df = pd.DataFrame({'A': [1,2,2,2,2], 'B': [3,3,4,4,4], 'C': [1,1,np.nan,1,1]})
pd.crosstab(df['A'], df['B'])

B,3,4
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0
2,1,3


In [122]:
df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], 'B': pd.date_range('2020-01-01', periods=5)})
pd.crosstab(df['A'], df['B'])

B,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
a,1,0,0,0,0
b,0,1,0,0,0
c,0,0,1,0,0
d,0,0,0,1,0
e,0,0,0,0,1


In [123]:
df['C'] = np.arange(5)

In [145]:
# Sum of C per (A, B) pair; dropna=False is passed through unchanged. The
# 'sum' alias avoids the FutureWarning pandas >= 2.1 emits for np.sum.
pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc='sum', dropna=False)

B,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
a,0.0,,,,
b,,1.0,,,
c,,,2.0,,
d,,,,3.0,
e,,,,,4.0


In [147]:
ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
pd.cut(ages, bins=3)

[(9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (9.95, 26.667], (26.667, 43.333], (43.333, 60.0], (43.333, 60.0]]
Categories (3, interval[float64, right]): [(9.95, 26.667] < (26.667, 43.333] < (43.333, 60.0]]

In [150]:
c = pd.cut(ages, bins=[0, 18, 35, 70])

In [151]:
pd.cut([25, 30, 50], bins=c.categories)

[(18, 35], (18, 35], (35, 70]]
Categories (3, interval[int64, right]): [(0, 18] < (18, 35] < (35, 70]]

In [1]:
import numpy as np
import pandas as pd

In [11]:
s = pd.Series(['a', 'b', 'ab', 'abc', 'bar1, bar2', np.nan, 'ta1, ta2'], dtype='string')
v = pd.Series(['1', '2', 'das', 'col1', 'col2'], dtype='string')

In [13]:
s.str.cat(v, join='inner')

0                a1
1                b2
2             abdas
3           abccol1
4    bar1, bar2col2
dtype: string

In [18]:
s.str.split(',')

0              [a]
1              [b]
2             [ab]
3            [abc]
4    [bar1,  bar2]
5             <NA>
6      [ta1,  ta2]
dtype: object

In [72]:
s.str.join('-')

0                      a
1                      b
2                    a-b
3                  a-b-c
4    b-a-r-1-,- -b-a-r-2
5                   <NA>
6        t-a-1-,- -t-a-2
dtype: string

In [29]:
s.str.get_dummies(',')

Unnamed: 0,bar2,ta2,a,ab,abc,b,bar1,ta1
0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,1,0,0
2,0,0,0,1,0,0,0,0
3,0,0,0,0,1,0,0,0
4,1,0,0,0,0,0,1,0
5,0,0,0,0,0,0,0,0
6,0,1,0,0,0,0,0,1


In [37]:
list(range(6))

[0, 1, 2, 3, 4, 5]

In [44]:
s

0             a
1             b
2            ab
3           abc
4    bar1, bar2
5          <NA>
6      ta1, ta2
dtype: string

In [65]:
# One column per character position 0..9; positions past the end of a string
# (and missing values) are shown as '-'.
pd.DataFrame({pos: s.str.get(pos) for pos in range(10)}).fillna('-')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,a,-,-,-,-,-,-,-,-,-
1,b,-,-,-,-,-,-,-,-,-
2,a,b,-,-,-,-,-,-,-,-
3,a,b,c,-,-,-,-,-,-,-
4,b,a,r,1,",",,b,a,r,2
5,-,-,-,-,-,-,-,-,-,-
6,t,a,1,",",,t,a,2,-,-


In [73]:
s.str.replace('a', 'col1')

0                col1
1                   b
2               col1b
3              col1bc
4    bcol1r1, bcol1r2
5                <NA>
6      tcol11, tcol12
dtype: string

In [75]:
s.str.repeat(2)

0                      aa
1                      bb
2                    abab
3                  abcabc
4    bar1, bar2bar1, bar2
5                    <NA>
6        ta1, ta2ta1, ta2
dtype: string

In [76]:
s

0             a
1             b
2            ab
3           abc
4    bar1, bar2
5          <NA>
6      ta1, ta2
dtype: string

In [82]:
s.str.pad(width=1, side='right', fillchar='0')

0             a
1             b
2            ab
3           abc
4    bar1, bar2
5          <NA>
6      ta1, ta2
dtype: string

In [93]:
s1 = pd.Series(['a', 'b', 'c'], dtype='string')

In [104]:
len(s1.str.center(width=5).loc[0])

5

In [106]:
s1.str.ljust(width=3, fillchar='-')

0    a--
1    b--
2    c--
dtype: string

In [109]:
s1.str.zfill(width=3)

0    00a
1    00b
2    00c
dtype: string

In [120]:
s = pd.Series(['line to be wrapped', 'another line to be wrapped'])
# s[1][:12]
len(s[1])

26

In [118]:
s.str.wrap(12, expand_tabs=False)

0             line to be\nwrapped
1    another line\nto be\nwrapped
dtype: object

In [119]:
s.str.wrap(12)

0             line to be\nwrapped
1    another line\nto be\nwrapped
dtype: object

In [123]:
s.str.slice(2, 8, 2)

0    n o
1    ohr
dtype: object

In [124]:
s.str.slice_replace(2,4, 'col1')

0            licol1 to be wrapped
1    ancol1her line to be wrapped
dtype: object

In [125]:
s.str.count('ine')

0    1
1    1
dtype: int64

In [126]:
s.str.startswith('ano')

0    False
1     True
dtype: bool

In [127]:
s.str.endswith('ped')

0    True
1    True
dtype: bool

In [128]:
s.str.findall('o')

0       [o]
1    [o, o]
dtype: object

In [134]:
s.str.match('Line', case=False)

0     True
1    False
dtype: bool

In [150]:
s = pd.Series(['a1', 'b2', 'c3', 'ca'])

In [155]:
# Patterns tried earlier, kept for comparison:
# s.str.extract(r'([ab])([\d])')
# s.str.extract(r'([ab])?([\d])')
# s.str.extract(r'(?P<letter>[ab])?(?P<digit>[\d])')
# Active pattern: the leading [ab] must match but is not captured, so only the
# digit group is returned; rows without an a/b followed by a digit give NaN.
s.str.extract(r'[ab]([\d])')

Unnamed: 0,0
0,1.0
1,2.0
2,
3,


In [159]:
# Earlier variant capturing letter and digit as two named groups:
# s.str.extractall(r'(?P<letter>[ab])(?P<digit>[\d])')
# Active pattern: a single named group `letter`; the result has one row per
# match, indexed by (original row label, match number).
s.str.extractall(r'(?P<letter>[ab])')

Unnamed: 0_level_0,Unnamed: 1_level_0,letter
Unnamed: 0_level_1,match,Unnamed: 2_level_1
0,0,a
1,0,b
3,0,a


In [161]:
s.str.extractall(r'([abc][\d])')

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Unnamed: 0_level_1,match,Unnamed: 2_level_1
0,0,a1
1,0,b2
2,0,c3


In [162]:
s.str.len()

0    2
1    2
2    2
3    2
dtype: int64

In [163]:
s = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', np.nan])

In [165]:
s.str.strip()

0    1. Ant.
1    2. Bee!
2    3. Cat?
3        NaN
dtype: object

In [178]:
s.str.strip('123.?! \n\t')

0    Ant
1    Bee
2    Cat
3    NaN
dtype: object

In [180]:
s.str.rstrip()

0    1. Ant.
1    2. Bee!
2    3. Cat?
3        NaN
dtype: object

In [182]:
s.str.rstrip('123.!? \n\t')

0    1. Ant
1    2. Bee
2    3. Cat
3       NaN
dtype: object

In [183]:
s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

In [184]:
s.str.casefold()

0                 lower
1              capitals
2    this is a sentence
3              swapcase
dtype: object

In [188]:
s.str.find('e')

0     3
1    -1
2    11
3     7
dtype: int64

In [187]:
s.str.rfind('e')

0     3
1    -1
2    17
3     7
dtype: int64

In [193]:
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
dtype: object

In [204]:
s.str.normalize('NFKC')

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
dtype: object

In [205]:
s.str.normalize('NFD')

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
dtype: object

In [207]:
s.str.normalize('NFKD')

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
dtype: object

In [208]:
s.str.isalnum()

0     True
1     True
2    False
3     True
dtype: bool

In [209]:
s1 = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', np.nan])

In [210]:
s1.str.isalnum()

0    False
1    False
2    False
3      NaN
dtype: object

In [211]:
s1.str.isalpha()

0    False
1    False
2    False
3      NaN
dtype: object

In [213]:
s1 = pd.Series(['one', 'one1', '1', ''])

In [214]:
s1.str.isalnum()

0     True
1     True
2     True
3    False
dtype: bool