In [79]:
import pandas as pd
import numpy as np

In [80]:
# Small random frame whose headers carry leading/trailing spaces on purpose,
# so the header clean-up below has something to strip.
df = pd.DataFrame(
    np.random.randn(3, 2),
    index=range(3),
    columns=[' Column A ', ' Column B '],
)
df

Unnamed: 0,Column A,Column B
0,-0.019788,-0.421768
1,-0.865284,-0.708722
2,0.148111,0.07953


In [81]:
# Normalise the headers: trim the padding, lowercase, then replace the
# remaining inner space with an underscore.
(
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

Index(['column_a', 'column_b'], dtype='object')

In [82]:
# Underscore-delimited strings used by the split examples.
s2 = pd.Series(
    ['a_b_c', 'c_d_e', 'f_g_h'],
)

In [83]:
# Split every string on '_' and zip the resulting lists together, so each
# original string becomes a column of the frame.
pd.DataFrame(
    list(zip(*s2.str.split('_').values))
)

Unnamed: 0,0,1,2
0,a,c,f
1,b,d,g
2,c,e,h


In [84]:
#list(zip(*[(s2.str.split('_')).pipe(lambda x: x.values)])) #if you wanna add []
# Pick the second piece (position 1) of every split string.
s2.str.split('_').str[1]

0    b
1    d
2    g
dtype: object

In [85]:
# Or simply let pandas spread the pieces into columns with expand=True.
# (Per the original note, this also works when the data contains np.nan.)
s2.str.split(pat='_', expand=True)

Unnamed: 0,0,1,2
0,a,b,c
1,c,d,e
2,f,g,h


In [86]:
# n=1 limits the split to the first separator, counted from the left.
s2.str.split(pat='_', n=1, expand=True)

Unnamed: 0,0,1
0,a,b_c
1,c,d_e
2,f,g_h


In [87]:
# Same limit of one split, but rsplit counts separators from the right.
s2.str.rsplit(pat='_', n=1, expand=True)

Unnamed: 0,0,1
0,a_b,c
1,c_d,e
2,f_g,h


In [88]:
s3 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', '', np.nan, 'CABA', 'dog', 'cat'])
## regular expression
# The pattern is a regex ("any character followed by 'a' at the start" OR
# "dog"), matched case-insensitively. regex=True is passed explicitly because
# the default of `regex` is not the same across pandas versions; with
# regex=False the pattern would be taken literally and nothing would match.
s3_replaced = s3.str.replace('^.a|dog', 'XX', case=False, regex=True)
s3_replaced

0       A
1       B
2       C
3    XXba
4    XXca
5        
6     NaN
7    XXBA
8      XX
9     XXt
dtype: object

In [89]:
dollars = pd.Series(['12', '-$10', '$10,000'])
# Regex form: '$' is a regex metacharacter, so it is escaped. regex=True is
# explicit because the default of `regex` differs between pandas versions.
# This result is kept in a variable only; the cell echoes the last expression.
dollars_regex = dollars.str.replace(r'-\$', '-', regex=True)
# Literal form: with regex=False the pattern is plain text, no escaping needed.
dollars_literal = dollars.str.replace('-$', '-', regex=False)
dollars_literal

0         12
1        -10
2    $10,000
dtype: object

In [90]:
# Match each run of lowercase letters and replace it with itself reversed.
pat = r'[a-z]+'
repl = lambda m: m.group(0)[::-1]
# A callable replacement only works in regex mode, so regex=True is passed
# explicitly rather than relying on the version-dependent default.
reversed_lower = pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(pat, repl, regex=True)
reversed_lower

0    oof 123
1    rab zab
2        NaN
dtype: object

In [91]:
# Match three whitespace-separated words as named groups and replace the whole
# match with the case-swapped middle word.
pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
repl = lambda m: m.group('two').swapcase()
# A callable replacement only works in regex mode, so regex=True is passed
# explicitly rather than relying on the version-dependent default.
swapped_middle = pd.Series(['Foo Bar Baz', np.nan]).str.replace(pat, repl, regex=True)
swapped_middle

0    bAR
1    NaN
dtype: object

In [92]:
# With no `others`, str.cat joins the Series' own values into one string.
s = pd.Series(
    ['a', 'b', 'c', 'd'],
)
s.str.cat(sep=',')

'a,b,c,d'

In [93]:
# na_rep supplies the text that stands in for a missing value when joining.
t = pd.Series(
    ['a', 'b', np.nan, 'd'],
)
t.str.cat(sep='/', na_rep='oh my god!')

'a/b/oh my god!/d'

In [94]:
# Element-wise concatenation with a plain list. This value is not echoed:
# only the last expression of the cell is displayed.
s.str.cat(['A', 'B', 'C', 'D'])
# Concatenate with every column of a DataFrame; missing values become '_'.
d = pd.concat([s, t], axis=1)
s.str.cat(d, na_rep='_')

0    aaa
1    bbb
2    cc_
3    ddd
dtype: object

In [95]:
# `u` carries a shuffled index; join='left' aligns it on the index of `s`
# before concatenating.
u = pd.Series(['b', 'd', 'a', 'c'], index=[1, 3, 0, 2])
s.str.cat(u, join='left')

0    aa
1    bb
2    cc
3    dd
dtype: object

In [96]:
## join more than two series
# `sep` has to be a plain string (it was a one-element list before).
# Every element of `others` is given as an Index or a 1-D ndarray: newer
# pandas releases reject plain lists / iterators nested inside the list.
# `u.values` is passed instead of `u` so the pieces are combined by position
# regardless of the pandas version's default index alignment.
tm = s.str.cat(
    [
        u.values,
        pd.Index(u.values),
        np.array(['A', 'B', 'C', 'D'], dtype=object),
        np.array([str(label) for label in u.index], dtype=object),
    ],
    na_rep='-',
    sep=' ',
)

  


In [97]:
# Display the concatenated strings stored in `tm`.
tm

0    a b b A 1
1    b d d B 3
2    c a a C 0
3    d c c D 2
dtype: object

In [98]:
# Keep only the first three characters of every string.
tm.str.slice(0, 3)

0    a b
1    b d
2    c a
3    d c
dtype: object

In [99]:
# Named groups become the column names; rows that do not match are NaN.
# The pattern is a raw string so that `\d` is not an invalid string escape.
letter_digit = pd.Series(['a1', 'b2', 'c3']).str.extract(
    r'(?P<letter>[ab])(?P<holycow>\d)', expand=False
)
letter_digit

Unnamed: 0,letter,holycow
0,a,1.0
1,b,2.0
2,,


In [100]:
## optional group: the '?' after ([ab]) makes that group optional,
## so '3' still matches and only its first column is NaN.
# The pattern is a raw string so that `\d` is not an invalid string escape.
optional_group = pd.Series(['a1', 'b2', '3']).str.extract(r'([ab])?(\d)', expand=False)
optional_group

Unnamed: 0,0,1
0,a,1
1,b,2
2,,3


In [103]:
# Only the capturing group is returned; the optional [ab] prefix is matched
# but not captured.
# Note expand=True: the result is a DataFrame with a single column labelled 0.
# The pattern is a raw string so that `\d` is not an invalid string escape.
digits_only = pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab]?(\d)', expand=True)
digits_only

Unnamed: 0,0
0,1
1,2
2,3


In [104]:
# The .str accessor is also available on an Index: extract from the labels.
s = pd.Series(
    ["a1", "b2", "c3"],
    index=["A11", "B22", "C33"],
)
s.index.str.extract(
    "(?P<letter>[a-zA-Z])([0-9]+)",
    expand=True,
)

Unnamed: 0,letter,1
0,A,11
1,B,22
2,C,33


In [105]:
## extractall returns every match, one row per match, with an extra
## 'match' index level numbering the matches within each element.
two_groups = '(?P<letter>[a-z])(?P<digit>[0-9])'
s = pd.Series(
    ["a1a2", "b1", "c1"],
    index=["A", "B", "C"],
)
s.str.extractall(pat=two_groups)

Unnamed: 0_level_0,Unnamed: 1_level_0,letter,digit
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0,a,1
A,1,a,2
B,0,b,1
C,0,c,1


In [106]:
# Cross-section on the 'match' level: keep only match number 0 of each element.
(
    s.str.extractall(two_groups)
    .xs(0, level='match')
)

Unnamed: 0,letter,digit
A,a,1
B,b,1
C,c,1


In [111]:
## contains vs. match
# contains: True when the pattern (a digit followed by a lowercase letter)
# is found in the string.
pattern = r'[0-9][a-z]'
pd.Series(
    ['1', '2', '3a', '3b', '03c'],
).str.contains(pattern)

0    False
1    False
2     True
3     True
4     True
dtype: bool

In [112]:
# Same data and pattern with match: '03c' is now False, unlike with contains.
pd.Series(
    ['1', '2', '3a', '3b', '03c'],
).str.match(pattern)

0    False
1    False
2     True
3     True
4    False
dtype: bool

In [113]:
# na=False makes the missing entry report False instead of NaN.
s4 = pd.Series(
    ['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'],
)
s4.str.contains(pat='A', na=False)

0     True
1    False
2    False
3     True
4    False
5    False
6     True
7    False
8    False
dtype: bool

In [115]:
# One indicator column per distinct '|'-separated token; the NaN row is all 0.
s = pd.Series(
    ['a', 'a|b', np.nan, 'a|c'],
)
s.str.get_dummies('|')

Unnamed: 0,a,b,c
0,1,0,0
1,1,1,0
2,0,0,0
3,1,0,1


In [114]:
# The same call on an Index returns a MultiIndex, one level per token.
idx = pd.Index(
    ['a', 'a|b', np.nan, 'a|c'],
)
idx.str.get_dummies('|')

MultiIndex(levels=[[0, 1], [0, 1], [0, 1]],
           labels=[[1, 1, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]],
           names=['a', 'b', 'c'])

In [118]:
# partition splits at the first '_' into three columns: text before, the
# separator itself, and text after.
s = pd.Series(['A_B_C', 'D_E_F', 'X'])
ss = s.str.partition('_')
ss

Unnamed: 0,0,1,2
0,A,_,B_C
1,D,_,E_F
2,X,,


In [121]:
# Partition the remainder (third column) again and put the pieces next to the
# first two columns, then renumber the columns 0..n-1.
sss = pd.concat(
    [ss.iloc[:, 0:2], ss.iloc[:, 2].str.partition('_')],
    axis=1,
)
sss.columns = list(range(len(sss.columns)))
sss

Unnamed: 0,0,1,2,3,4
0,A,_,B,_,C
1,D,_,E,_,F
2,X,,,,


In [122]:
# Split each string at the last '_' into three columns (before, separator, after).
s.str.rpartition('_')

Unnamed: 0,0,1,2
0,A_B,_,C
1,D_E,_,F
2,,,X


In [124]:
# Plain Python str.rjust: pad on the left with '!' up to a total width of 10.
"ABC".rjust(10, "!")

'!!!!!!!ABC'