In [1]:
import numpy as np
import pandas as pd

In [2]:
# Sample Series of strings with different lengths, used by the .str examples below.
s = pd.Series(['abcd', 'efg', 'hi'])

In [3]:
# Display the Series; it is stored with dtype object.
s

0    abcd
1     efg
2      hi
dtype: object

In [4]:
# The .str attribute is a StringMethods accessor object; the string methods are called on it.
s.str

<pandas.core.strings.accessor.StringMethods at 0x1d1a1645880>

In [6]:
# Upper-case every element; the result is a new Series with the same index.
s.str.upper()

0    ABCD
1     EFG
2      HI
dtype: object

In [7]:
# Indexing through .str is applied element-wise: take the first character of each string.
s.str[0]

0    a
1    e
2    h
dtype: object

In [8]:
# Slices work element-wise too: walk each string backwards from its last character
# in steps of two, stopping before index 0 (e.g. 'abcd' -> 'db').
s.str[-1: 0: -2]

0    db
1     g
2     i
dtype: object

In [9]:
# An out-of-range position does not raise: 'hi' has no index 2, so that row becomes NaN.
s.str[2]

0      c
1      g
2    NaN
dtype: object

In [10]:
# Rebind s to a Series holding mixed Python objects: a dict, a list, a float and a str.
s = pd.Series([{1: 'temp_1', 2: 'temp_2'}, ['a', 'b'], 0.5, 'my_string'])

In [11]:
# Display the mixed-type Series (dtype object).
s

0    {1: 'temp_1', 2: 'temp_2'}
1                        [a, b]
2                           0.5
3                     my_string
dtype: object

In [12]:
# On an object Series, .str[1] indexes each element as it is: the dict is looked up by
# key 1, the list and the str by position 1, and the float (not indexable) yields NaN.
s.str[1]

0    temp_1
1         b
2       NaN
3         y
dtype: object

In [13]:
# After astype('string') every element is its text representation, so [1] returns the
# second character of that text (e.g. "{1: ..." -> '1', "0.5" -> '.').
s.astype('string').str[1]

0    1
1    '
2    .
3    y
dtype: string

In [15]:
# Two Series sharing the default index 0..1; str.cat joins them element-wise,
# placing the separator between the left and right values.
s1, s2 = pd.Series(list('ab')), pd.Series(['cat', 'dog'])
s1.str.cat(s2, sep='-')

0    a-cat
1    b-dog
dtype: object

In [16]:
# Re-label s2 so that its index (1, 2) only partly overlaps the index of s1 (0, 1).
# NOTE(review): this mutates the s2 object created in the earlier cell in place, so every
# cell executed after this one sees the new labels.
s2.index = [1, 2]
s2

1    cat
2    dog
dtype: object

In [17]:
# Display s1 for comparison: its index is still 0, 1.
s1

0    a
1    b
dtype: object

In [18]:
# Concatenate by index label with an outer join: the result covers labels 0, 1 and 2,
# and a side that has no value for a label is filled with na_rep ('?').
s1.str.cat(s2, sep='-', na_rep='?', join='outer')

0      a-?
1    b-cat
2    ?-dog
dtype: object

In [19]:
# A sentence that contains 'apple' twice; str.find reports the start position (11)
# of the first occurrence only.
s = pd.Series(['This is an apple. That is not an apple.'])
s.str.find('apple')

0    11
dtype: int64

In [20]:
# Display the full sentence the position above refers to.
s

0    This is an apple. That is not an apple.
dtype: object

In [21]:
# Addresses to translate. The first entry lists the road before the district, so it does
# not fit the city/district/road/number pattern below and str.replace leaves it unchanged.
s = pd.Series(['上海市方浜中路黄浦区249号',
               '上海市宝山区密山路5号',
               '北京市昌平区北农路2号'])
# Raw string so that \w and \d reach the regex engine untouched; in a normal string
# literal they are invalid escape sequences, which newer Python versions warn about.
# Groups: 1 = city, 2 = district, 3 = road, 4 = house number.
pat = r'(\w+市)(\w+区)(\w+路)(\d+号)'
# Lookup tables from the Chinese component to its English rendering.
city = {'上海市': 'Shanghai', '北京市': 'Beijing'}
district = {'昌平区': 'CP District',
            '黄浦区': 'HP District',
            '宝山区': 'BS District'}
road = {'方浜中路': 'Mid Fangbin Road',
        '密山路': 'Mishan Road',
        '北农路': 'Beinong Road'}


def my_func(m):
    """Build the English address for one regex match of ``pat``.

    Parameters
    ----------
    m : re.Match
        Match object whose groups 1-4 hold city, district, road and house number.

    Returns
    -------
    str
        The translated components joined by single spaces.

    Raises
    ------
    KeyError
        If a matched city, district or road is missing from the lookup tables.
    """
    str_city = city[m.group(1)]
    str_district = district[m.group(2)]
    str_road = road[m.group(3)]
    # Drop the trailing '号' character and prefix the number with 'No. '.
    str_no = 'No. ' + m.group(4)[:-1]
    return ' '.join([str_city, str_district, str_road, str_no])


# With regex=True and a callable, each match is replaced by the callable's return value.
s.str.replace(pat, my_func, regex=True)

0                            上海市方浜中路黄浦区249号
1    Shanghai BS District Mishan Road No. 5
2    Beijing CP District Beinong Road No. 2
dtype: object

In [25]:
# Addresses written in city/district/road/number order, so every row matches the pattern.
s = pd.Series(['上海市黄浦区方浜中路249号',
               '上海市宝山区密山路5号',
               '北京市昌平区北农路2号'])
# Raw string so that \w and \d reach the regex engine untouched; in a normal string
# literal they are invalid escape sequences, which newer Python versions warn about.
pat = r'(\w+市)(\w+区)(\w+路)(\d+号)'


# NOTE: this rebinds the name my_func; any definition of it from an earlier cell is
# shadowed from here on.
def my_func(m):
    """Return the marker '匹配' followed by the first captured group (the city).

    Parameters
    ----------
    m : re.Match
        Match object produced for ``pat``.

    Returns
    -------
    str
        Replacement text for the whole match.
    """
    return '匹配' + m.group(1)


# The whole matched address is replaced by the callable's return value.
s.str.replace(pat, my_func, regex=True)

0    匹配上海市
1    匹配上海市
2    匹配北京市
dtype: object

In [26]:
# Raw string so that \w and \d reach the regex engine untouched; in a normal string
# literal they are invalid escape sequences, which newer Python versions warn about.
pat = r'(\w+市)(\w+区)(\w+路)(\d+号)'
# extract returns one column per capture group, taken from the first match in each row.
s.str.extract(pat)

Unnamed: 0,0,1,2,3
0,上海市,黄浦区,方浜中路,249号
1,上海市,宝山区,密山路,5号
2,北京市,昌平区,北农路,2号


In [38]:
# Each element holds several comma-separated codes: a letter A/B, digits, a letter T/S, digits.
s = pd.Series(['A135T15,A26S5', 'B674S2,B25T6'], index=['my_A', 'my_B'])
# Inside [...] the '|' is a literal character, not alternation, so '[A|B]' would also
# accept '|'. Use plain character classes, and a raw string so \d is not treated as a
# string escape.
pat = r'[AB](\d+)[TS](\d+)'
s

my_A    A135T15,A26S5
my_B     B674S2,B25T6
dtype: object

In [39]:
# extractall returns one row per match; the result is indexed by the original label
# plus a 'match' counter, with one column per capture group.
s.str.extractall(pat)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1
my_A,0,135,15
my_A,1,26,5
my_B,0,674,2
my_B,1,25,6


In [40]:
s = pd.Series(['cat rat fat at', 'get feed sheet heat'])
# Count non-overlapping matches of 'rat', 'fat' or 'ee' in each element.
# Inside [...] the '|' is a literal character, so the class is written '[rf]' rather
# than '[r|f]', which would also count '|at'.
s.str.count('[rf]at|ee')

0    2
1    2
dtype: int64

In [41]:
# The .str accessor is also available on an Index. The labels carry stray leading and/or
# trailing spaces; strip() removes them, after which every label has length 4.
my_index = pd.Index([' col1', 'col2 ', ' col3 '])
my_index.str.strip().str.len()

Int64Index([4, 4, 4], dtype='int64')