In [3]:
import re

import numpy as np
import pandas as pd

# Email addresses labelled by owner. The last entry is deliberately NaN so the
# cells that follow can show how missing values are treated.
data = pd.Series(
    ['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', np.nan],
    index=['Dave', 'Steve', 'Rob', 'Wes'],
)
data

Dave     dave@google.com
Steve    steve@gmail.com
Rob        rob@gmail.com
Wes                  NaN
dtype: object

In [4]:
# Boolean mask of the missing entries: True only where the value is NaN.
data.isna()

Dave     False
Steve    False
Rob      False
Wes       True
dtype: bool

In [8]:
'''
String and regular expression methods can be applied (passing a
lambda or other function) to each value using data.map, but this will fail on the NA
(null) values. To cope with this, Series has array-oriented methods for string
operations that skip NA values. These are accessed through Series’s str attribute; for
example, we could check whether each email address has 'gmail' in it with str.contains.
'''

# Element-wise substring check through the .str accessor; the missing entry
# comes back as NaN instead of raising (see the output below).
data.str.contains('gmail')

Dave     False
Steve     True
Rob       True
Wes        NaN
dtype: object

In [19]:
# Email regex with three capture groups: user name, domain name, domain suffix.
# Written as a raw string so the escaped dot needs only a single backslash;
# the resulting pattern text is unchanged.
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'

# The character classes list only upper-case letters, so re.IGNORECASE is
# required for the lower-case addresses. Each match is a tuple of the groups.
data.str.findall(pattern, flags=re.IGNORECASE)


Dave     [(dave, google, com)]
Steve    [(steve, gmail, com)]
Rob        [(rob, gmail, com)]
Wes                        NaN
dtype: object

In [22]:
# Check each address against the email pattern (defined in the previous cell),
# ignoring case. The result is True for matching entries and NaN for the
# missing one.
matches = data.str.match(pattern, flags=re.IGNORECASE)
matches

Dave     True
Steve    True
Rob      True
Wes       NaN
dtype: object

In [30]:
# First five characters of every address; the missing entry stays NaN.
data.str.slice(stop=5)


Dave     dave@
Steve    steve
Rob      rob@g
Wes        NaN
dtype: object

In [None]:
'''
cat -> Concatenate strings element-wise with optional delimiter

contains -> Return boolean array if each string contains pattern/regex

count -> Count occurrences of pattern

extract -> Use a regular expression with groups to extract one or more strings from a
            Series of strings; the result will be a DataFrame with one column per group

startswith -> Equivalent to x.startswith(pattern) for each element

endswith -> Equivalent to x.endswith(pattern) for each element

findall -> Compute list of all occurrences of pattern/regex for each string

get -> Index into each element (retrieve i-th element)

isalnum
Equivalent to built-in str.isalnum

isalpha
Equivalent to built-in str.isalpha

isdecimal
Equivalent to built-in str.isdecimal

isdigit
Equivalent to built-in str.isdigit

islower
Equivalent to built-in str.islower

isnumeric
Equivalent to built-in str.isnumeric

isupper
Equivalent to built-in str.isupper

join
Join strings in each element of the Series with passed separator

len
Compute length of each string

lower, upper -> Convert cases; equivalent to x.lower() or x.upper() for each element


match
Use re.match with the passed regular expression on each element, returning True or False depending on whether it matches

pad
Add whitespace to left, right, or both sides of strings

center
Equivalent to pad(side='both')

repeat
Duplicate values (e.g., s.str.repeat(3) is equivalent to x * 3 for each string)

replace
Replace occurrences of pattern/regex with some other string

slice
Slice each string in the Series

split
Split strings on delimiter or regular expression

strip
Trim whitespace from both sides, including newlines

rstrip
Trim whitespace on right side

lstrip
Trim whitespace on left side
'''