# Text Methods

A normal Python string has a variety of built-in methods available:

In [1]:
mystring = 'hello'

In [2]:
mystring.capitalize()

'Hello'

In [3]:
mystring.isdigit()

False

In [4]:
#help(str)

## Pandas and Text

Official pandas user guide on working with text data: https://pandas.pydata.org/docs/user_guide/text.html

In [5]:
import numpy as np
import pandas as pd

In [6]:
names = pd.Series(['john','sam','steve','david','24'])

In [7]:
names

0     john
1      sam
2    steve
3    david
4       24
dtype: object

In [8]:
names.str.capitalize()

0     John
1      Sam
2    Steve
3    David
4       24
dtype: object

In [9]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

## Splitting, Grabbing, and Expanding

In [10]:
tech_finance = ['GOOG,APPL,AMZN','JPM,BAC,GS']

In [11]:
tech_finance

['GOOG,APPL,AMZN', 'JPM,BAC,GS']

In [12]:
len(tech_finance)

2

In [13]:
tickers = pd.Series(tech_finance)

In [14]:
tickers

0    GOOG,APPL,AMZN
1        JPM,BAC,GS
dtype: object

In [15]:
tickers.str.split(',')

0    [GOOG, APPL, AMZN]
1        [JPM, BAC, GS]
dtype: object

In [16]:
tickers.str.split(',').str[0]

0    GOOG
1     JPM
dtype: object

In [17]:
tickers.str.split(',',expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


## Cleaning or Editing Strings

In [18]:
messy_names = pd.Series(["john ","s;am","  claire  "])

In [19]:
messy_names

0         john 
1          s;am
2      claire  
dtype: object

In [20]:
# regex=False: treat ";" as a literal string regardless of the installed pandas version's default.
messy_names.str.replace(";", "", regex=False)

0         john 
1           sam
2      claire  
dtype: object

In [21]:
messy_names.str.strip()

0      john
1      s;am
2    claire
dtype: object

In [22]:
# regex=False: treat ";" as a literal string regardless of the installed pandas version's default.
messy_names.str.replace(";", "", regex=False).str.strip()

0      john
1       sam
2    claire
dtype: object

In [23]:
# Full clean-up chain: drop semicolons, trim surrounding whitespace, capitalize.
# regex=False: treat ";" as a literal string regardless of the installed pandas version's default.
(
    messy_names.str.replace(";", "", regex=False)
    .str.strip()
    .str.capitalize()
)

0      John
1       Sam
2    Claire
dtype: object

## Alternative with a Custom apply() Call

In [24]:
def cleanup(name):
    """Return a tidied copy of a single name.

    The clean-up removes every semicolon, strips leading and trailing
    whitespace, and then capitalizes the result (first character upper-cased,
    the rest lower-cased).

    Parameters
    ----------
    name : str or any
        The raw value to clean. Non-string values (for example ``None`` or a
        float ``NaN`` marking a missing entry) are returned unchanged.

    Returns
    -------
    str or any
        The cleaned string, or the original object when it is not a string.
    """
    # Missing values have no string methods; pass them through so that
    # Series.apply(cleanup) does not raise AttributeError on gaps in the data.
    if not isinstance(name, str):
        return name
    return name.replace(";", "").strip().capitalize()

In [25]:
messy_names

0         john 
1          s;am
2      claire  
dtype: object

In [26]:
messy_names.apply(cleanup)

0      John
1       Sam
2    Claire
dtype: object

In [27]:
# np.vectorize is a convenience wrapper (essentially a Python-level loop), so treat it
# as an alternative calling style rather than a performance optimization.
# numpy is imported as np in the notebook's import cell.
np.vectorize(cleanup)(messy_names)

array(['John', 'Sam', 'Claire'], dtype='<U6')

## Practice, Practice, and Practice