In [1]:
import numpy as np
import pandas as pd

In [2]:
email = 'jose@email.com'

In [3]:
email.split('@')

['jose', 'email.com']

In [5]:
names = pd.Series(['andrew','bobo','claire','david','5'])

In [6]:
names

0    andrew
1      bobo
2    claire
3     david
4         5
dtype: object

In [7]:
names.str.upper()

0    ANDREW
1      BOBO
2    CLAIRE
3     DAVID
4         5
dtype: object

In [8]:
names.str.lower()

0    andrew
1      bobo
2    claire
3     david
4         5
dtype: object

In [9]:
email.isdigit()

False

In [10]:
'5'.isdigit()

True

In [11]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [12]:
tech_finanace = ['GGOG,APL,AMZN','JPM,BAC,GS']

In [13]:
len(tech_finanace)

2

In [14]:
tickers = pd.Series(tech_finanace)

In [15]:
tickers

0    GGOG,APL,AMZN
1       JPM,BAC,GS
dtype: object

In [16]:
tickers.str.split(',')

0    [GGOG, APL, AMZN]
1       [JPM, BAC, GS]
dtype: object

In [17]:
tickers.str.split(',')[0]

['GGOG', 'APL', 'AMZN']

In [18]:
tickers.str.split(',')[0][2]

'AMZN'

In [21]:
tickers.str.split(',')[1]

['JPM', 'BAC', 'GS']

In [22]:
# expand=True spreads the split pieces across separate columns (a DataFrame)
# instead of returning one list per row.
tickers.str.split(',',expand=True)

Unnamed: 0,0,1,2
0,GGOG,APL,AMZN
1,JPM,BAC,GS


In [23]:
df = tickers.str.split(',',expand=True)

In [26]:
type(df)

pandas.core.frame.DataFrame

In [27]:
mess_name = pd.Series(['andrew   ',"bo;bo","  claire "])

In [28]:
mess_name

0    andrew   
1        bo;bo
2      claire 
dtype: object

In [31]:
# Clean every name in one chained expression: drop ';', strip the surrounding
# whitespace, then capitalize.
mess_name.str.replace(';','').str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

In [46]:
def cleanup(name):
    """Tidy a Series of names.

    Removes every ';', strips leading/trailing whitespace, and capitalizes
    each entry using the vectorized ``.str`` accessor.

    Parameters
    ----------
    name : pandas.Series
        Series of strings to clean. The caller's Series is left untouched;
        each step produces a new Series.

    Returns
    -------
    pandas.Series
        The cleaned names, in the same order as the input.
    """
    # regex=False pins a literal match, so the result does not depend on the
    # installed pandas version's default for `regex`.
    name = name.str.replace(';', '', regex=False)
    name = name.str.strip()
    name = name.str.capitalize()
    return name

In [47]:
result = cleanup(mess_name)

In [48]:
result

0    Andrew
1      Bobo
2    Claire
dtype: object

In [49]:
import timeit
# Code snippet executed only once, before the timed statements run.
# It builds its own `messy_name` Series and defines a `cleanup` helper that
# works on a single string (plain `replace`/`strip`/`capitalize`, no `.str`
# accessor), so it can be applied element by element.
setup = '''
import pandas as pd
import numpy as np
messy_name = pd.Series(['andrew   ',"bo;bo","  claire "])
def cleanup(name):
    name = name.replace(';','')
    name = name.strip()
    name = name.capitalize()
    return name
'''

# Code snippets whose execution time is to be measured.
# Variant 1: chain the `.str` accessor methods on the whole Series.
stmt_pandas_str ='''
messy_name.str.replace(';','').str.strip().str.capitalize()
'''

# Variant 2: call the setup's `cleanup` on each element via `Series.apply`.
stmt_pandas_apply = '''
messy_name.apply(cleanup)'''

# Variant 3: wrap the setup's `cleanup` with `np.vectorize` and call it on the Series.
# NOTE(review): `messy_name` has only three elements, so the timings presumably
# reflect per-call overhead more than per-element cost — confirm on a larger
# Series before drawing conclusions about which approach is fastest.
stmt_pandas_vectorize= '''
np.vectorize(cleanup)(messy_name)
'''

In [50]:
timeit.timeit(setup = setup, stmt =stmt_pandas_str,number = 1000 )

0.4321747999999843

In [51]:
timeit.timeit(setup = setup, stmt = stmt_pandas_apply, number = 1000)

0.1535393000001477

In [52]:
timeit.timeit(setup = setup, stmt = stmt_pandas_vectorize, number = 1000)

0.029803099999980986