In [1]:
import pandas as pd

In [5]:
email = 'mako@gmail.com'


In [6]:
email.split('@')

['mako', 'gmail.com']

In [7]:
names = pd.Series(['andrew','bobo','claire','david','4'])

In [8]:
names

0    andrew
1      bobo
2    claire
3     david
4         4
dtype: object

In [9]:
names.str.upper()

0    ANDREW
1      BOBO
2    CLAIRE
3     DAVID
4         4
dtype: object

In [10]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [11]:
tech_finance = ['GOOG,APPL,AMZN','JPN,BAC,GS']

In [12]:
tickers = pd.Series(tech_finance)

In [16]:
tickers.str.split(',', expand= True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPN,BAC,GS


In [17]:
messy_names = pd.Series(['andrew  ', 'bo:bo', ' claire  '])

In [18]:
messy_names

0     andrew  
1        bo:bo
2     claire  
dtype: object

In [19]:
messy_names[0]

'andrew  '

In [22]:
# regex=False: treat ':' as a literal string. The default of `regex` differs
# between pandas versions, so state it explicitly to keep behavior stable.
messy_names.str.replace(':', "", regex=False).str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

In [25]:
def cleanup(name):
    """Return ``name`` with colons removed, outer whitespace stripped, and capitalized."""
    # Order matters for readability only: drop ':' first, then trim, then capitalize.
    return name.replace(":", "").strip().capitalize()

In [26]:
messy_names.apply(cleanup)

0    Andrew
1      Bobo
2    Claire
dtype: object

In [27]:
import timeit 
  
# This setup snippet is executed only once per timeit call.
# NOTE(review): the snippet builds its own three-element Series and uses ';'
# (not ':') as the character to remove, so it benchmarks its own copy of the
# data; with so few elements the timings presumably reflect mostly per-call
# overhead — confirm on a larger Series before drawing conclusions.
setup = '''
import pandas as pd
import numpy as np
messy_names = pd.Series(["andrew  ","bo;bo","  claire  "])
def cleanup(name):
    name = name.replace(";","")
    name = name.strip()
    name = name.capitalize()
    return name
'''
  
# These are the statements whose execution time we will measure
# (they are run many times).
stmt_pandas_str = ''' 
messy_names.str.replace(";","").str.strip().str.capitalize()
'''

stmt_pandas_apply = '''
messy_names.apply(cleanup)
'''

stmt_pandas_vectorize='''
np.vectorize(cleanup)(messy_names)
'''

In [28]:
# Total seconds for 10000 executions of the .str accessor chain.
timeit.timeit(stmt=stmt_pandas_str, setup=setup, number=10000)

3.3659339000005275

In [29]:
# Total seconds for 10000 executions of Series.apply(cleanup).
timeit.timeit(stmt=stmt_pandas_apply, setup=setup, number=10000)

1.1361236000084318

In [30]:
# Total seconds for 10000 executions of np.vectorize(cleanup) on the Series.
timeit.timeit(stmt=stmt_pandas_vectorize, setup=setup, number=10000)

0.23819770000409335