## Regular Expressions

In [0]:
import re

In [0]:
text = "foo   bar\t baz   \tqux"

In [3]:
re.split('\s+', text)

['foo', 'bar', 'baz', 'qux']

In [0]:
regex = re.compile('\s+')

In [5]:
regex.split(text)

['foo', 'bar', 'baz', 'qux']

In [6]:
regex.findall(text)

['   ', '\t ', '   \t']

In [0]:
text = """Dave dave@google.com
Steve steve@gmail.com
Rob rob@gmail.com
Ryan ryan@yahoo.com
"""

pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'

# re.IGNORECASE makes the regex case - insensitive

regex = re.compile(pattern, flags = re.IGNORECASE)

In [8]:
regex.findall(text)

['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']

In [9]:
m = regex.search(text)

m

<_sre.SRE_Match object; span=(5, 20), match='dave@google.com'>

In [10]:
text[m.start() : m.end()]

'dave@google.com'

In [11]:
print(regex.match(text))

None


In [12]:
print(regex.sub('REDACTED', text))

Dave REDACTED
Steve REDACTED
Rob REDACTED
Ryan REDACTED



In [0]:
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})'

regex = re.compile(pattern, flags = re.IGNORECASE)

In [0]:
m = regex.match('wesm@bright.net')

In [15]:
m.groups()

('wesm', 'bright', 'net')

In [16]:
regex.findall(text)

[('dave', 'google', 'com'),
 ('steve', 'gmail', 'com'),
 ('rob', 'gmail', 'com'),
 ('ryan', 'yahoo', 'com')]

In [17]:
print(regex.sub(r'Username: \1, Domain: \2, Suffix: \3', text))

Dave Username: dave, Domain: google, Suffix: com
Steve Username: steve, Domain: gmail, Suffix: com
Rob Username: rob, Domain: gmail, Suffix: com
Ryan Username: ryan, Domain: yahoo, Suffix: com



## Regular expression methods

### re.findall()

In [18]:
str = "hello, education is fun"

r1 = re.findall(r"^\w+", str)
r1

['hello']

In [19]:
print((re.split(r'\s', 'we are splitting the words')))

print((re.split(r's', 'split the words')))

['we', 'are', 'splitting', 'the', 'words']
['', 'plit the word', '']


In [20]:
abc = 'guru99@google.com, careerguru99@hotmail.com, users@yahoomail.com'

emails = re.findall(r'[\w\.-]+@[\w\.-]+', abc)

for email in emails:
  print(email)

guru99@google.com
careerguru99@hotmail.com
users@yahoomail.com


In [21]:
str = """guru99
careerguru99
selenium"""

k1 = re.findall(r"^\w", str)
k2 = re.findall(r"^\w", str, re.MULTILINE)

print(k1)
print(k2)

['g']
['g', 'c', 's']


### re.finditer()

In [22]:
re.finditer(r'\w', 'http://www.hackerrank.com/')

<callable_iterator at 0x7f54e5419390>

In [23]:
map(lambda x: x.group(), re.finditer(r'\w', 'http://www.hackerrank.com/'))

<map at 0x7f54e5419588>

In [24]:
s1 = 'Blue Berries'

pattern = 'Blue Berries'

for match in re.finditer(pattern, s1):
  s = match.start()
  e = match.end()
  print('String match "%s" at %d: %d' % (s1[s:e], s, e))

String match "Blue Berries" at 0: 12


### re.match()

In [25]:
line = "Cats are smarter than dogs"

matchObj = re.match(r'(.*) are (.*?) .*', line, re.M|re.I)

if matchObj:
  print("matchObj.group(): ", matchObj.group())
  print("matchObj.group(1): ", matchObj.group(1))
  print("matchObj.group(2): ", matchObj.group(2))
else:
  print("No match")

matchObj.group():  Cats are smarter than dogs
matchObj.group(1):  Cats
matchObj.group(2):  smarter


In [26]:
matchObj = re.match(r'dogs', line, re.M|re.I)

if matchObj:
  print("match --> matchObj.group(): ", matchObj.group())
else:
  print("No match")

No match


### re.search()

In [27]:
line = "Cats are smarter than dogs"

searchObj = re.search(r'(.*) are (.*?) .*', line, re.M|re.I)

if searchObj:
  print("searchObj.group(): ", searchObj.group())
  print("searchObj.group(1): ", searchObj.group(1))
  print("searchObj.group(2): ", searchObj.group(2))
else:
  print("No match")

searchObj.group():  Cats are smarter than dogs
searchObj.group(1):  Cats
searchObj.group(2):  smarter


In [28]:
searchObj = re.search(r'dogs', line, re.M|re.I)

if searchObj:
  print("match --> matchObj.group(): ", searchObj.group())
else:
  print("No match")

match --> matchObj.group():  dogs


### re.split()

In [29]:
print((re.split(r'\s', 'we are splitting the words')))

['we', 'are', 'splitting', 'the', 'words']


In [30]:
print((re.split(r's', 'split the words')))

['', 'plit the word', '']


In [31]:
# '\W+' denotes non - alphanumeric characters or group of characters
# Upon finding ',' or whitespace ' ', the split(), splits the string from that point
print(re.split('\W+', 'Words, words , Words'))
print(re.split('\W+', "Word's words Words"))

# Here ':', ' ', ',' are not alphanumeric thus, the point where splitting occurs
print(re.split('\W+', 'On 12th Jan 2016, at 11:02 AM'))

# '\d+' denotes numeric characters or group of characters
# Splitting occurs at '12', '2016', '11', '02' only
print(re.split('\d+', 'On 12th Jan 2016, at 11:02 AM'))

['Words', 'words', 'Words']
['Word', 's', 'words', 'Words']
['On', '12th', 'Jan', '2016', 'at', '11', '02', 'AM']
['On ', 'th Jan ', ', at ', ':', ' AM']


### re.sub()

In [32]:
phone = "2004-959-559 # This is Phone Number"

# Delete Python style comments
num = re.sub(r'#.*$', "", phone)
print("Phone Num: ", num)

# Remove anything other than digits
num = re.sub(r'\D', "", phone)
print("Phone Num: ", num)

Phone Num:  2004-959-559 
Phone Num:  2004959559


In [0]:
# def multiply(m):
#   # Convert group 0 to an integer
#   v = int(m.group(0))
  
#   # Multiply integer by 2
#   # Convert back into string and return it
#   return str(v * 2)

# # Use pattern of 1 or more digits
# # Use multiply method as second argument
# result = re.sub("\d+", multiply, "10 20 30 40 50")
# result

In [34]:
v = "running eating reading"

# Replace words starting with 'r' and ending in 'ing'
# With a new string
v = re.sub(r"r.*?ing", "ring", v)
v

'ring eating ring'

In [35]:
input = "laugh eat sleep think"

result = re.sub("\w+", lambda m: m.group(0) + "ing", input)
result

'laughing eating sleeping thinking'

In [36]:
plants = {"flower": 1, "tree": 1, "grass": 1}

def modify(m):
  v = m.group(0)
  
  # If string is in dictionary, return different string
  if v in plants:
    return "PLANT"
  
  # Don't change anything
  return v

# Modify to remove all strings within the dictionary
result = re.sub("\w+", modify, "bird flower dog fish tree")
result

'bird PLANT dog fish PLANT'

In [37]:
str = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

# Returns new string with all replacements
# re.sub(pat, replacement, str)
# \1 is group(1), \2 is group(2) in the replacement

print(re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@yo-yo-dyne.com', str))

purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher


### re.subn()

In [0]:
# def add(m):
  
#   # Convert
#   v = int(m.group(0))
  
#   # Add 2
#   return str(v + 1)

# result = re.subn("\d+", add, "1 2 3 4 5")

# print("Result string: ", result[0])
# print("Number of substitutions: ", result[1])

In [40]:
print(re.subn('ub', '~*', 'Subject has booked Uber already'))

t = re.subn('ub', '~*', 'Subject has booked Uber already',
            flags = re.IGNORECASE)

print(t)
print(len(t))

# This will give same output as sub() would have
print(t[0])

('S~*ject has booked Uber already', 1)
('S~*ject has booked ~*er already', 2)
2
S~*ject has booked ~*er already


## Back to book content

## Vectorized String Functions in pandas

In [0]:
import numpy as np
import pandas as pd

In [42]:
data = {'Dave': 'dave@google.com',
        'Steve': 'steve@gmail.com',
        'Rob': 'rob@gmail.com',
        'Wes': np.nan}

data = pd.Series(data)
data

Dave     dave@google.com
Rob        rob@gmail.com
Steve    steve@gmail.com
Wes                  NaN
dtype: object

In [43]:
data.isnull()

Dave     False
Rob      False
Steve    False
Wes       True
dtype: bool

In [44]:
data.str.contains('gmail')

Dave     False
Rob       True
Steve     True
Wes        NaN
dtype: object

In [0]:
pattern = '([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})'

In [46]:
data.str.findall(pattern, flags = re.IGNORECASE)

Dave     [(dave, google, com)]
Rob        [(rob, gmail, com)]
Steve    [(steve, gmail, com)]
Wes                        NaN
dtype: object

In [47]:
matches = data.str.match(pattern, flags = re.IGNORECASE)

matches

Dave     True
Rob      True
Steve    True
Wes       NaN
dtype: object

In [48]:
matches.str.get(1)

Dave    NaN
Rob     NaN
Steve   NaN
Wes     NaN
dtype: float64

In [49]:
matches.str[0]

Dave    NaN
Rob     NaN
Steve   NaN
Wes     NaN
dtype: float64

In [50]:
data.str[:5]

Dave     dave@
Rob      rob@g
Steve    steve
Wes        NaN
dtype: object

## Partial listing of vectorized string methods

### cat()

### __contains__()

In [51]:
s = 'abc'

print('s contains a = ', s.__contains__('a'))
print('s contains A = ', s.__contains__('A'))
print('s contains X = ', s.__contains__('X'))

s contains a =  True
s contains A =  False
s contains X =  False


In [0]:
# print(str.__contains__('ABC', 'A'))
# print(str.__contains__('ABC', 'D'))

In [0]:
# input_str1 = input('Enter first input string\n')

# input_str2 = input('Enter second input string\n')

# print('First input string contains second string?',
#       input_str1.__contains__(input_str2))

### count()

In [54]:
string = "Python is awesome, isn't it?"
substring = "is"

count = string.count(substring)
count

2

In [55]:
string = "Python is awesome, isn't it?"
substring = "i"

# count after first 'i' and before the last 'i'
count = string.count(substring, 8, 25)
count

1

### extract()

### endswith()

In [56]:
text = "Python is easy to learn."

result = text.endswith('to learn')
print(result)   # False

result = text.endswith('to learn.')
print(result)    # True

result = text.endswith('Python is easy to learn.')
print(result)   # True

False
True
True


In [57]:
text = "Python programming is easy to learn."

# start parameter: 7
# "programming is easy to learn." string is searched
result = text.endswith('learn.', 7)
print(result)

# start: 7, end: 26
# "programming is easy" string is searched
result = text.endswith('is', 7, 26)
print(result)

result = text.endswith('easy', 7, 26)
print(result)

True
False
True


In [58]:
text = "programming is easy"

result = text.endswith(('programming', 'python'))
print(result)    # False

result = text.endswith(('python', 'easy', 'java'))
print(result)    # True

# with start and end parameter
# 'programming is' string is searched
result = text.endswith(('is', 'an'), 0, 14)
print(result)

False
True
True


### startswith()

In [59]:
text = "Python is easy to learn."

result = text.startswith('is easy')
print(result)    # False

result = text.startswith('Python is')
print(result)    # True

result = text.startswith('Python is easy to learn')
print(result)    # True

False
True
True


In [60]:
text = "Python programming is easy."

# start parameter: 7
# 'programming is easy.' string is searched
result = text.startswith('programming is', 7)
print(result)

# start: 7, end: 10
# 'programming' string is searched
result = text.startswith('programming is', 7, 18)
print(result)

result = text.startswith('program', 7, 18)
print(result)

True
False
True


In [61]:
text = "programming is easy"

result = text.startswith(('python', 'programming'))
print(result)    # True

result = text.startswith(('is', 'easy', 'java'))
print(result)    # False

# with start and end parameter
# 'is easy' string is checked
result = text.startswith(('programming', 'easy'), 12, 19)
print(result)    # False

True
False
False


### findall()

In [0]:
## See examples of re.findall()

### get()

In [0]:
monte = pd.Series(['Graham Chapman', 'John Cleese', 
                   'Terry Gilliam', 'Eric Idle', 'Terry Jones',
                   'Michael Palin'])

In [64]:
monte.str[0:3]

0    Gra
1    Joh
2    Ter
3    Eri
4    Ter
5    Mic
dtype: object

In [65]:
monte.str.split().str.get(-1)

0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

### isalnum()

In [66]:
name = "M234onica"
print(name.isalnum())

# contains whitespace
name = "M3onica Gell22er "
print(name.isalnum())

name = "Mo3nicaGell22er"
print(name.isalnum())

name = "133"
print(name.isalnum())

True
False
True
True


In [67]:
name = "M0n1caG3ll3r"

if name.isalnum() == True:
  print("All characters of string (name) are alphanumeric")
else:
  print("All characters aren't alphanumeric")

All characters of string (name) are alphanumeric


### isalpha()

In [68]:
name = "Monica"
print(name.isalpha())

# contains whitespace
name = "Monica Geller"
print(name.isalpha())

# contains number
name = "Mo3nicaGell22er"
print(name.isalpha())

True
False
False


In [69]:
name = "MonicaGeller"

if name.isalpha() == True:
  print("All characters are alphabets")
else:
  print("All characters are not alphabets")

All characters are alphabets


### isdecimal()

In [70]:
s = "28212"
print(s.isdecimal())

# contains alphabet
s = "32ladk3"
print(s.isdecimal())

# contains alphabets and spaces
s = "Mo3 nicaG el l22er"
print(s.isdecimal())

True
False
False


In [71]:
s = '23455'
print(s.isdecimal())

# s = '²3455'
s = '\u00B23455'
print(s.isdecimal())

# s =  '½'
s = '\u00BD'
print(s.isdecimal())

True
False
False


### isdigit()

In [72]:
s = "28212"
print(s.isdigit())

s = "Mo3 nicaG el l22er"
print(s.isdigit())

True
False


In [73]:
s = '23455'
print(s.isdigit())

# s = '²3455'
# subscript is a digit
s = '\u00B23455'
print(s.isdigit())

# s = '½'
# fraction isn't a digit
s = '\u00BD'
print(s.isdigit())

True
True
False


### islower()

In [74]:
s = 'this is good'
print(s.islower())

s = 'th!s is a1so g00d'
print(s.islower())

s = 'this is Not good'
print(s.islower())

True
True
False


### isnumeric()

In [75]:
s = '1242323'
print(s.isnumeric())

# s = '²3455'
s = '\u00B23455'
print(s.isnumeric())

# s = '½'
s = '\u00BD'
print(s.isnumeric())

s = 'python12'
print(s.isnumeric())

True
True
True
False


In [76]:
# s = '²3455'
s = '\u00B23455'

if s.isnumeric() == True:
  print('All characters are numeric')
else:
  print('All characters are not numeric')

All characters are numeric


### isupper()

In [77]:
string = "THIS IS GOOD!"
print(string.isupper())

# numbers in place of alphabets
string = "THIS IS ALSO G00D"
print(string.isupper())

string = "THIS IS not GOOD!"
print(string.isupper())

True
True
False


### join()

In [78]:
## lists and tuples

numList = ['1', '2', '3', '4']
seperator = ', '
print(seperator.join(numList))

numTuple = ('1', '2', '3', '4')
print(seperator.join(numTuple))

s1 = 'abc'
s2 = '123'

""" Each character of s2 is concatenated to the front of s1 """
print('s1.join(s2): ', s1.join(s2))

""" Each character of s1 is concatenated to the front of s2 """
print('s2.join(s1): ', s2.join(s1))

1, 2, 3, 4
1, 2, 3, 4
s1.join(s2):  1abc2abc3
s2.join(s1):  a123b123c


In [79]:
## sets

test = {'2', '1', '3'}
s = ', '
print(s.join(test))

test = {'Python', 'Java', 'Ruby'}
s = '->->'
print(s.join(test))

2, 1, 3
Ruby->->Python->->Java


In [80]:
## dictionaries

test = {'mat': 1, 'that': 2}
s = '->'
print(s.join(test))

test = {1: 'mat', 2: 'that'}
s = ', '

# this gives error
# print(s.join(test))

mat->that


### len()

In [81]:
string = "geeks"
print(len(string))

string = "geeks for geeks"
print(len(string))

5
15


In [82]:
## tuples, lists and range

testList = []
print(testList, 'length is', len(testList))

testList = [1, 2, 3]
print(testList, 'length is', len(testList))

testTuple = (1, 2, 3)
print(testTuple, 'length is', len(testTuple))

testRange = range(1, 10)
print('Length of', testRange, 'is', len(testRange))

[] length is 0
[1, 2, 3] length is 3
(1, 2, 3) length is 3
Length of range(1, 10) is 9


In [83]:
## strings and bytes

testString = ''
print('Length of', testString, 'is', len(testString))

testString = 'Python'
print('Length of', testString, 'is', len(testString))

# byte object
testByte = b'Python'
print('Length of', testByte, 'is', len(testByte))

testList = [1, 2, 3]

# converting to bytes object
testByte = bytes(testList)
print('Length of', testByte, 'is', len(testByte))

Length of  is 0
Length of Python is 6
Length of b'Python' is 6
Length of b'\x01\x02\x03' is 3


In [84]:
## dictionaries and sets

testSet = {1, 2, 3}
print(testSet, 'length is', len(testSet))

# empty set
testSet = set()
print(testSet, 'length is', len(testSet))

testDict = {1: 'one', 2: 'two'}
print(testDict, 'length is', len(testDict))

testDict = {}
print(testDict, 'length is', len(testDict))

testSet = {1, 2}

# frozenSet
frozenTestSet = frozenset(testSet)
print(frozenTestSet, 'length is', len(frozenTestSet))

{1, 2, 3} length is 3
set() length is 0
{1: 'one', 2: 'two'} length is 2
{} length is 0
frozenset({1, 2}) length is 2


In [85]:
## custom objects

class Session:
  def __init__(self, number = 0):
    self.number = number
   
  def __len__(self):
    return self.number

# default length is 0
s1 = Session()
print(len(s1))

# giving custom length
s2 = Session(6)
print(len(s2))

0
6


### lower()

In [86]:
string = "THIS SHOULD BE LOWERCASE!"
print(string.lower())

# string with numbers
# all alphabets would be lowercase
string = "Th!s Sh0uLd B3 L0w3rCas3!"
print(string.lower())

this should be lowercase!
th!s sh0uld b3 l0w3rcas3!


In [87]:
first = "PYTHON IS AWESOME!"

second = "PyThOn Is AwEsOmE!"

if(first.lower() == second.lower()):
  print("Strings are same")
else:
  print("Strings aren't same")

Strings are same


### upper()

In [88]:
string = "this should be uppercase!"
print(string.upper())

# string with numbers
# all alphabets would be lowercase
string = "th!s sh0uld b3 upp3rcas3!"
print(string.upper())

THIS SHOULD BE UPPERCASE!
TH!S SH0ULD B3 UPP3RCAS3!


In [89]:
first = "python is awesome!"

second = "PyThOn Is AwEsOmE!"

if(first.upper() == second.upper()):
  print("Strings are same.")
else:
  print("Strings aren't same")

Strings are same.


### match()

In [0]:
## See re.match() examples

### pad (ljust(), rjust() and center())

In [91]:
cstr = "I love geeksforgeeks"

print("Original string: \n", cstr, "\n")

print("Center aligned string is: ", cstr.center(40), "\n")

print("Center aligned string with fillchar: ", cstr.center(40, '#'))

Original string: 
 I love geeksforgeeks 

Center aligned string is:            I love geeksforgeeks           

Center aligned string with fillchar:  ##########I love geeksforgeeks##########


In [92]:
lstr = "I love geeksforgeeks"

print("Original string: \n", lstr, "\n")

print("Left aligned string with fillchar: ", cstr.ljust(40, '#'))

Original string: 
 I love geeksforgeeks 

Left aligned string with fillchar:  I love geeksforgeeks####################


In [93]:
cstr = "I love geeksforgeeks"

print("Original string: \n", cstr, "\n")

print("Right aligned string with fillchar: ", cstr.rjust(40, '#'))

Original string: 
 I love geeksforgeeks 

Right aligned string with fillchar:  ####################I love geeksforgeeks


### repeat()

In [94]:
data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# removing null values to avoid errors
data.dropna(how = 'all', inplace = True)

# overwriting with repeated value
data["Team"] = data["Team"].str.repeat(2)

data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston CelticsBoston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston CelticsBoston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston CelticsBoston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston CelticsBoston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston CelticsBoston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston CelticsBoston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston CelticsBoston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston CelticsBoston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston CelticsBoston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston CelticsBoston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [95]:
data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# removing null values to avoid errors
data.dropna(how = 'all', inplace = True)

# creating data of 10 rows
sample_data = data.head(10).copy()

# creating list of 10 ints
repeat_list = [2, 1, 3, 4, 1, 5, 0, 6, 1, 2]

sample_data["Name"] = sample_data["Name"].str.repeat(repeat_list)

sample_data

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery BradleyAvery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John HollandJohn HollandJohn Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. HunterR.J. HunterR.J. HunterR.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir JohnsonAmir JohnsonAmir JohnsonAmir Johns...,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly OlynykKelly OlynykKelly OlynykKelly Olyn...,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus SmartMarcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


### replace()

In [96]:
song = 'cold, cold heart'
print(song.replace('cold', 'hurt'))

song = 'Let it be, let it be, let it be, let it be'

''' only 2 occurences of 'let' is replaced '''
print(song.replace('let', "don't let", 2))

hurt, hurt heart
Let it be, don't let it be, don't let it be, let it be


In [97]:
song = 'cold, cold heart'
replaced_song = song.replace('o', 'e')

# Original string is unchanged
print('Original string: ', song)
print('Replaced string: ', replaced_song)

song = 'Let it be, let it be, let it be, let it be'

# Maximum of 0 substring is replaced
# Returns copy of original string
print(song.replace('let', 'so', 0))

Original string:  cold, cold heart
Replaced string:  celd, celd heart
Let it be, let it be, let it be, let it be


### slice()

In [0]:
# data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# # removing null values to avoid errors
# data.dropna(inplace = True)

# # start, stop and step variables
# start, stop, step = 0, -2, 1

# # converting to string data type
# data["Salary"] = data["Salary"].astype(str)

# # slicing till 2nd last element
# data["Salary (int)"] = data["Salary"].str.slice(start, stop, step)

# data.head(10)

In [100]:
data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# removing null values to avoid errors
data.dropna(inplace = True)

start, stop, step = 0, -2, 2

# slicing till 2nd last element
data["Name"] = data["Name"].str.slice(start, stop, step)

data.head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,AeyBal,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,JeCod,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,RJ ut,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jra ik,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,KlyOy,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,TryRz,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Mru m,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0
10,JrdSlig,Boston Celtics,7.0,C,24.0,6-9,260.0,Ohio State,2569260.0
11,Iaa hm,Boston Celtics,4.0,PG,27.0,5-9,185.0,Washington,6912869.0
12,Ea un,Boston Celtics,11.0,SG,27.0,6-7,220.0,Ohio State,3425510.0


### split()

In [103]:
text = 'geeks for geeks'

# splits at space
print(text.split())

word = 'geeks, for, geeks'

# splits at ','
print(word.split(', '))

word = 'geeks:for:geeks'

# splits at ':'
print(word.split(':'))

word = 'CatBatSatFatOr'

# splits at 3
print([word[i : i + 3] for i in range(0, len(word), 3)])

['geeks', 'for', 'geeks']
['geeks', 'for', 'geeks']
['geeks', 'for', 'geeks']
['Cat', 'Bat', 'Sat', 'Fat', 'Or']


In [104]:
word = 'geeks, for, geeks, anamitra'

# maxsplit: 0
print(word.split(', ', 0))

# maxsplit: 4
print(word.split(', ', 4))

# maxsplit: 1
print(word.split(', ', 1))

['geeks, for, geeks, anamitra']
['geeks', 'for', 'geeks', 'anamitra']
['geeks', 'for, geeks, anamitra']


### pd.Series.strip(), lstrip() and rstrip()

In [105]:
## lstrip()

data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# replacing team name and adding spaces in start and end
new = data["Team"].replace("Boston Celtics", "  Boston Celtics  ").copy()

# checking with custom removed space string
new.str.lstrip() == "Boston Celtics  "

0       True
1       True
2       True
3       True
4       True
5       True
6       True
7       True
8       True
9       True
10      True
11      True
12      True
13      True
14      True
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
       ...  
428    False
429    False
430    False
431    False
432    False
433    False
434    False
435    False
436    False
437    False
438    False
439    False
440    False
441    False
442    False
443    False
444    False
445    False
446    False
447    False
448    False
449    False
450    False
451    False
452    False
453    False
454    False
455    False
456    False
457    False
Name: Team, Length: 458, dtype: bool

In [106]:
## strip()

data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# replacing team name and adding spaces in start and end
new = data["Team"].replace("Boston Celtics", "  Boston Celtics  ").copy()

# checking with custom string
new.str.strip() == "  Boston Celtics"
new.str.strip() == "Boston Celtics  "
new.str.strip() == "  Boston Celtics  "

0      False
1      False
2      False
3      False
4      False
5      False
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
       ...  
428    False
429    False
430    False
431    False
432    False
433    False
434    False
435    False
436    False
437    False
438    False
439    False
440    False
441    False
442    False
443    False
444    False
445    False
446    False
447    False
448    False
449    False
450    False
451    False
452    False
453    False
454    False
455    False
456    False
457    False
Name: Team, Length: 458, dtype: bool

In [107]:
## rstrip()

data = pd.read_csv("https://cdncontribute.geeksforgeeks.org/wp-content/uploads/nba.csv")

# replacing team name and adding spaces in start and end
new = data["Team"].replace("Boston Celtics", "  Boston Celtics  ").copy()

# checking with custom removed space string
new.str.rstrip() == "  Boston Celtics"

0       True
1       True
2       True
3       True
4       True
5       True
6       True
7       True
8       True
9       True
10      True
11      True
12      True
13      True
14      True
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24     False
25     False
26     False
27     False
28     False
29     False
       ...  
428    False
429    False
430    False
431    False
432    False
433    False
434    False
435    False
436    False
437    False
438    False
439    False
440    False
441    False
442    False
443    False
444    False
445    False
446    False
447    False
448    False
449    False
450    False
451    False
452    False
453    False
454    False
455    False
456    False
457    False
Name: Team, Length: 458, dtype: bool