Skip to content

Commit

Permalink
adding partition_nth, find_nth
Browse files Browse the repository at this point in the history
  • Loading branch information
jonrkarr committed May 9, 2017
1 parent dc61f74 commit 41894ab
Show file tree
Hide file tree
Showing 2 changed files with 257 additions and 6 deletions.
104 changes: 98 additions & 6 deletions tests/util/test_string.py
Expand Up @@ -6,7 +6,7 @@
:License: MIT
"""

from wc_utils.util.string import indent_forest, delete_trailing_blanks
from wc_utils.util import string
import unittest


Expand All @@ -29,7 +29,7 @@ def test_indent_forest(self):
2,1
2,2
1,3"""
self.assertEqual(indent_forest(forest, indentation=2), indent_by_2)
self.assertEqual(string.indent_forest(forest, indentation=2), indent_by_2)

forest2 = [
'0,1',
Expand All @@ -41,7 +41,7 @@ def test_indent_forest(self):
could write
but couldn't code
0,2"""
self.assertEqual(indent_forest(forest2, indentation=3), indent_with_text)
self.assertEqual(string.indent_forest(forest2, indentation=3), indent_with_text)

def test_delete_trailing_blanks(self):
test_strings = ['test_text\ntest_text',
Expand All @@ -56,16 +56,108 @@ def test_delete_trailing_blanks(self):
]
for test_string, correct_list in zip(test_strings, correct_lists):
lines = test_string.split('\n')
delete_trailing_blanks(lines)
string.delete_trailing_blanks(lines)
self.assertEqual(lines, correct_list)

def test_indent_forest_with_trailing_blanks(self):
    """ Check how `string.indent_forest` treats trailing blank lines

    With `keep_trailing_blank_lines=True` the trailing blank lines of the first
    entry must be kept; by default they must be stripped.
    """
    test_string1 = 'test_text1\ntest_text2\n\ntest_text4\n \n'
    test_string2 = 'test_text5\ntest_text6'
    forest = [test_string1, test_string2]

    # Call through the `string` module only. The stale bare `indent_forest(...)`
    # call is removed: it turned the expected value into the third (`msg`)
    # argument of `assertEqual`, so it was never compared against.
    self.assertEqual(
        string.indent_forest(forest, keep_trailing_blank_lines=True, indentation=0),
        test_string1 + '\n' + test_string2)
    self.assertEqual(
        string.indent_forest(forest, indentation=0),
        test_string1.rstrip() + '\n' + test_string2)

def test_find_nth(self):
    """ Check `string.find_nth` against `str.find` and hand-computed indices """
    # For n = 1 the result must agree with the built-in `str.find`
    for sub in ('0', '1', '2', '3'):
        self.assertEqual(string.find_nth('123', sub, 1), '123'.find(sub))

    # (substring, n, expected index); -1 means fewer than n occurrences
    text = '123232323'
    expectations = [
        ('3', 1, 2),
        ('3', 2, 4),
        ('3', 3, 6),
        ('3', 4, 8),
        ('3', 5, -1),
        ('23', 1, 1),
        ('23', 2, 3),
        ('23', 3, 5),
        ('23', 4, 7),
        ('23', 5, -1),
        ('123', 1, 0),
        ('123', 2, -1),
        ('1234', 1, -1),
    ]
    for sub, n, expected in expectations:
        self.assertEqual(string.find_nth(text, sub, n), expected)

def test_rfind_nth(self):
    """ Check `string.rfind_nth` against `str.rfind` and hand-computed indices """
    # For n = 1 the result must agree with the built-in `str.rfind`
    for sub in ('0', '1', '2', '3'):
        self.assertEqual(string.rfind_nth('123', sub, 1), '123'.rfind(sub))

    # (substring, n counted from the right, expected index); -1 means fewer
    # than n occurrences
    text = '123232323'
    expectations = [
        ('3', 1, 8),
        ('3', 2, 6),
        ('3', 3, 4),
        ('3', 4, 2),
        ('3', 5, -1),
        ('23', 1, 7),
        ('23', 2, 5),
        ('23', 3, 3),
        ('23', 4, 1),
        ('23', 5, -1),
        ('123', 1, 0),
        ('123', 2, -1),
        ('1234', 1, -1),
    ]
    for sub, n, expected in expectations:
        self.assertEqual(string.rfind_nth(text, sub, n), expected)

def test_partition_nth(self):
    """ Check `string.partition_nth` against `str.partition` and hand-computed splits """
    # For n = 1 the result must agree with the built-in `str.partition`
    for sep in ('0', '1', '2', '3'):
        self.assertEqual(string.partition_nth('123', sep, 1), '123'.partition(sep))

    # (separator, n, expected tuple); when there are fewer than n separators
    # the whole string is expected in the first element
    text = '123232323'
    expectations = [
        ('3', 1, ('12', '3', '232323')),
        ('3', 2, ('1232', '3', '2323')),
        ('3', 3, ('123232', '3', '23')),
        ('3', 4, ('12323232', '3', '')),
        ('3', 5, ('123232323', '', '')),
        ('23', 1, ('1', '23', '232323')),
        ('23', 2, ('123', '23', '2323')),
        ('23', 3, ('12323', '23', '23')),
        ('23', 4, ('1232323', '23', '')),
        ('23', 5, ('123232323', '', '')),
        ('123', 1, ('', '123', '232323')),
        ('123', 2, ('123232323', '', '')),
        ('1234', 1, ('123232323', '', '')),
    ]
    for sep, n, expected in expectations:
        self.assertEqual(string.partition_nth(text, sep, n), expected)

def test_rpartition_nth(self):
    """ Check `string.rpartition_nth` against `str.rpartition` and hand-computed splits """
    # For n = 1 the result must agree with the built-in `str.rpartition`
    for sep in ('0', '1', '2', '3'):
        self.assertEqual(string.rpartition_nth('123', sep, 1), '123'.rpartition(sep))

    # (separator, n counted from the right, expected tuple); when there are
    # fewer than n separators the whole string is expected in the last element
    text = '123232323'
    expectations = [
        ('3', 4, ('12', '3', '232323')),
        ('3', 3, ('1232', '3', '2323')),
        ('3', 2, ('123232', '3', '23')),
        ('3', 1, ('12323232', '3', '')),
        ('3', 5, ('', '', '123232323')),
        ('23', 4, ('1', '23', '232323')),
        ('23', 3, ('123', '23', '2323')),
        ('23', 2, ('12323', '23', '23')),
        ('23', 1, ('1232323', '23', '')),
        ('23', 5, ('', '', '123232323')),
        ('123', 1, ('', '123', '232323')),
        ('123', 2, ('', '', '123232323')),
        ('1234', 1, ('', '', '123232323')),
    ]
    for sep, n, expected in expectations:
        self.assertEqual(string.rpartition_nth(text, sep, n), expected)
159 changes: 159 additions & 0 deletions wc_utils/util/string.py
@@ -1,6 +1,7 @@
""" String utilities.
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Author: Jonathan Karr <jonrkarr@gmail.com>
:Date: 2017-03-20
:Copyright: 2017, Karr Lab
:License: MIT
Expand Down Expand Up @@ -93,3 +94,161 @@ def delete_trailing_blanks(l_of_strings):
last = i
if last is not None:
del l_of_strings[last:]


def find_nth(s, sub, n, start=0, end=float('inf')):
    """ Get the index of the nth occurrence of a substring within a string

    Occurrences are counted from left to right and do not overlap: after a
    match, the search resumes immediately past the end of that match.

    Args:
        s (:obj:`str`): string to search
        sub (:obj:`str`): substring to search for
        n (:obj:`int`): number of the occurrence to find the position of
            (1 is the first occurrence)
        start (:obj:`int`, optional): position to start searching from;
            negative values are treated as 0
        end (:obj:`int`, optional): position by which a match must end; values
            beyond the end of the string are treated as the length of the string

    Returns:
        :obj:`int`: index of the nth occurrence of the substring within the string
            or -1 if there are fewer than n occurrences of the substring within
            the searched range

    Raises:
        :obj:`ValueError`: if `sub` is empty or `n` is less than 1
    """
    if not sub:
        raise ValueError('sub cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    width = len(sub)
    # A match starting at index i only counts while i < limit, i.e. while the
    # whole match fits before min(end, len(s)).
    limit = min(end, len(s)) - width + 1
    # Clamp so a negative start can never yield a negative "match" index that
    # callers could mistake for a real position.
    pos = max(start, 0)

    found = 0
    while True:
        # Let str.find do the scanning instead of slicing at every index.
        idx = s.find(sub, pos)
        if idx == -1 or idx >= limit:
            return -1
        found += 1
        if found == n:
            return idx
        # Skip past this match so occurrences do not overlap.
        pos = idx + width


def rfind_nth(s, sub, n, start=0, end=float('inf')):
    """ Get the index of the nth-last occurrence of a substring within a string

    Occurrences are counted from right to left and do not overlap: after a
    match, the search resumes with the text that ends where that match begins.

    Args:
        s (:obj:`str`): string to search
        sub (:obj:`str`): substring to search for
        n (:obj:`int`): number of the occurrence, counted from the right, to
            find the position of (1 is the last occurrence)
        start (:obj:`int`, optional): lowest position at which a match may
            begin; negative values are treated as 0
        end (:obj:`int`, optional): position by which a match must end; values
            beyond the end of the string are treated as the length of the
            string and negative values as 0

    Returns:
        :obj:`int`: index of the nth-last occurrence of the substring within the
            string or -1 if there are fewer than n occurrences of the substring
            within the searched range

    Raises:
        :obj:`ValueError`: if `sub` is empty or `n` is less than 1
    """
    if not sub:
        raise ValueError('sub cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    # Clamp both bounds to absolute, non-negative offsets so the search can
    # never wander into negative indices and report one as a match.
    lower = max(start, 0)
    upper = max(min(len(s), end), 0)

    found = 0
    while True:
        # Let str.rfind do the scanning instead of slicing at every index.
        idx = s.rfind(sub, lower, upper)
        if idx == -1:
            return -1
        found += 1
        if found == n:
            return idx
        # The next match must end at or before the start of this one so
        # occurrences do not overlap.
        upper = idx


def partition_nth(s, sep, n):
    """ Split a string around the nth occurrence of a separator

    Args:
        s (:obj:`str`): string to partition
        sep (:obj:`str`): separator to partition on
        n (:obj:`int`): number of the occurrence to partition on

    Returns:
        :obj:`tuple`:

            * :obj:`str`: substring before the nth separator
            * :obj:`str`: separator
            * :obj:`str`: substring after the nth separator

            If `find_nth` reports no nth separator, the tuple is `(s, '', '')`.

    Raises:
        :obj:`ValueError`: if `sep` is empty or `n` is less than 1
    """
    if not sep:
        raise ValueError('sep cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    pos = find_nth(s, sep, n)

    # No nth separator: everything belongs to the leading part
    if pos == -1:
        return (s, '', '')

    sep_len = len(sep)
    head = s[0:pos] if pos != 0 else ''
    tail = '' if pos == len(s) - sep_len else s[pos + sep_len:]
    return (head, sep, tail)


def rpartition_nth(s, sep, n):
    """ Split a string around the nth-last occurrence of a separator

    Args:
        s (:obj:`str`): string to partition
        sep (:obj:`str`): separator to partition on
        n (:obj:`int`): number of the occurrence, counted from the right, to
            partition on

    Returns:
        :obj:`tuple`:

            * :obj:`str`: substring before the nth-last separator
            * :obj:`str`: separator
            * :obj:`str`: substring after the nth-last separator

            If `rfind_nth` reports no nth-last separator, the tuple is
            `('', '', s)`.

    Raises:
        :obj:`ValueError`: if `sep` is empty or `n` is less than 1
    """
    if not sep:
        raise ValueError('sep cannot be empty')
    if n < 1:
        raise ValueError('n must be at least 1')

    pos = rfind_nth(s, sep, n)

    # No nth-last separator: everything belongs to the trailing part
    if pos == -1:
        return ('', '', s)

    sep_len = len(sep)
    head = s[0:pos] if pos != 0 else ''
    tail = '' if pos == len(s) - sep_len else s[pos + sep_len:]
    return (head, sep, tail)

0 comments on commit 41894ab

Please sign in to comment.