Skip to content

Commit

Permalink
create det_dedupe(), which deterministically deduplicates a list
Browse files Browse the repository at this point in the history
  • Loading branch information
artgoldberg committed Dec 5, 2017
1 parent b04d2da commit 20c42cc
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
14 changes: 11 additions & 3 deletions tests/util/test_list.py
@@ -1,12 +1,13 @@
""" Test list utilities
:Author: Jonathan Karr <karr@mssm.edu>
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Date: 2016-11-30
:Copyright: 2016, Karr Lab
:License: MIT
"""

from wc_utils.util.list import is_sorted, transpose, difference
from wc_utils.util.list import is_sorted, transpose, difference, det_dedupe
import unittest


Expand All @@ -28,9 +29,16 @@ def test_transpose(self):
self.assertEqual(transpose(lst), t_lst)

def test_difference(self):
    """ Test `difference`: result order follows the first list; unhashable elements are rejected """
    superset = [0, 1, 2, 3, 4]
    subset = [1, 2, 3]

    # elements of the first list that are absent from the second, in the first list's order
    self.assertEqual(difference(superset, subset), [0, 4])

    # every element of `subset` is in `superset`, so nothing is left
    self.assertEqual(difference(subset, superset), [])

    # a nested list is unhashable, so the call itself must raise; no value is returned to compare
    with self.assertRaises(TypeError):
        difference([], [[1]])

def test_det_dedupe(self):
    """ Test `det_dedupe`: one instance per element, ordered by first occurrence """
    l = [0, 1, 2, 0, 1, 0, 7, 1]
    expected = [0, 1, 2, 7]
    self.assertEqual(det_dedupe(l), expected)

    # the expected value above is also `sorted(set(l))`; use an input whose first-occurrence
    # order differs from its sorted order so that ordering is actually verified
    self.assertEqual(det_dedupe([7, 1, 7, 0, 1]), [7, 1, 0])

    # boundary case: nothing to deduplicate
    self.assertEqual(det_dedupe([]), [])

    # a copy is returned; the argument is left unchanged
    original = [3, 3, 2]
    deduped = det_dedupe(original)
    self.assertEqual(deduped, [3, 2])
    self.assertEqual(original, [3, 3, 2])
    self.assertIsNot(deduped, original)

    # an unhashable element (here, a list) must raise
    with self.assertRaises(TypeError):
        det_dedupe([[]])
27 changes: 26 additions & 1 deletion wc_utils/util/list.py
Expand Up @@ -57,7 +57,32 @@ def difference(list_1, list_2):
:obj:`list`: a set-like difference between `list_1` and `list_2`
Raises:
`TypeError` if `list_1` or `list_2` contains an unhashable (mutable) type
`TypeError` if `list_1` or `list_2` contains an unhashable (mutable) type
"""
list_2_set = set(list_2)
return list(filter(lambda item:not item in list_2_set, list_1))


def det_dedupe(l):
    """ Deterministically deduplicate a list

    Returns a deduplicated copy of `l`. That is, returns a new list that contains one instance of
    each element in `l` and orders these instances by their first occurrence in `l`. `l` itself is
    not modified.

    Each element is tested against, and added to, a set at most once, so the expected cost is
    O(n), where n is the length of `l`.

    Args:
        l (:obj:`list`): a list with hashable elements

    Returns:
        :obj:`list`: a deterministically deduplicated copy of `l`

    Raises:
        `TypeError` if `l` contains an unhashable (mutable) type
    """
    seen = set()
    deduped = []
    for element in l:
        # the membership test hashes `element`; this is where an unhashable element raises TypeError
        if element in seen:
            continue
        # first occurrence: keep it, and remember it so later duplicates are dropped
        deduped.append(element)
        seen.add(element)
    return deduped

0 comments on commit 20c42cc

Please sign in to comment.