Skip to content
klbostee edited this page Nov 24, 2010 · 15 revisions

The examples below don’t illustrate all of Dumbo’s nifty features, but you should be able to get the basics from them. Some of these examples can also be found here, with additional information in the form of docstrings.

wordcount.py

def mapper(key, value):
    """Emit a ``(word, 1)`` pair for each whitespace-separated word in value.

    The key argument is accepted but not used.
    """
    tokens = value.split()
    for token in tokens:
        yield (token, 1)

def reducer(key, values):
    """Yield one ``(key, total)`` pair, where total is the sum of values."""
    total = sum(values)
    yield key, total

if __name__ == "__main__":
    # dumbo is only imported when this file is executed as a script.
    import dumbo
    # Hand the map and reduce functions defined above to dumbo.
    dumbo.run(mapper, reducer=reducer)

oowordcount.py

class Mapper:
    """Word-count mapper that skips the words listed in ``excludes.txt``.

    The exclusion list is read once, when the instance is created. Each line
    of the file, stripped of surrounding whitespace, is one excluded word.
    """

    def __init__(self):
        """Load the excluded words from ``excludes.txt`` into a set."""
        # The with-block closes the file even if reading it raises.
        with open("excludes.txt", "r") as excludes_file:
            self.excludes = set(line.strip() for line in excludes_file)

    def __call__(self, key, value):
        """Yield ``(word, 1)`` for every word in value that is not excluded.

        The key argument is accepted but not used.
        """
        for word in value.split():
            if word not in self.excludes:
                yield word, 1

def reducer(key, values):
    """Sum the values seen for key and yield the result as ``(key, sum)``."""
    word_count = sum(values)
    yield (key, word_count)

if __name__ == "__main__":
    # dumbo is only imported when this file is executed as a script.
    import dumbo
    # The Mapper class itself (not an instance) is passed; the same summing
    # function serves as both reducer and combiner.
    dumbo.run(Mapper, reducer, combiner=reducer)

itertwice.py

def mapper1(key, value):
    """Emit ``(word, 1)`` for each whitespace-separated word in value.

    The key argument is accepted but not used.
    """
    pieces = value.split()
    for piece in pieces:
        yield piece, 1

def mapper2(key, value):
    """Emit ``(item, 1)`` for each element obtained by iterating over key.

    The value argument is accepted but not used.
    """
    for char in key:
        yield (char, 1)

def reducer1(key, values):
    """Yield ``(key, total)`` only when the sum of values is greater than 1."""
    total = sum(values)
    if not total > 1:
        # Totals of 1 or less produce no output.
        return
    yield key, total

def reducer2(key, values):
    """Yield a single ``(key, sum of values)`` pair."""
    summed = sum(values)
    yield (key, summed)

if __name__ == "__main__":
    # dumbo is only imported when this file is executed as a script.
    import dumbo
    job = dumbo.Job()
    # First iteration: mapper1 with reducer1; reducer2 (plain sum) is the
    # combiner.
    job.additer(mapper1, reducer1, combiner=reducer2)
    # Second iteration: mapper2, with reducer2 as both reducer and combiner.
    job.additer(mapper2, reducer2, combiner=reducer2)
    job.run()

greplogs.py

def mapper(key, value):
    """Pass ``(key, value)`` through only if value contains "playground.last.fm"."""
    if "playground.last.fm" not in value:
        # Non-matching records produce no output.
        return
    yield key, value

if __name__ == "__main__":
    # dumbo is only imported when this file is executed as a script.
    import dumbo
    # Only a mapper is supplied; no reducer or combiner is given.
    dumbo.run(mapper=mapper)