## Частота покупок с MapReduce на коленке

In [1]:
from typing import Iterable, Callable, Any
from dataclasses import dataclass
import datetime

from simplemr import SimpleMapReduce


@dataclass
class UserEvent:
    """A single parsed row of the user activity log."""

    userid: str  # identifier of the user the event belongs to
    moment: datetime.datetime  # timestamp of the event
    action: str  # event kind, e.g. 'checkout'
    value: float  # numeric payload of the event (meaning depends on the action)

@dataclass
class UserID:
    """Record that carries nothing but a user identifier."""

    userid: str  # identifier of the user

@dataclass
class UserFrequency:
    """Result record pairing a user identifier with a count."""

    userid: str  # identifier of the user
    frequency: int  # number of records counted for this user


def parse_user_event(line: str) -> Iterable[UserEvent]:
    """Parse one tab-separated log line into at most one ``UserEvent``.

    The line is expected to hold four columns: user id, ISO-format
    timestamp, action name and a numeric value. The header row (first
    column equal to ``'userid'``) and blank lines produce no output.

    Args:
        line: A raw line of the log, with or without a trailing newline.

    Yields:
        The parsed ``UserEvent`` for a data row; nothing for a header or
        blank line.

    Raises:
        IndexError: If a non-blank data row has fewer than four columns.
        ValueError: If the timestamp is not valid ISO format or the value
            is not a valid float.
    """
    row = line.strip().split('\t')
    # A blank line splits into [''], which would otherwise pass the header
    # check and fail on row[1]; skip it together with the header row.
    if not row[0] or row[0] == 'userid':
        return

    yield UserEvent(
        userid=row[0],
        moment=datetime.datetime.fromisoformat(row[1]),
        action=row[2],
        value=float(row[3]),
    )
        
def filter_user_checkout(event: UserEvent) -> Iterable[UserEvent]:
    """Pass the event through only when its action is ``'checkout'``.

    Objects without an ``action`` attribute are treated as non-checkout
    and dropped.

    Args:
        event: The event to inspect.

    Yields:
        The same ``event`` object if it is a checkout; otherwise nothing.
    """
    is_checkout = getattr(event, 'action', None) == 'checkout'
    if not is_checkout:
        return
    yield event
        
def user_event_to_userid(event: UserEvent) -> Iterable[UserID]:
    """Reduce an event to a record that keeps only its user id.

    Args:
        event: The event whose ``userid`` is copied.

    Yields:
        Exactly one ``UserID`` holding ``event.userid``.
    """
    projected = UserID(userid=event.userid)
    yield projected
        
def count_user_checkouts(inp: Iterable[UserID]) -> Iterable[UserFrequency]:
    """Count the records in one group and emit a single frequency record.

    Presumably called once per group of records sharing the same
    ``userid`` (it is registered as a reducer keyed on ``'userid'``);
    the user id of the last record seen is the one reported.

    Args:
        inp: The ``UserID`` records of one group.

    Yields:
        One ``UserFrequency`` with the group's user id and record count,
        or nothing when ``inp`` is empty.
    """
    count = 0
    userid = None
    for user in inp:
        userid = user.userid
        count += 1

    # An empty group has no user to attribute the count to; emitting
    # UserFrequency(userid=None, frequency=0) would be a bogus record.
    if count:
        yield UserFrequency(userid=userid, frequency=count)


def process(mrjob: SimpleMapReduce) -> SimpleMapReduce:
    """Attach the checkout-counting pipeline to a job and return the result.

    Stages, in order: parse raw lines into events, keep only checkout
    events, strip each event down to its user id, then reduce with
    ``count_user_checkouts`` using ``['userid']`` as the reduce key.

    Args:
        mrjob: The job to extend; its input is presumably an iterable of
            raw log lines -- confirm against ``SimpleMapReduce``.

    Returns:
        Whatever the final ``reduce`` call returns.
    """
    parsed = mrjob.map(parse_user_event)
    checkouts = parsed.map(filter_user_checkout)
    userids = checkouts.map(user_event_to_userid)
    return userids.reduce(count_user_checkouts, ['userid'])

## Testing

In [2]:
# Run the checkout-frequency pipeline over log.tsv and print every result.
with open("log.tsv", "r") as input_stream:
    # Wrap the open file in a job first, then attach the pipeline stages.
    mrjob = SimpleMapReduce(input_stream)
    mrjob = process(mrjob)
    # Consume the output while the file is still open.
    for item in mrjob.output():
        print(item)

UserFrequency(userid='user_1', frequency=1)
UserFrequency(userid='user_3', frequency=1)
