## Выручка по месяцам с MapReduce на коленке

In [12]:
from typing import Iterable, Callable, Any
from dataclasses import dataclass
import datetime

from simplemr import SimpleMapReduce


@dataclass
class UserEvent:
    """One row of the user activity log after parsing."""

    # Identifier of the user the event belongs to.
    userid: str
    # Timestamp of the event.
    moment: datetime.datetime
    # Event type; the pipeline in this file keeps only 'checkout' events.
    action: str
    # Numeric amount attached to the event.
    value: float

@dataclass
class UserMonthCheck:
    """A single checkout amount tagged with the month it falls in."""

    # Month key; the mapper in this file formats it as 'YYYY-MM'.
    month: str
    # Amount of the checkout.
    value: float

@dataclass
class MonthSale:
    """Total checkout value accumulated for one month."""

    # Month key as a 'YYYY-MM' string (not a date object): the reducer copies
    # it verbatim from UserMonthCheck.month.
    month: str
    # Sum of the checkout values that fell into this month.
    value: float


def parse_user_event(line: str) -> Iterable[UserEvent]:
    """Parse one tab-separated log line into at most one UserEvent.

    Expected columns, in order: userid, ISO-8601 timestamp, action, value.
    The header row (first column equal to 'userid') and blank lines yield
    nothing, so the function can be mapped over a raw file.

    Args:
        line: A single line of the log, with or without a trailing newline.

    Yields:
        The parsed UserEvent for a data row.

    Raises:
        IndexError: If a non-blank data row has fewer than four columns.
        ValueError: If the timestamp or the value cannot be parsed.
    """
    stripped = line.strip()
    if not stripped:
        # A blank line (e.g. a trailing newline at end of file) carries no event.
        return

    row = stripped.split('\t')
    if row[0] == 'userid':
        # Header row: column names, not data.
        return

    yield UserEvent(
        userid=row[0],
        moment=datetime.datetime.fromisoformat(row[1]),
        action=row[2],
        value=float(row[3]),
    )
        
def filter_user_checkout(event: UserEvent) -> Iterable[UserEvent]:
    """Pass the event through only when its action is 'checkout'.

    Objects without an ``action`` attribute are treated as non-checkout
    events and dropped.
    """
    is_checkout = getattr(event, 'action', None) == 'checkout'
    if not is_checkout:
        return
    yield event
        
def user_event_to_user_date_check(event: UserEvent) -> Iterable[UserMonthCheck]:
    """Turn an event into one UserMonthCheck keyed by its 'YYYY-MM' month.

    The event's value is carried over unchanged.
    """
    event_day = event.moment.date()
    month_key = event_day.strftime('%Y-%m')
    yield UserMonthCheck(month=month_key, value=event.value)
        
def user_date_check_to_month_sale(inp: Iterable[UserMonthCheck]) -> Iterable[MonthSale]:
    """Collapse a group of month checks into a single MonthSale.

    The values are added up in iteration order, and the month is taken from
    the last item seen. An empty group trips the assertion when the
    generator is first advanced.
    """
    total = 0
    last_month = None
    for check in inp:
        last_month, total = check.month, total + check.value

    # At least one item must have been consumed to know the month.
    assert last_month is not None

    yield MonthSale(month=last_month, value=total)


def process(mrjob: SimpleMapReduce) -> SimpleMapReduce:
    """Attach the monthly-revenue pipeline to the given job.

    Stages, in order: parse raw lines, keep checkout events, convert them to
    per-month checks, then reduce with ['month'] as the key list
    (presumably the grouping key; confirm against SimpleMapReduce.reduce).

    Returns:
        Whatever the final ``reduce`` call returns.
    """
    events = mrjob.map(parse_user_event)
    checkouts = events.map(filter_user_checkout)
    month_checks = checkouts.map(user_event_to_user_date_check)
    return month_checks.reduce(user_date_check_to_month_sale, ['month'])

## Testing

In [13]:
# Run the pipeline over log.tsv and print every item it produces.
with open("log.tsv", "r") as input_stream:
    # The open file object itself is handed to SimpleMapReduce as its input.
    mrjob = process(SimpleMapReduce(input_stream))
    # The output is consumed inside the `with` block, while the file is still open.
    for item in mrjob.output():
        print(item)

MonthSale(month='2022-08', value=24740.0)
MonthSale(month='2022-09', value=20000.0)
