# Mapper

In [None]:
# | default_exp mapper

In [None]:
# | export

from dreamai_ray.imports import *
from dreamai_ray.utils import *


In [None]:
#| hide

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [None]:
# | export


class Callback:
    """Base class for batch hooks; every hook here is a no-op.

    Subclasses override only the hooks they need. Each hook accepts
    arbitrary keyword arguments and returns None.
    """

    def before_batch(self, **kwargs):
        """Hook invoked by `cbs_before_batch`; does nothing by default."""
        return None

    def before_batch_rows(self, **kwargs):
        """Hook invoked by `cbs_before_batch_rows`; does nothing by default."""
        return None

    def after_batch_rows(self, **kwargs):
        """Hook invoked by `cbs_after_batch_rows`; does nothing by default."""
        return None

    def after_batch(self, **kwargs):
        """Hook invoked by `cbs_after_batch`; does nothing by default."""
        return None


class msg_bs_cb(Callback):
    """Callback that reports the size of each batch before it is processed."""

    def before_batch(self, df, **kwargs):
        """Emit an info message with `len(df)`; extra keyword arguments are ignored."""
        text = f"DF BATCH SIZE: {len(df)}"
        msg.info(text, spaced=True)


def cbs_before_batch(cbs, **kwargs):
    """Call `before_batch(**kwargs)` on each callback in `cbs`, in order."""
    for callback in cbs:
        callback.before_batch(**kwargs)


def cbs_before_batch_rows(cbs, df, **kwargs):
    """Call `before_batch_rows(**kwargs)` on every callback once per row of `df`.

    `df` is used only for its length; it is not forwarded to the callbacks.
    For each row, the callbacks run in the order they appear in `cbs`.
    """
    n_rows = len(df)
    for _ in range(n_rows):
        for callback in cbs:
            callback.before_batch_rows(**kwargs)


def cbs_after_batch_rows(cbs, df, **kwargs):
    """Call `after_batch_rows(**kwargs)` on every callback once per row of `df`.

    `df` is used only for its length; it is not forwarded to the callbacks.
    For each row, the callbacks run in the order they appear in `cbs`.
    """
    n_rows = len(df)
    for _ in range(n_rows):
        for callback in cbs:
            callback.after_batch_rows(**kwargs)


def cbs_after_batch(cbs, **kwargs):
    """Call `after_batch(**kwargs)` on each callback in `cbs`, in order."""
    for callback in cbs:
        callback.after_batch(**kwargs)


class Mapper:
    """
    Apply a user-defined function (UDF) to every row of a dataframe, running
    the configured callbacks before and after the batch is mapped.
    """

    def __init__(
        self,
        udf=noop,
        udf_kwargs=None,
        cbs=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        udf: function
            Function called with each row of the dataframe (see `map`).
        udf_kwargs: dict, optional
            Keyword arguments bound to `udf` via `partial`. Defaults to no
            extra arguments.
        cbs: list, optional
            Callbacks to run before and after the mapping. Defaults to a
            fresh `[msg_bs_cb()]` for each instance.
        **kwargs
            Extra keyword arguments; passed along with the other parameters
            to `locals_to_params` / `store_attr`.
        """
        # Build the defaults per call: a mutable default in the signature is
        # created once and shared by every instance that relies on it.
        if udf_kwargs is None:
            udf_kwargs = {}
        if cbs is None:
            cbs = [msg_bs_cb()]
        udf = partial(udf, **udf_kwargs)
        # NOTE: locals() is handed to the storing helpers, so only existing
        # parameter names are rebound above; no new local names are introduced.
        store_attr(**locals_to_params(locals()))

    def map(self, df):
        """Apply `self.udf` to each row of `df` (`axis=1`), expanding the results into columns."""
        return df.apply(self.udf, axis=1, result_type="expand")

    def __call__(self, df):
        """Run the before-hooks, map `df`, run the after-hooks, and return the mapped dataframe.

        The after-hooks receive the mapped dataframe, not the input one.
        """
        cbs_before_batch(self.cbs, df=df)
        cbs_before_batch_rows(self.cbs, df=df)

        df = self.map(df)

        cbs_after_batch_rows(self.cbs, df=df)
        cbs_after_batch(self.cbs, df=df)

        return df

In [None]:
# | eval: false

# Sample batch: one "text" column whose cells are lists of sentences.
sample_texts = [
    ["I worked at Google", "I studied at Harvard"],
    ["I worked at Facebook", "I studied at MIT"],
]
df = pd.DataFrame({"text": sample_texts})


def ander(df):
    """
    A simple example of a user defined function that joins the text in each row with " and "

    Parameters
    ----------
    df:
        The object to extend. Despite the name, when used through
        `Mapper.map` this is a single row, since the UDF is applied with
        `axis=1`. It must support `.copy()` and item access, and `df["text"]`
        must be an iterable of strings.

    Returns
    -------
    A copy of `df` with an added "joined" entry. The input is left
    unmodified.
    """
    # Work on a copy: pandas does not support UDFs that mutate the object
    # passed to them by `apply`.
    row = df.copy()
    row["joined"] = " and ".join(df["text"])
    return row


# Wrap the example UDF in a Mapper and run it over the sample batch.
m = Mapper(udf=ander)
df = m(df)
# Bare trailing expression so the notebook displays the resulting dataframe.
df


ℹ DF BATCH SIZE: 2



Unnamed: 0,text,joined
0,"[I worked at Google, I studied at Harvard]",I worked at Google and I studied at Harvard
1,"[I worked at Facebook, I studied at MIT]",I worked at Facebook and I studied at MIT


In [None]:
# # | eval: false

# df = pd.DataFrame(
#     {
#         "text": [
#             ["I worked at Google", "I studied at Harvard"],
#             ["I worked at Facebook", "I studied at MIT"],
#         ]
#     }
# )

# m = SegsMapper()
# df = m(df)
# df



ℹ DF BATCH SIZE: 2



Unnamed: 0,text,segs,preds,probs
0,"[I worked at Google, I studied at Harvard]","{'Work Experience': ['I worked at Google'], 'Education': ['I studied at Harvard'], 'Certifications': [], 'Other': []}","[Work Experience, Education]","[0.98256487, 0.98468]"
1,"[I worked at Facebook, I studied at MIT]","{'Work Experience': ['I worked at Facebook'], 'Education': ['I studied at MIT'], 'Certifications': [], 'Other': []}","[Work Experience, Education]","[0.9836285, 0.98444146]"


In [None]:
# | hide

# Imported in this hidden cell because it is only needed for the export call below.
import nbdev

# Run nbdev's export step. Presumably this writes the cells marked `# | export`
# to the module named by `default_exp` - confirm against the nbdev docs.
nbdev.nbdev_export()
