In [13]:
import pandas as pd
import dask.dataframe as dd
from typing import Callable, Type

# Toy long-format table, one row per (id, columns, values) triple.
# id 4 appears twice under "col1", so pivoting it needs a real aggregation.
df = pd.DataFrame(
    [
        (1, "col1", "a"),
        (2, "col2", "b"),
        (4, "col1", "c"),
        (4, "col1", "d"),
        (5, "col3", "e"),
    ],
    columns=["id", "columns", "values"],
)

# Single-partition dask view of the same data, used by the cells below.
ddf = dd.from_pandas(df, npartitions=1)
ddf.head()

Unnamed: 0,id,columns,values
0,1,col1,a
1,2,col2,b
2,4,col1,c
3,4,col1,d
4,5,col3,e


In [59]:
def dask_pivot_table(
    ddf: dd.DataFrame,
    index: str, columns: str,
    values: str,
    aggfunc: Callable,
    value_type: str) -> dd.DataFrame:
    """Pivot a long-format dask DataFrame into one output column per key.

    For every distinct non-null value found in ``ddf[columns]``, the matching
    rows are grouped by ``index``, the ``values`` entries of each group are
    collected into a Python list, and ``aggfunc`` reduces that list to a
    single cell. The per-key results are then outer-merged on ``index``.

    Args:
        ddf: Source dask DataFrame containing the ``index``, ``columns`` and
            ``values`` columns.
        index: Name of the column whose values become the result's index.
        columns: Name of the column whose distinct values become the
            result's column labels.
        values: Name of the column that supplies the cell contents.
        aggfunc: Callable that receives the list of ``values`` for one
            (index, column-key) group and returns the cell value.
        value_type: dtype passed to dask as the ``meta`` dtype of the
            aggregated cells (e.g. ``"str"``).

    Returns:
        A lazy dask DataFrame indexed by ``index`` with one column per
        distinct non-null key. Cells with no matching rows are missing
        values introduced by the outer merge. If there are no non-null keys
        at all, an empty frame whose index is named ``index`` is returned.

    Note:
        The distinct keys are computed eagerly (one ``.compute()`` call)
        because the set of output columns must be known up front.
    """
    # Null keys (NaN/None) are dropped: `ddf[columns] == key` can never match
    # them, so they would only ever yield an empty, unmergeable column.
    column_keys = ddf[columns].unique().compute().dropna()

    if len(column_keys) == 0:
        # Nothing to pivot: return an empty, correctly indexed frame instead
        # of failing on the first-element lookup below.
        empty = pd.DataFrame(index=pd.Index([], name=index))
        return dd.from_pandas(empty, npartitions=1)

    series = []
    for column in column_keys:
        # Every key came from unique() and is non-null, so this subset is
        # guaranteed to be non-empty; no per-key emptiness check is needed.
        subset = ddf[ddf[columns] == column]
        grouped = subset[[index, values]].groupby(index).aggregate(list)
        cells = grouped[values].apply(aggfunc, meta=(values, value_type))
        cells.name = column  # the key becomes the output column label
        series.append(cells)

    # Outer merges keep every index value seen under any key.
    pivoted = series[0].to_frame()
    for cells in series[1:]:
        pivoted = dd.merge(pivoted, cells.to_frame(), on=index, how="outer")
    return pivoted

In [60]:
# Demo: comma-join all values that share an (id, column) pair, then materialize.
dask_pivot_table(
    ddf,
    index="id",
    columns="columns",
    values="values",
    aggfunc=','.join,
    value_type="str",
).compute()

Unnamed: 0_level_0,col1,col2,col3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,a,,
4,"c,d",,
2,,b,
5,,,e
