# Add custom functions to spark

In [None]:
import pyspark.sql.functions as f
from pyspark.sql import DataFrame

## Add pipe to spark
Add the functionality of the [pandas.DataFrame.pipe](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pipe.html) method to PySpark DataFrames.

In [None]:
def pipe(self, func, *args, **kwargs):
    """Apply a chainable function to this DataFrame, like ``pandas.DataFrame.pipe``.

    Args:
        func: Either a callable that receives the DataFrame as its first
            positional argument, or a ``(callable, data_keyword)`` tuple in
            which ``data_keyword`` names the keyword argument of ``callable``
            that should receive the DataFrame.
        *args: Extra positional arguments forwarded to the callable.
        **kwargs: Extra keyword arguments forwarded to the callable.

    Returns:
        Whatever the callable returns.

    Raises:
        ValueError: If ``data_keyword`` is also present in ``kwargs``.
    """
    if isinstance(func, tuple):
        # pandas-style form for functions that do not take the data first.
        func, target = func
        if target in kwargs:
            raise ValueError(
                "{} is both the pipe target and a keyword argument".format(target)
            )
        kwargs[target] = self
        return func(*args, **kwargs)
    return func(self, *args, **kwargs)

# Register the function on the class so every DataFrame exposes df.pipe(...)
setattr(DataFrame, "pipe", pipe)

## Add melt to spark
Add the functionality of the [pandas.melt](https://pandas.pydata.org/docs/reference/api/pandas.melt.html) function to PySpark.

In [None]:
def melt_df(df, id_vars, value_vars=None, var_name="variable", value_name="value"):
    """Unpivot a DataFrame from wide to long format, in the spirit of ``pandas.melt``.

    Args:
        df: DataFrame to unpivot.
        id_vars: Column name, or list/tuple of column names, repeated as
            identifier columns on every output row. ``None`` means no
            identifier columns.
        value_vars: Column name, or list/tuple of column names, to unpivot.
            When omitted or empty, every column of ``df`` that is not in
            ``id_vars`` is used.
        var_name: Name of the output column holding the source column name.
        value_name: Name of the output column holding the value.

    Returns:
        A DataFrame with the ``id_vars`` columns followed by ``var_name`` and
        ``value_name``, with one row per (input row, value column) pair.

    Note:
        The value columns are packed into a single array column, so they
        need a common data type; cast them beforehand if they differ.
    """
    # Normalise id_vars to a list: a bare string would otherwise be treated
    # as a sequence of characters (substring match, failed concatenation).
    if id_vars is None:
        id_vars = []
    elif isinstance(id_vars, str):
        id_vars = [id_vars]
    else:
        id_vars = list(id_vars)

    if not value_vars:
        value_vars = [c for c in df.columns if c not in id_vars]
    elif isinstance(value_vars, str):
        value_vars = [value_vars]

    # One (name, value) struct per value column, gathered into an array.
    pairs = f.array(*[
        f.struct(f.lit(c).alias(var_name), f.col(c).alias(value_name))
        for c in value_vars
    ])

    # Pick a scratch column name that cannot overwrite an existing column.
    tmp_col = "_vars_and_vals"
    while tmp_col in df.columns:
        tmp_col = "_" + tmp_col

    # Explode the array so each struct becomes its own row.
    exploded = df.withColumn(tmp_col, f.explode(pairs))

    # Pull the two struct fields back out as top-level columns.
    melted = [f.col(tmp_col)[name].alias(name) for name in (var_name, value_name)]
    return exploded.select(*(id_vars + melted))