In [1]:
# import the necessary package
from omigo_core import tsv

In [2]:
# This example shows how to create a custom extension of the TSV class with additional methods.
class MedianTSV(tsv.TSV):
    """Example extension of ``tsv.TSV`` that adds a per-group median aggregation.

    The class shows the pattern for adding domain-specific methods: define an
    aggregation helper, then expose a small API method that wires the helper
    into the inherited ``aggregate()`` call.
    """

    # init
    def __init__(self, header, data):
        """Forward ``header`` and ``data`` unchanged to the ``tsv.TSV`` constructor."""
        super().__init__(header, data)

    # custom aggregation function
    # NOTE: median() below looks up the aggregated column as "<col>:myfunc",
    # so this method's name and that format string must be kept in sync.
    def myfunc(self, vs):
        """Return the median of ``vs`` as a string.

        Args:
            vs: iterable of values convertible with ``float()``.

        Returns:
            ``str`` of the median. For an odd number of values this is the
            middle element of the sorted values; for an even number it is the
            mean of the two middle elements.

        Raises:
            IndexError: if ``vs`` is empty.
            ValueError: if a value cannot be converted by ``float()``.
        """
        ordered = sorted(float(v) for v in vs)
        mid = len(ordered) // 2

        if len(ordered) % 2 == 1:
            m = ordered[mid]
        else:
            # even count: there is no single middle element, so average the
            # two central values (an empty input fails here with IndexError)
            m = (ordered[mid - 1] + ordered[mid]) / 2

        return str(m)

    # this is the api
    def median(self, grouping_cols, col, new_col):
        """Compute the median of ``col`` per group and store it in ``new_col``.

        Args:
            grouping_cols: grouping column(s), passed through to ``aggregate()``.
            col: name of the column whose values are aggregated.
            new_col: name given to the resulting median column.

        Returns:
            The result of the aggregate / rename / apply_precision chain.
        """
        # compute median and assign to new_col.
        # Notice that aggregate() method appends the function name to col
        return self \
            .aggregate(grouping_cols, [col], [self.myfunc]) \
            .rename("{}:myfunc".format(col), new_col) \
            .apply_precision(new_col, 1)  # presumably 1 decimal place - confirm against omigo_core docs


In [3]:
# load the iris sample dataset that the new method will be applied to,
# and preview it
IRIS_URL = "https://github.com/CrowdStrike/tsv-data-analytics/raw/main/data/iris.tsv"
x = tsv.read(IRIS_URL)
x.to_df(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# extend_class() switches the TSV object to the derived class so that its
# extra methods (here: median) can be used inside a regular processing chain
(
    x.extend_class(MedianTSV)
    .median("class", "sepal_length", "sepal_length_median")
    .to_df(5)
)

Unnamed: 0,class,sepal_length_median
0,Iris-setosa,5.0
1,Iris-versicolor,5.9
2,Iris-virginica,6.5
