In [1]:
from lemuras import Table

## Sample data

In [2]:
# Column names, in the same order as the values inside each row.
cols = ['type', 'size', 'weight', 'tel']
# Sample records; note that 'tel' holds a mix of strings, integers and an
# empty string.
rows = [
    ['A', 1, 12, '+79360360193'],
    ['B', 4, 12, 84505505151],
    ['A', 3, 10, '+31415926535'],
    ['B', 6, 14, ''],
    ['A', 4, 10, '23816326412'],
    ['A', 2, 12, 0],
]

# Build the table used by every example below and display it.
df1 = Table(cols, rows)
df1

'type','size','weight','tel'
'A',1,12,'+79360360193'
'B',4,12,84505505151
'A',3,10,'+31415926535'
'B',6,14,''
'A',4,10,'23816326412'
'A',2,12,0


# Group by single column

You can create groups by a single column:

In [3]:
# Group the rows of df1 by the value in the 'type' column.
gr = df1.groupby('type')
# Displaying the grouped object lists each key together with its row count.
gr

'type','counts'
'A',4
'B',2


And then aggregate the groups into a new table:

In [4]:
# Aggregation spec: source column -> {output column name: aggregation function}.
df2 = gr.agg({
    'size': {
        'Count': 'count',
        'SizeAvg': 'avg',
    },
    'weight': {
        'WeightMedian': 'median',
        'WeightSum': 'sum',
    },
})

df2

'type','Count','SizeAvg','WeightMedian','WeightSum'
'A',4,2.5,11.0,44
'B',2,5.0,13.0,26


# Group by multiple columns

To group by multiple columns, pass a list of the key columns:

In [5]:
# Group by each distinct ('type', 'weight') pair, then aggregate 'size'
# within every group into a row count and a sum.
df2 = df1.groupby(['type', 'weight']).agg({
    'size': {
        'Count': 'count',
        'SizeSum': 'sum'
    }
})

df2

'type','weight','Count','SizeSum'
'A',12,2,3
'A',10,2,7
'B',12,1,4
'B',14,1,6


# Group by all

You can easily aggregate all the rows at once by grouping by no column at all. Just call `groupby()` without any arguments:

In [6]:
# No key columns are given, so all rows end up in one single group.
gr = df1.groupby()
gr

'counts'
6


Then aggregate it as you wish:

In [7]:
# Reduce the 'size' column of the whole table to a row count and a total.
df2 = gr.agg({
    'size': {
        'Count': 'count',
        'Sum': 'sum',
    },
})

df2

'Count','Sum'
6,20


# Aggregation with your own function or lambda

The following aggregation functions are available by name (passed as strings):

- **`'count'`** - elements count, group size.

- **`'min'`** - the lowest element.

- **`'max'`** - the highest element.

- **`'sum'`** - elements sum.

- **`'avg'`**, **`'mean'`** - average value.

- **`'mode'`** - the most common value.

- **`'middle'`**, **`'median'`** - the value for which half of the numbers are lower and half are higher.


If these are not enough, you can pass your own functions or lambda expressions to the aggregation:

In [8]:
def check5(lst):
    """Return True if at least one element of ``lst`` equals 5, else False.

    Intended as a custom aggregation function: it takes an iterable of
    values and reduces it to a single boolean.

    Args:
        lst: Any iterable of values comparable with ``== 5``.

    Returns:
        bool: True when some element compares equal to 5; False otherwise,
        including for an empty iterable.
    """
    # any() short-circuits on the first match, exactly like a loop with an
    # early return, and yields False when nothing matches.
    return any(el == 5 for el in lst)

# Aggregators may be built-in names (strings), lambdas or plain functions;
# each callable receives the values of the 'size' column for the group.
df2 = df1.groupby().agg({
    'size': {
        'Count': 'count',
        'Something': lambda x: 2 * sum(x) - 3 * min(x),
        'Have_size_5': check5,
    },
})

df2

'Count','Something','Have_size_5'
6,37,False
