In [2]:
import pandas as pd
import numpy as np
import datetime as dt

import plotly.graph_objects as go

# Генерация данных

In [12]:
# Synthetic event log: one row per event with a date string, a time string,
# a group, a sub-group and a unique value id.
data = pd.DataFrame([
            ['16-10-2021', '23:56:10', 'A', 'a1', '0001'],
            ['16-10-2021', '23:58:30', 'A', 'a1', '0002'],
            ['16-10-2021', '23:58:35', 'B', 'b1', '0003'],
            ['16-10-2021', '23:58:56', 'B', 'b2', '0004'],
            ['16-10-2021', '23:59:21', 'C', 'c1', '0005'],
            ['16-10-2021', '23:59:42', 'C', 'c1', '0006'],

            ['17-10-2021', '00:03:05', 'A', 'a1', '0007'],
            ['17-10-2021', '00:03:25', 'A', 'a2', '0008'],
            ['17-10-2021', '00:03:48', 'A', 'a3', '0009'],
            ['17-10-2021', '00:05:24', 'B', 'b1', '0010'],
            ['17-10-2021', '00:05:36', 'B', 'b1', '0011'],
            ['17-10-2021', '00:05:44', 'B', 'b2', '0012'],
            ['17-10-2021', '00:06:03', 'B', 'b2', '0013'],
            ['17-10-2021', '03:12:13', 'B', 'b3', '0014'],
            ['17-10-2021', '03:12:46', 'C', 'c1', '0015'],
            ['17-10-2021', '13:06:54', 'C', 'c1', '0016'],
            ['17-10-2021', '13:12:10', 'C', 'c2', '0017'],
            ['17-10-2021', '19:48:30', 'C', 'c2', '0018'],

            ['18-10-2021', '00:06:05', 'C', 'c1', '0019'],
            ['18-10-2021', '03:08:03', 'C', 'c2', '0020'],
            ['18-10-2021', '12:07:08', 'C', 'c3', '0021'],

            ['19-10-2021', '00:04:03', 'A', 'a1', '0022'],
            ['19-10-2021', '02:08:20', 'A', 'a2', '0023'],
            # NOTE(review): sub_group 'a2' under group 'B' - confirm this is intentional.
            ['19-10-2021', '10:03:08', 'B', 'a2', '0024'],
            ['19-10-2021', '12:07:08', 'B', 'b1', '0025'],

], columns=['str_date', 'str_time','group', 'sub_group','val_id'])

# Parse the whole column at once instead of calling strptime row by row.
data['datetime'] = pd.to_datetime(
    data['str_date'] + ' ' + data['str_time'],
    format='%d-%m-%Y %H:%M:%S',
)

data['date'] = data.datetime.dt.date
data['time'] = data.datetime.dt.time
data['hour'] = data.datetime.dt.hour

# English weekday name -> Russian weekday name.
weekday_rus = {
    'Monday': 'Понедельник',
    'Tuesday': 'Вторник',
    'Wednesday': 'Среда',
    'Thursday': 'Четверг',
    'Friday': 'Пятница',
    'Saturday': 'Суббота',
    'Sunday': 'Воскресенье'
}
data['week'] = data.datetime.dt.isocalendar().week  # ISO week number
data['weekday_number'] = data.datetime.dt.weekday
# day_name() returns English names by default; strftime('%A') follows the
# process locale, which would break the weekday_rus lookup on a non-English
# system.
data['weekday_name'] = data.datetime.dt.day_name()
data['weekday_rusname'] = data.weekday_name.map(weekday_rus)


# Keep only the derived columns (the raw str_date / str_time helpers are dropped).
datetime_cols = ['datetime', 'date','time', 'hour','week','weekday_number', 'weekday_name', 'weekday_rusname']
group_cols = ['group', 'sub_group']
data = data[[*datetime_cols, *group_cols, 'val_id']]

data

Unnamed: 0,datetime,date,time,hour,week,weekday_number,weekday_name,weekday_rusname,group,sub_group,val_id
0,2021-10-16 23:56:10,2021-10-16,23:56:10,23,41,5,Saturday,Суббота,A,a1,1
1,2021-10-16 23:58:30,2021-10-16,23:58:30,23,41,5,Saturday,Суббота,A,a1,2
2,2021-10-16 23:58:35,2021-10-16,23:58:35,23,41,5,Saturday,Суббота,B,b1,3
3,2021-10-16 23:58:56,2021-10-16,23:58:56,23,41,5,Saturday,Суббота,B,b2,4
4,2021-10-16 23:59:21,2021-10-16,23:59:21,23,41,5,Saturday,Суббота,C,c1,5
5,2021-10-16 23:59:42,2021-10-16,23:59:42,23,41,5,Saturday,Суббота,C,c1,6
6,2021-10-17 00:03:05,2021-10-17,00:03:05,0,41,6,Sunday,Воскресенье,A,a1,7
7,2021-10-17 00:03:25,2021-10-17,00:03:25,0,41,6,Sunday,Воскресенье,A,a2,8
8,2021-10-17 00:03:48,2021-10-17,00:03:48,0,41,6,Sunday,Воскресенье,A,a3,9
9,2021-10-17 00:05:24,2021-10-17,00:05:24,0,41,6,Sunday,Воскресенье,B,b1,10


# Группировки

### Ввести колонку с комбинацией групп в течение дня

In [4]:
# One row per date with a label listing every distinct group seen that day,
# sorted and joined with "_" (e.g. "A_B_C").
# Named aggregation sets the output column name directly, so there is no need
# to rename an unnamed (None) column produced by groupby.apply, and the lambda
# only ever sees the `group` column.
add = (
    data
        .groupby('date', as_index=False)
        .agg(group_combination=('group', lambda groups: '_'.join(sorted(groups.unique()))))

)

add

Unnamed: 0,date,group_combination
0,2021-10-16,A_B_C
1,2021-10-17,A_B_C
2,2021-10-18,C
3,2021-10-19,A_B


In [5]:
# Attach the per-day columns from `add` to every row of `data`.
# Columns that `add` contributes are dropped from `data` first, so re-running
# this cell replaces them instead of producing `_x` / `_y` duplicates.
# validate= guards against duplicated dates in `add` silently multiplying rows.
data = (
    data
        .drop(columns=add.columns.difference(['date']), errors='ignore')
        .merge(add, on='date', validate='many_to_one')
)
data

Unnamed: 0,datetime,date,time,week,weekday,group,sub_group,val_id,group_combination
0,2021-10-16 23:56:10,2021-10-16,23:56:10,41,Saturday,A,a1,1,A_B_C
1,2021-10-16 23:58:30,2021-10-16,23:58:30,41,Saturday,A,a1,2,A_B_C
2,2021-10-16 23:58:35,2021-10-16,23:58:35,41,Saturday,B,b1,3,A_B_C
3,2021-10-16 23:58:56,2021-10-16,23:58:56,41,Saturday,B,b2,4,A_B_C
4,2021-10-16 23:59:21,2021-10-16,23:59:21,41,Saturday,C,c1,5,A_B_C
5,2021-10-16 23:59:42,2021-10-16,23:59:42,41,Saturday,C,c1,6,A_B_C
6,2021-10-17 00:03:05,2021-10-17,00:03:05,41,Sunday,A,a1,7,A_B_C
7,2021-10-17 00:03:25,2021-10-17,00:03:25,41,Sunday,A,a2,8,A_B_C
8,2021-10-17 00:03:48,2021-10-17,00:03:48,41,Sunday,A,a3,9,A_B_C
9,2021-10-17 00:05:24,2021-10-17,00:05:24,41,Sunday,B,b1,10,A_B_C


### Посчитать несколько новых колонок

In [6]:
def f(day):
    """Summarise one day's rows.

    Parameters
    ----------
    day : pandas.DataFrame
        Rows of a single group; must contain `sub_group` and `val_id` columns.

    Returns
    -------
    pandas.Series
        `sub_group_combination` - distinct sub-groups, sorted and joined with "_";
        `sub_group_nunique` - number of distinct sub-groups;
        `val_id_nunique` - number of distinct value ids.
    """
    sub_groups = day['sub_group']
    distinct_sub_groups = sorted(sub_groups.unique())

    return pd.Series({
        'sub_group_combination': '_'.join(distinct_sub_groups),
        'sub_group_nunique': sub_groups.nunique(),
        'val_id_nunique': day['val_id'].nunique(),
    })
    
# Run the per-day summary `f` on every date; as_index=False keeps `date`
# as a regular column so the result can be merged back on it.
add = (
    data
        .groupby('date', as_index=False)
        .apply(f)
)
add

Unnamed: 0,date,sub_group_combination,sub_group_nunique,val_id_nunique
0,2021-10-16,a1_b1_b2_c1,4,6
1,2021-10-17,a1_a2_a3_b1_b2_b3_c1_c2,8,12
2,2021-10-18,c1_c2_c3,3,3
3,2021-10-19,a1_a2_b1,3,4


In [7]:
# Attach the per-day columns from `add` to every row of `data`.
# Columns that `add` contributes are dropped from `data` first, so re-running
# this cell replaces them instead of producing `_x` / `_y` duplicates.
# validate= guards against duplicated dates in `add` silently multiplying rows.
data = (
    data
        .drop(columns=add.columns.difference(['date']), errors='ignore')
        .merge(add, on='date', validate='many_to_one')
)
data

Unnamed: 0,datetime,date,time,week,weekday,group,sub_group,val_id,group_combination,sub_group_combination,sub_group_nunique,val_id_nunique
0,2021-10-16 23:56:10,2021-10-16,23:56:10,41,Saturday,A,a1,1,A_B_C,a1_b1_b2_c1,4,6
1,2021-10-16 23:58:30,2021-10-16,23:58:30,41,Saturday,A,a1,2,A_B_C,a1_b1_b2_c1,4,6
2,2021-10-16 23:58:35,2021-10-16,23:58:35,41,Saturday,B,b1,3,A_B_C,a1_b1_b2_c1,4,6
3,2021-10-16 23:58:56,2021-10-16,23:58:56,41,Saturday,B,b2,4,A_B_C,a1_b1_b2_c1,4,6
4,2021-10-16 23:59:21,2021-10-16,23:59:21,41,Saturday,C,c1,5,A_B_C,a1_b1_b2_c1,4,6
5,2021-10-16 23:59:42,2021-10-16,23:59:42,41,Saturday,C,c1,6,A_B_C,a1_b1_b2_c1,4,6
6,2021-10-17 00:03:05,2021-10-17,00:03:05,41,Sunday,A,a1,7,A_B_C,a1_a2_a3_b1_b2_b3_c1_c2,8,12
7,2021-10-17 00:03:25,2021-10-17,00:03:25,41,Sunday,A,a2,8,A_B_C,a1_a2_a3_b1_b2_b3_c1_c2,8,12
8,2021-10-17 00:03:48,2021-10-17,00:03:48,41,Sunday,A,a3,9,A_B_C,a1_a2_a3_b1_b2_b3_c1_c2,8,12
9,2021-10-17 00:05:24,2021-10-17,00:05:24,41,Sunday,B,b1,10,A_B_C,a1_a2_a3_b1_b2_b3_c1_c2,8,12


### Посчитать количество уникальных значений для каждого уровня группировки

In [8]:
def groupcount(df, cols, char, prefix="_count"):
    """Count unique `char` values at every level of a nested grouping.

    For each column in `cols` (outermost first) the number of distinct `char`
    values inside the group formed by that column and all preceding ones is
    computed. Counts of the outer levels become index levels placed right
    after their column; the count of the innermost level is the single data
    column of the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows. Not modified.
    cols : str or sequence of str
        Grouping columns, outermost level first. A single name is accepted.
    char : str
        Column whose distinct values are counted.
    prefix : str, default "_count"
        Text appended to each grouping column name to build the name of its
        count (despite the parameter name it is used as a suffix).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``col_1, col_1<prefix>, ..., col_n`` with one column
        ``col_n<prefix>``.

    Raises
    ------
    ValueError
        If `cols` is empty or a generated count name equals one of `cols`
        or `char`.
    KeyError
        If a grouping column is missing from `df`.
    """
    # Accept a single column name as well as a sequence of names.
    cols = [cols] if isinstance(cols, str) else list(cols)
    if not cols:
        raise ValueError("cols must contain at least one column name")

    count_names = [f"{col}{prefix}" for col in cols]
    clashing = set(count_names) & {*cols, char}
    if clashing:
        raise ValueError(
            f"count column names {sorted(clashing)} clash with grouping/value "
            "columns; choose a different prefix"
        )

    counted = df
    keys = []
    # Only the outer levels need a materialised count column (they become
    # index levels); the innermost count is produced by the final groupby.
    for col, count_name in zip(cols[:-1], count_names[:-1]):
        keys.append(col)
        # groupby ignores rows with a missing key; drop them explicitly so the
        # broadcast count below has no gaps and keeps an integer dtype.
        counted = counted.dropna(subset=[col])
        # transform broadcasts the per-group count back onto every row, which
        # replaces a separate groupby + rename + merge per level.
        level_counts = counted.groupby(keys)[char].transform('nunique')
        counted = counted.assign(**{count_name: level_counts})
        keys.append(count_name)
    keys.append(cols[-1])

    return (
        counted
            .groupby(keys)[char]
            .nunique()
            .to_frame()
            .rename(columns={char: count_names[-1]})
    )

In [13]:
# Unique val_id counts for the nesting date -> hour -> group -> sub_group.
groupcount(
    data,
    cols=['date', 'hour', 'group', 'sub_group'],
    char='val_id',
    prefix="_count",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,sub_group_count
date,date_count,hour,hour_count,group,group_count,sub_group,Unnamed: 7_level_1
2021-10-16,6,23,6,A,2,a1,2
2021-10-16,6,23,6,B,2,b1,1
2021-10-16,6,23,6,B,2,b2,1
2021-10-16,6,23,6,C,2,c1,2
2021-10-17,12,0,7,A,3,a1,1
2021-10-17,12,0,7,A,3,a2,1
2021-10-17,12,0,7,A,3,a3,1
2021-10-17,12,0,7,B,4,b1,2
2021-10-17,12,0,7,B,4,b2,2
2021-10-17,12,3,2,B,1,b3,1
