In [1]:
import pandas as pd
import numpy as np

import datetime as dt

# Работа с объектами

## Как создавать объекты типа datetime

### Объект date

In [2]:
# Calendar components of the date to build.
year, month, day = 2021, 10, 10

# A date object carries only year, month and day (no time-of-day part).
date = dt.date(year=year, month=month, day=day)
date

datetime.date(2021, 10, 10)

### Объект time

In [3]:
# Clock components of the time to build.
hours, minutes, seconds = 17, 45, 13

# A time object carries only the time of day (no calendar date).
time = dt.time(hour=hours, minute=minutes, second=seconds)
time

datetime.time(17, 45, 13)

### Объект datetime

In [4]:
# Merge the separately built date and time objects into one datetime.
date_time = dt.datetime.combine(date=date, time=time)
date_time

datetime.datetime(2021, 10, 10, 17, 45, 13)

### Или так

In [5]:
# A datetime can also be built directly from all six components.
dt.datetime(year=2021, month=10, day=17, hour=0, minute=3, second=5)

datetime.datetime(2021, 10, 17, 0, 3, 5)

## str - datetime, datetime - str

In [6]:
number = 34  # week of the year as counted by %W (weeks start on Monday)
day = 1 # %w weekday number: 1 - Monday ... 6 - Saturday, 0 - Sunday

# Build a "<year>-W<week>-<weekday>" string and parse it back into a datetime.
# NOTE: this rebinds `date_time`, which the combine() cell above also assigns.
d = f"2021-W{number}-{day}"
date_time = dt.datetime.strptime(d, "%Y-W%W-%w")
print(date_time)

2021-08-23 00:00:00


In [10]:
# Month/day/year date followed by a 24-hour clock time.
us_style_format = "%m/%d/%Y, %H:%M:%S"

# strftime renders the datetime as text according to the format codes.
string_date = date_time.strftime(us_style_format)
string_date

'08/23/2021, 00:00:00'

## datetime - timestamp, timestamp - datetime

In [7]:
# Naive datetime (no tzinfo) used for the timestamp round trip below.
# The variable is called `datetime`; the module itself is imported as `dt`.
datetime = dt.datetime(year=2021, month=10, day=17, hour=0, minute=3, second=5)
print(datetime)

2021-10-17 00:03:05


In [8]:
# Convert to a POSIX timestamp (float seconds since the Unix epoch).
# `datetime` is naive, so timestamp() interprets it as local time: the printed
# number depends on the time zone of the machine that runs the cell.
timestamp = datetime.timestamp()
print(timestamp)

1634403785.0


In [9]:
# Inverse conversion: without a tz argument, fromtimestamp() returns a naive
# datetime expressed in the local time zone, so the original value comes back.
fromtimestamp = dt.datetime.fromtimestamp(timestamp)
print(fromtimestamp)

2021-10-17 00:03:05


In [11]:
# The same instant expressed in UTC instead of local time.
# datetime.utcfromtimestamp() is deprecated since Python 3.12, so convert with an
# explicit UTC tzinfo and then drop it; the result is the same naive value that
# utcfromtimestamp() returned.
utcfromtimestamp = dt.datetime.fromtimestamp(timestamp, tz=dt.timezone.utc).replace(tzinfo=None)
print(utcfromtimestamp)

2021-10-16 17:03:05


# Работа с датафреймами

In [17]:
# One tuple per record: (group, date text as DD-MM-YYYY, time text as HH:MM:SS, value).
rows = [
    ('A', '16-10-2021', '23:56:10', 'a'),
    ('A', '16-10-2021', '23:58:30', 'b'),

    ('B', '17-10-2021', '00:03:05', 'c'),
    ('B', '17-10-2021', '00:10:07', 'd'),
    ('B', '17-10-2021', '17:36:10', 'e'),
    ('B', '17-10-2021', '19:48:30', 'f'),

    ('C', '18-10-2021', '00:06:05', 'g'),
    ('C', '18-10-2021', '03:08:03', 'h'),
]
column_names = ['group', 'str_date', 'str_time', 'val']

# Date and time are deliberately kept as plain strings at this point.
df = pd.DataFrame(rows, columns=column_names)
df

Unnamed: 0,group,str_date,str_time,val
0,A,16-10-2021,23:56:10,a
1,A,16-10-2021,23:58:30,b
2,B,17-10-2021,00:03:05,c
3,B,17-10-2021,00:10:07,d
4,B,17-10-2021,17:36:10,e
5,B,17-10-2021,19:48:30,f
6,C,18-10-2021,00:06:05,g
7,C,18-10-2021,03:08:03,h


## К объектам datetime в колонках можно обращаться напрямую (без apply) с помощью конструкции .dt.[метод]

In [18]:
# Join the date and time text columns and parse them in one vectorized call;
# this replaces a row-by-row apply() with strptime and yields a datetime64 column.
df['datetime'] = pd.to_datetime(
    df['str_date'] + ' ' + df['str_time'],
    format='%d-%m-%Y %H:%M:%S',
)

# The .dt accessor exposes datetime attributes and methods for the whole column.
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time
df['week'] = df['datetime'].dt.isocalendar().week  # ISO week number
df['weekday'] = df['datetime'].dt.strftime('%A')  # full weekday name (English in the recorded output)

df

Unnamed: 0,group,str_date,str_time,val,datetime,date,time,week,weekday
0,A,16-10-2021,23:56:10,a,2021-10-16 23:56:10,2021-10-16,23:56:10,41,Saturday
1,A,16-10-2021,23:58:30,b,2021-10-16 23:58:30,2021-10-16,23:58:30,41,Saturday
2,B,17-10-2021,00:03:05,c,2021-10-17 00:03:05,2021-10-17,00:03:05,41,Sunday
3,B,17-10-2021,00:10:07,d,2021-10-17 00:10:07,2021-10-17,00:10:07,41,Sunday
4,B,17-10-2021,17:36:10,e,2021-10-17 17:36:10,2021-10-17,17:36:10,41,Sunday
5,B,17-10-2021,19:48:30,f,2021-10-17 19:48:30,2021-10-17,19:48:30,41,Sunday
6,C,18-10-2021,00:06:05,g,2021-10-18 00:06:05,2021-10-18,00:06:05,42,Monday
7,C,18-10-2021,03:08:03,h,2021-10-18 03:08:03,2021-10-18,03:08:03,42,Monday


## Как посчитать количество секунд между строками в таблице?

In [19]:
# Gap between each row and the previous one; diff() yields timedelta values.
elapsed = df['datetime'].diff()

# total_seconds() converts the gaps to float seconds. The first row has no
# predecessor (NaN), so it is replaced with 0 before casting the column to int.
df['seconds_diff'] = elapsed.dt.total_seconds().fillna(0).astype(int)

df

Unnamed: 0,group,str_date,str_time,val,datetime,date,time,week,weekday,seconds_diff
0,A,16-10-2021,23:56:10,a,2021-10-16 23:56:10,2021-10-16,23:56:10,41,Saturday,0
1,A,16-10-2021,23:58:30,b,2021-10-16 23:58:30,2021-10-16,23:58:30,41,Saturday,140
2,B,17-10-2021,00:03:05,c,2021-10-17 00:03:05,2021-10-17,00:03:05,41,Sunday,275
3,B,17-10-2021,00:10:07,d,2021-10-17 00:10:07,2021-10-17,00:10:07,41,Sunday,422
4,B,17-10-2021,17:36:10,e,2021-10-17 17:36:10,2021-10-17,17:36:10,41,Sunday,62763
5,B,17-10-2021,19:48:30,f,2021-10-17 19:48:30,2021-10-17,19:48:30,41,Sunday,7940
6,C,18-10-2021,00:06:05,g,2021-10-18 00:06:05,2021-10-18,00:06:05,42,Monday,15455
7,C,18-10-2021,03:08:03,h,2021-10-18 03:08:03,2021-10-18,03:08:03,42,Monday,10918


## Эту операцию можно провести отдельно для каждой группы

In [20]:
# Same calculation, restarted inside every group: the first row of each group
# has no predecessor within that group and therefore gets 0.
# Selecting the column from a plain groupby makes diff() return a Series, so the
# .dt accessor applies directly. This avoids relying on as_index=False turning
# the result into a DataFrame, which depends on the pandas version.
df['group_seconds_diff'] = (
    df.groupby('group')['datetime']
        .diff()  # timedelta to the previous row of the same group
        .dt.total_seconds()
        .fillna(0)
        .astype(int)
)

df

Unnamed: 0,group,str_date,str_time,val,datetime,date,time,week,weekday,seconds_diff,group_seconds_diff
0,A,16-10-2021,23:56:10,a,2021-10-16 23:56:10,2021-10-16,23:56:10,41,Saturday,0,0
1,A,16-10-2021,23:58:30,b,2021-10-16 23:58:30,2021-10-16,23:58:30,41,Saturday,140,140
2,B,17-10-2021,00:03:05,c,2021-10-17 00:03:05,2021-10-17,00:03:05,41,Sunday,275,0
3,B,17-10-2021,00:10:07,d,2021-10-17 00:10:07,2021-10-17,00:10:07,41,Sunday,422,422
4,B,17-10-2021,17:36:10,e,2021-10-17 17:36:10,2021-10-17,17:36:10,41,Sunday,62763,62763
5,B,17-10-2021,19:48:30,f,2021-10-17 19:48:30,2021-10-17,19:48:30,41,Sunday,7940,7940
6,C,18-10-2021,00:06:05,g,2021-10-18 00:06:05,2021-10-18,00:06:05,42,Monday,15455,0
7,C,18-10-2021,03:08:03,h,2021-10-18 03:08:03,2021-10-18,03:08:03,42,Monday,10918,10918


## Как посчитать записи по заданному временному периоду?

In [21]:
# Index by timestamp so the rows can be bucketed into fixed 60-minute windows.
# numeric_only=True restricts the sum to the numeric columns (week and the two
# *_diff columns). The string/date/time object columns cannot be meaningfully
# summed, and pandas 2.x no longer drops them silently.
# Other aggregations such as .count() or .first() can be used instead of .sum().
periods_df = (
    df.set_index('datetime')
      .resample('60min')
      .sum(numeric_only=True)
)
periods_df

Unnamed: 0_level_0,week,seconds_diff,group_seconds_diff
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-10-16 23:00:00,82,140,140
2021-10-17 00:00:00,82,697,422
2021-10-17 01:00:00,0,0,0
2021-10-17 02:00:00,0,0,0
2021-10-17 03:00:00,0,0,0
2021-10-17 04:00:00,0,0,0
2021-10-17 05:00:00,0,0,0
2021-10-17 06:00:00,0,0,0
2021-10-17 07:00:00,0,0,0
2021-10-17 08:00:00,0,0,0
