# Настройка ноутбука

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Расширить рабочее поле ноутбука на весь экран
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# Описание ноутбука

Ноутбук содержит практики работы с датой и временем.

# Работа с объектами

## Как генерить объекты типа datetime

### Объект date

In [3]:
year = 2021
month = 10
day = 10

date = dt.date(year, month, day)
date

datetime.date(2021, 10, 10)

In [4]:
# Получить текущую дату

today = date.today()
today

datetime.date(2023, 3, 2)

### Объект time

In [5]:
hours = 17
minutes = 45
seconds = 13

time = dt.time(hours, minutes, seconds)
time

datetime.time(17, 45, 13)

### Объект datetime

In [6]:
date_time = dt.datetime.combine(date, time)
date_time

datetime.datetime(2021, 10, 10, 17, 45, 13)

In [7]:
# Вывести datetime-объект в расширенном виде
date_time.ctime()

'Sun Oct 10 17:45:13 2021'

In [8]:
# Или так
dt.datetime(2021, 10, 10, 17, 45, 13)

datetime.datetime(2021, 10, 10, 17, 45, 13)

## str - datetime

## datetime - str

### Варианты форматов

| format                   | datetime_str             | datetime                 |
| ------------------------ | ------------------------ | ------------------------ |
| "%d/%m/%Y %H:%M:%S"      | '13/9/2021 19:45:13'     | 2021-09-13 19:45:13      |
| "%Y-W%W-%w"              | '2021-W34-1'             | 2021-08-23 00:00:00      |

### Дата и время

In [9]:
month = 9
day = 13
year = 2021

hour = 19
minute = 33
second = 12

datetime_str = f"{day}/{month}/{year} {hour}:{minutes}:{seconds}"

datetime = dt.datetime.strptime(datetime_str, "%d/%m/%Y %H:%M:%S")
print(f"""
datetime_str: {datetime_str}
datetime: {datetime}
""")


datetime_str: 13/9/2021 19:45:13
datetime: 2021-09-13 19:45:13



### Только дата, в странном формате

In [10]:
number = 34
day = 1 # 1 - monday, 0 - saturday

datetime_str = f"2021-W{number}-{day}"
datetime = dt.datetime.strptime(datetime_str, "%Y-W%W-%w")
print(f"""
datetime_str: {datetime_str}
datetime: {datetime}
""")


datetime_str: 2021-W34-1
datetime: 2021-08-23 00:00:00



### Только время

## datetime <=> timestamp

In [11]:
datetime = dt.datetime(2021, 10, 17, 0, 3, 5)
print(datetime)

2021-10-17 00:03:05


In [12]:
timestamp = datetime.timestamp()
print(timestamp)

1634403785.0


In [13]:
fromtimestamp = dt.datetime.fromtimestamp(timestamp)
print(fromtimestamp)

2021-10-17 00:03:05


In [14]:
utcfromtimestamp = dt.datetime.utcfromtimestamp(timestamp)
print(utcfromtimestamp)

2021-10-16 17:03:05


## timedelta

https://medium.com/geekculture/timestamps-and-date-ranges-in-depth-work-with-special-date-and-time-objects-in-pandas-692ae6ff02d0

In [15]:
date_1 = dt.date(2021, 2, 15)
date_2 = dt.date(2021, 3, 12)

print(date_2 - date_1)
print(date_1 - date_2)
print((date_2 - date_1).days)
print((date_1 - date_2).days)
date_2 - date_1

25 days, 0:00:00
-25 days, 0:00:00
25
-25


datetime.timedelta(days=25)

In [16]:
(date_2 - date_1).days

25

In [17]:
(date_2 - date_1).total_seconds()

2160000.0

In [18]:
mydelta = dt.timedelta(days=6, hours=4)
mydelta

datetime.timedelta(days=6, seconds=14400)

In [19]:
today = dt.datetime.today()

two_weeks_from_now = today + dt.timedelta(weeks=2)
two_weeks_from_now

datetime.datetime(2023, 3, 16, 16, 46, 3, 722521)

In [20]:
dt.timedelta(days=1, seconds=1) - dt.timedelta(days=1, seconds=0)

datetime.timedelta(seconds=1)

In [21]:
dt.timedelta(weeks=1) / dt.timedelta(days=1)

7.0

In [22]:
series = pd.Series(pd.date_range(start = "2021-01-01", end="2021-01-05", periods=5))
series + pd.Timedelta("1 days")

0   2021-01-02
1   2021-01-03
2   2021-01-04
3   2021-01-05
4   2021-01-06
dtype: datetime64[ns]

In [23]:
"""
dates = []
for date in (self._date_from + datetime.timedelta(n) for n in range(self._n_days)):
    date_path = date.strftime('%Y/%m/%d')
    dates.append(date_path)
    self._date_to = date
self._date_from = self._date_from.strftime('%Y.%m.%d')
self._date_to = self._date_to.strftime('%Y.%m.%d')

"""

"\ndates = []\nfor date in (self._date_from + datetime.timedelta(n) for n in range(self._n_days)):\n    date_path = date.strftime('%Y/%m/%d')\n    dates.append(date_path)\n    self._date_to = date\nself._date_from = self._date_from.strftime('%Y.%m.%d')\nself._date_to = self._date_to.strftime('%Y.%m.%d')\n\n"

# Практики

### Получить список дат за определенный период

In [27]:
start = '01-09-2021'
end = '30-09-2021'

pd.Series([str(timestamp.date()) for timestamp in pd.date_range(start=start, end=end)])

0      2021-01-09
1      2021-01-10
2      2021-01-11
3      2021-01-12
4      2021-01-13
          ...    
260    2021-09-26
261    2021-09-27
262    2021-09-28
263    2021-09-29
264    2021-09-30
Length: 265, dtype: object

## Работа с датафреймами

In [56]:
N = 300

datetimes = pd.Series(pd.date_range(start='2021-07-01',end='2021-09-30', freq='S'))
dates = datetimes.dt.date.unique().tolist()
times = datetimes.dt.time.unique().tolist()

groups_list = ['A', 'B', 'C']
subgroup_list = ['a', 'b', 'c' , 'd']

value_min = 1
value_max = 100

data = pd.DataFrame({
    'date': [np.random.choice(dates, 1, replace=True)[0] for i in range(N)],
    'time': [np.random.choice(times, 1, replace=True)[0] for i in range(N)],
    'group': [np.random.choice(groups_list, 1, replace=True)[0] for i in range(N)],
    'sub_group': [np.random.choice(subgroup_list, 1, replace=True)[0] for i in range(N)],
    'value': [np.random.choice(range(value_min, value_max+1), 1)[0] for i in range(N)]
}).sort_values(['date', 'time'])
data.insert(0, 'datetime', data.apply(lambda row: dt.datetime.combine(row.date, row.time), axis=1))

data

Unnamed: 0,datetime,date,time,group,sub_group,value
6,2021-07-01 00:31:36,2021-07-01,00:31:36,C,c,16
39,2021-07-01 06:29:01,2021-07-01,06:29:01,C,a,60
20,2021-07-01 08:42:40,2021-07-01,08:42:40,B,b,78
253,2021-07-02 01:46:58,2021-07-02,01:46:58,C,a,29
268,2021-07-02 07:23:54,2021-07-02,07:23:54,A,b,11
...,...,...,...,...,...,...
89,2021-09-29 15:32:42,2021-09-29,15:32:42,C,a,84
279,2021-09-30 03:22:53,2021-09-30,03:22:53,A,c,94
273,2021-09-30 08:26:47,2021-09-30,08:26:47,A,a,36
127,2021-09-30 21:07:32,2021-09-30,21:07:32,A,d,63


### Найти ближайшую по времени запись

In [71]:
df = data[['datetime', 'group', 'sub_group', 'value']]
df.head()

Unnamed: 0,datetime,group,sub_group,value
6,2021-07-01 00:31:36,C,c,16
39,2021-07-01 06:29:01,C,a,60
20,2021-07-01 08:42:40,B,b,78
253,2021-07-02 01:46:58,C,a,29
268,2021-07-02 07:23:54,A,b,11


In [74]:
timepoint = dt.datetime(2021, 7, 1, 5, 36, 35)


closest = df.iloc[np.argmin(np.abs(df.datetime - timepoint))]
closest

datetime     2021-07-01 06:29:01
group                          C
sub_group                      a
value                         60
Name: 39, dtype: object

### Прямое (без apply) обращение к объектам datetime в колонках с помощью конструкции .dt + .[метод]

In [30]:
data['datetime_str'] = data.apply(lambda row: f'{row.date} {row.time}', axis=1)

In [31]:
# data['datetime'] = data.datetime_str.apply(lambda x:  dt.datetime.strptime(x, '%d-%m-%Y %H:%M:%S'))
data['datetime'] = pd.to_datetime(data['datetime_str'])

data['date'] = data.datetime.dt.date
data['time'] = data.datetime.dt.time
data['year'] = data.datetime.dt.year
data['month'] = data.datetime.dt.month
data['hour'] = data.datetime.dt.hour # Час



weekday_rus = {
    'Monday': 'Понедельник',
    'Tuesday': 'Вторник', 
    'Wednesday': 'Среда', 
    'Thursday': 'Четверг', 
    'Friday': 'Пятница', 
    'Saturday': 'Суббота', 
    'Sunday': 'Воскресенье'
}
data['week'] = data.datetime.dt.isocalendar().week # Получить номер недели
data['weekday_number'] = data.datetime.dt.weekday

# data['weekday_name'] = data.datetime.dt.strftime('%A') 
data['weekday_name'] = data.datetime.dt.day_name() # Получить день недели (на английском)
data['weekday_rusname'] = data.weekday_name.apply(lambda x: weekday_rus[x])

data

Unnamed: 0,date,time,group,sub_group,value,datetime_str,datetime,year,month,hour,week,weekday_number,weekday_name,weekday_rusname
155,2021-07-01,10:00:18,B,d,71,2021-07-01 10:00:18,2021-07-01 10:00:18,2021,7,10,26,3,Thursday,Четверг
232,2021-07-01,21:59:58,B,a,17,2021-07-01 21:59:58,2021-07-01 21:59:58,2021,7,21,26,3,Thursday,Четверг
134,2021-07-02,05:55:43,B,a,81,2021-07-02 05:55:43,2021-07-02 05:55:43,2021,7,5,26,4,Friday,Пятница
244,2021-07-02,10:13:43,C,d,79,2021-07-02 10:13:43,2021-07-02 10:13:43,2021,7,10,26,4,Friday,Пятница
39,2021-07-02,11:57:22,B,a,75,2021-07-02 11:57:22,2021-07-02 11:57:22,2021,7,11,26,4,Friday,Пятница
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,2021-09-29,04:51:14,A,b,89,2021-09-29 04:51:14,2021-09-29 04:51:14,2021,9,4,39,2,Wednesday,Среда
132,2021-09-29,07:47:51,A,a,78,2021-09-29 07:47:51,2021-09-29 07:47:51,2021,9,7,39,2,Wednesday,Среда
143,2021-09-29,12:32:35,A,b,44,2021-09-29 12:32:35,2021-09-29 12:32:35,2021,9,12,39,2,Wednesday,Среда
222,2021-09-29,20:49:53,C,d,38,2021-09-29 20:49:53,2021-09-29 20:49:53,2021,9,20,39,2,Wednesday,Среда


### Как посчитать количество секунд между строками в таблице?

In [32]:
data['seconds_diff'] = (
    
        data['datetime']
                .diff() # считаем разность между значениями - получаем объект типа timedelta
                .dt.total_seconds() # применяем метод total_seconds(), чтобы получить количество секунд
                .fillna(0) # первое значение NaN - заменяем его нулем
                .astype(int) # меняем тип всех значений с float на int
)

data.head()

Unnamed: 0,date,time,group,sub_group,value,datetime_str,datetime,year,month,hour,week,weekday_number,weekday_name,weekday_rusname,seconds_diff
155,2021-07-01,10:00:18,B,d,71,2021-07-01 10:00:18,2021-07-01 10:00:18,2021,7,10,26,3,Thursday,Четверг,0
232,2021-07-01,21:59:58,B,a,17,2021-07-01 21:59:58,2021-07-01 21:59:58,2021,7,21,26,3,Thursday,Четверг,43180
134,2021-07-02,05:55:43,B,a,81,2021-07-02 05:55:43,2021-07-02 05:55:43,2021,7,5,26,4,Friday,Пятница,28545
244,2021-07-02,10:13:43,C,d,79,2021-07-02 10:13:43,2021-07-02 10:13:43,2021,7,10,26,4,Friday,Пятница,15480
39,2021-07-02,11:57:22,B,a,75,2021-07-02 11:57:22,2021-07-02 11:57:22,2021,7,11,26,4,Friday,Пятница,6219


### Эта же опрерация, отдельно для каждой группы

In [33]:
(  
    data.groupby('group', as_index = False)['datetime']
        .diff() # в данном случае возвращается dataframe
        .datetime.dt.total_seconds() # нужно снова взять колоноку 'datetime'
        .fillna(0)
        .astype(int)
)

155        0
232    43180
134    28545
244        0
39     21699
       ...  
97     88214
132    10597
143    17084
222    84003
213    63048
Name: datetime, Length: 300, dtype: int64

### Посчитать количество записей по заданному временному периоду?

In [34]:
periods_data = data.set_index('datetime')
periods_data = periods_data.resample('60min').sum() # .count(), .first()
periods_data

Unnamed: 0_level_0,value,year,month,hour,week,weekday_number,seconds_diff
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-07-01 10:00:00,71,2021,7,10,26,3,0
2021-07-01 11:00:00,0,0,0,0,0,0,0
2021-07-01 12:00:00,0,0,0,0,0,0,0
2021-07-01 13:00:00,0,0,0,0,0,0,0
2021-07-01 14:00:00,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...
2021-09-30 10:00:00,0,0,0,0,0,0,0
2021-09-30 11:00:00,0,0,0,0,0,0,0
2021-09-30 12:00:00,0,0,0,0,0,0,0
2021-09-30 13:00:00,0,0,0,0,0,0,0


### Как сгруппировать данные одновременно по месячному периоду и категориям?

1) Добавить признак с указанием месячного периода записи (формат 'первый день месяца - последний день месяца' )

In [35]:
from calendar import monthrange
data['month_period'] = data.apply(lambda row: f'{dt.date(row.year, row.month, 1)} - {dt.date(row.year, row.month, monthrange(row.year, row.month)[-1])}', axis=1)
data['month_period'].unique()

array(['2021-07-01 - 2021-07-31', '2021-08-01 - 2021-08-31',
       '2021-09-01 - 2021-09-30'], dtype=object)

2) Сгруппировать по признакам и категориям

In [36]:
data.groupby(['month_period', 'group', 'sub_group'], as_index=False)['value'].sum()

Unnamed: 0,month_period,group,sub_group,value
0,2021-07-01 - 2021-07-31,A,a,409
1,2021-07-01 - 2021-07-31,A,b,216
2,2021-07-01 - 2021-07-31,A,c,430
3,2021-07-01 - 2021-07-31,A,d,380
4,2021-07-01 - 2021-07-31,B,a,603
5,2021-07-01 - 2021-07-31,B,b,535
6,2021-07-01 - 2021-07-31,B,c,313
7,2021-07-01 - 2021-07-31,B,d,355
8,2021-07-01 - 2021-07-31,C,a,460
9,2021-07-01 - 2021-07-31,C,b,530


### Как посчитать средние недельные значения за месячный период

In [37]:
pivot_table = (
    data # Переформатируем таблицу, чтобы даты и дни недели стали индексами, bic банков колонками. А на их пересечении - сумма диалогов за день
        .pivot_table(
            index=['date','weekday_rusname'], 
            columns=['group','sub_group'], 
            values='value', 
            aggfunc='nunique') 
        .fillna(0)
)

# Будет лучше отсоритровать колонки в порядке убывания суммарных значений
pivot_table = pivot_table.append(pivot_table.sum(numeric_only=True).rename(('',"Сумма по строкам"))) # Добавим суммирующую строку
pivot_table = pivot_table.sort_values(('',"Сумма по строкам"), axis=1, ascending=False) # Отсортируем колонку по суммирующей строке
pivot_table = pivot_table.iloc[:-1] # Удалим суммирующую строку, чтобы при наложении heatmap-а она не перетягивала цвет на себя
pivot_table

Unnamed: 0_level_0,group,B,C,C,B,B,A,C,A,B,A,A,C
Unnamed: 0_level_1,sub_group,a,d,c,b,d,a,b,c,c,b,d,a
date,weekday_rusname,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2021-07-01,Четверг,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-07-02,Пятница,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-07-03,Суббота,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2021-07-04,Воскресенье,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2021-07-05,Понедельник,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-26,Воскресенье,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-27,Понедельник,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2021-09-28,Вторник,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2021-09-29,Среда,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0


In [38]:
# Если считать по кажому дню таблица получится слишком длинной. 
# Вместо этого можно посчитать средние значения по каждому дню и вывести обобщающую недельную сводку
dates = sorted(set([ind[0] for ind in pivot_table.index])) # Выделим даты из индекса
weeknames = ['Понедельник',  'Вторник', 'Среда', 'Четверг', 'Пятница', 'Суббота', 'Воскресенье']

pivot_table_concat = pd.concat([pivot_table.loc[date] for date in dates]) # Поставим дни рядышком
pivot_table_mean = np.round(pivot_table_concat.groupby(pivot_table_concat.index).mean(), 2) # Посчитаем среднее по каждому дню недели
pivot_table_mean = pivot_table_mean.reindex(weeknames) # Поправим последовательность дней недели
pivot_table_mean

group,B,C,C,B,B,A,C,A,B,A,A,C
sub_group,a,d,c,b,d,a,b,c,c,b,d,a
weekday_rusname,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Понедельник,0.31,0.46,0.23,0.46,0.23,0.31,0.46,0.23,0.38,0.15,0.23,0.15
Вторник,0.31,0.62,0.31,0.08,0.23,0.23,0.38,0.31,0.31,0.23,0.31,0.15
Среда,0.33,0.25,0.25,0.33,0.5,0.33,0.25,0.42,0.42,0.42,0.08,0.08
Четверг,0.36,0.29,0.14,0.21,0.36,0.43,0.07,0.29,0.14,0.29,0.29,0.43
Пятница,0.46,0.08,0.46,0.23,0.23,0.23,0.0,0.23,0.15,0.46,0.38,0.38
Суббота,0.5,0.5,0.5,0.42,0.33,0.08,0.5,0.0,0.17,0.08,0.25,0.08
Воскресенье,0.54,0.08,0.23,0.31,0.15,0.23,0.23,0.31,0.23,0.0,0.08,0.23


In [39]:
data = pivot_table_mean

data_styled = (
            data.style
                    .background_gradient(cmap='Greys', 
#                                          axis=1
                    ) # Добавим цветовой градиент, для простоты восприятия
                    .format({col: "{}" for col in data.columns}) # Настроим формат отображения, чтобы к числам не добавлялись лишние нули
                    
)

data_styled

group,B,C,C,B,B,A,C,A,B,A,A,C
sub_group,a,d,c,b,d,a,b,c,c,b,d,a
weekday_rusname,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Понедельник,0.31,0.46,0.23,0.46,0.23,0.31,0.46,0.23,0.38,0.15,0.23,0.15
Вторник,0.31,0.62,0.31,0.08,0.23,0.23,0.38,0.31,0.31,0.23,0.31,0.15
Среда,0.33,0.25,0.25,0.33,0.5,0.33,0.25,0.42,0.42,0.42,0.08,0.08
Четверг,0.36,0.29,0.14,0.21,0.36,0.43,0.07,0.29,0.14,0.29,0.29,0.43
Пятница,0.46,0.08,0.46,0.23,0.23,0.23,0.0,0.23,0.15,0.46,0.38,0.38
Суббота,0.5,0.5,0.5,0.42,0.33,0.08,0.5,0.0,0.17,0.08,0.25,0.08
Воскресенье,0.54,0.08,0.23,0.31,0.15,0.23,0.23,0.31,0.23,0.0,0.08,0.23
