In [1]:
import pandas as pd

## 1. Создаем фрейм данных views с двумя столбцами: datetime и user.

In [2]:
views = pd.read_csv(
    '../../data/feed-views.log',
    delimiter='\t',
    names=['datetime', 'user'])

In [3]:
views['datetime'] = pd.to_datetime(views['datetime'])

In [4]:
views['year'] = views['datetime'].dt.year
views['month'] = views['datetime'].dt.month
views['day'] = views['datetime'].dt.day
views['hour'] = views['datetime'].dt.hour
views['minute'] = views['datetime'].dt.minute
views['second'] = views['datetime'].dt.second

In [14]:
views.head()

Unnamed: 0,datetime,user,year,month,day,hour,minute,second,daytime
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8,afternoon
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23,afternoon
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30,afternoon
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44,afternoon
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52,afternoon


## 2. Создаем столбец daytime.

In [15]:
views['daytime'] = pd.cut(
    views['hour'], 
    bins=[0, 4, 7, 11, 17, 20, 24],
    labels= ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening'],
    right=False)

In [17]:
views.head()

Unnamed: 0,datetime,user,year,month,day,hour,minute,second,daytime
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8,afternoon
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23,afternoon
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30,afternoon
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44,afternoon
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52,afternoon


In [19]:
views.set_index('user', inplace=True)
views.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon


## 3. Считаем количество данных во фрейме.

In [20]:
views.count()

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

In [21]:
views['daytime'].value_counts()

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: count, dtype: int64

## 4. Сортируем данные по часам, минутам и секундам по возрастанию.

In [24]:
views.sort_values(by=['hour', 'minute', 'second'], ascending=True)
views.tail()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
artem,2020-05-22 10:36:14.662600,2020,5,22,10,36,14,morning


## 5. Вычисляем максимальное значение часа для ночи и минимальное значение для вечера. Также выводим строки, где время 8 часов. Вычисляем моду для daytime и hour.

In [50]:
views[views['daytime'] == 'night']['hour'].max().item()

3

In [29]:
views[views['daytime'] == 'evening']['hour'].min().item()

20

In [30]:
views[views['hour'] == 8]

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
alexander,2020-05-15 08:16:03.918402,2020,5,15,8,16,3,morning
alexander,2020-05-15 08:35:01.471463,2020,5,15,8,35,1,morning


In [48]:
hours_mode = views['hour'].mode().item()
hours_mode

22

In [46]:
daytime_mode = views['daytime'].mode().item()
daytime_mode

'evening'

## 6. Выводим 3 самые ранние и самые поздние заходы пользователей. 

In [59]:
earliest_morning = views[views['daytime'] == 'morning'].nsmallest(3, 'hour')['hour']
earliest_morning

user
alexander    8
alexander    8
artem        9
Name: hour, dtype: int32

In [60]:
latest_morning = views[views['daytime'] == 'morning'].nlargest(3, 'hour')['hour']
latest_morning

user
konstantin    10
maxim         10
konstantin    10
Name: hour, dtype: int32

## 7. Применение метода describe.

In [61]:
views.describe()

Unnamed: 0,datetime,year,month,day,hour,minute,second
count,1076,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020-05-10 09:00:41.211420672,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
min,2020-04-17 12:01:08.463179,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020-05-10 01:13:49.857472,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020-05-11 22:48:35.302552832,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020-05-14 14:44:34.749530624,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020-05-22 10:36:14.662600,2020.0,5.0,30.0,23.0,59.0,59.0
std,,0.0,0.335557,4.906567,6.95549,17.689388,17.405506


In [65]:
desc_hours = views['hour'].describe()
desc_hours

count    1076.000000
mean       16.249071
std         6.955490
min         0.000000
25%        13.000000
50%        19.000000
75%        22.000000
max        23.000000
Name: hour, dtype: float64

In [67]:
iqr = desc_hours['75%'] - desc_hours['25%']
iqr.item()

9.0