In [1]:
import pandas as pd

## Load the feed views log into `views`

In [15]:
# Read the feed log into a two-column frame. The file is parsed as
# tab-separated, and header=None makes pandas treat its first line as data,
# so the column names are supplied explicitly.
views = pd.read_csv(
    '../../data/feed-views.log',
    sep='\t',
    header=None,
    names=['datetime', 'user'],
)

In [16]:
# Convert the column read from the log to datetimes so the .dt accessor
# can be used on it.
views['datetime'] = pd.to_datetime(views['datetime'])

# Add one column per calendar/clock component, in this left-to-right order.
for component in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    views[component] = getattr(views['datetime'].dt, component)

## Create the `daytime` column

In [17]:
# Half-open hour intervals [left, right): with right=False an hour equal to a
# bin edge belongs to the interval that starts at it, e.g. hour 4 is
# 'early morning' and hour 23 falls in [20, 24) -> 'evening'.
bins = [0, 4, 7, 11, 17, 20, 24]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views['daytime'] = pd.cut(views['hour'], bins=bins, labels=labels, right=False)

# Move 'user' into the index. The guard keeps this cell re-runnable: once
# 'user' is the index it is no longer a column, so an unconditional
# set_index('user') would raise KeyError on a second execution. If the index
# is not 'user' and the column is missing, set_index still raises as before.
if views.index.name != 'user':
    views = views.set_index('user')

In [18]:
# Preview the last five rows of the frame.
views.tail(n=5)

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
artem,2020-05-22 10:36:14.662600,2020,5,22,10,36,14,morning


## Calculate the number of elements

In [None]:
# count() reports the number of non-null values per column; the entry for
# the first column is used as the element count.
non_null_per_column = views.count()
count = non_null_per_column.iloc[0]

print(count)

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64


In [29]:
# Number of views per daytime label, most frequent label first.
daytime_counts = views['daytime'].value_counts(sort=True, ascending=False)

print(daytime_counts)

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: count, dtype: int64


## Sort by time

In [31]:
# Order the rows by time of day (hour, then minute, then second), ascending,
# regardless of the calendar date. The sorted frame is rebound to `views`
# instead of using inplace=True, so the frame object is not mutated in place
# and the call stays chainable.
views = views.sort_values(by=['hour', 'minute', 'second'])

In [34]:
# Preview the first five rows of the frame.
views.head(n=5)

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night


## Min, max, mode

In [42]:
# Earliest and latest hour of day present in the frame.
min_hour, max_hour = views['hour'].min(), views['hour'].max()
# NOTE: despite its name, this holds the most frequent *daytime* label
# (entry 0 of the mode Series), not an hour.
mode_hour = views['daytime'].mode().loc[0]

print(min_hour, max_hour, mode_hour, sep='\n')

0
23
evening


In [59]:
# Last hour labelled 'night' and first hour labelled 'morning'.
# .loc[mask, column] replaces the chained views[mask][column] lookup.
max_night_hour = views.loc[views['daytime'] == 'night', 'hour'].max()
min_morning_hour = views.loc[views['daytime'] == 'morning', 'hour'].min()

# Keep only the views made exactly at one of those two hours. A range test
# (max_night_hour <= hour <= min_morning_hour) would also let through every
# hour in between, which is neither of the two boundary hours.
at_boundary_hours = views['hour'].isin([max_night_hour, min_morning_hour])
# First matching row in the frame's current order; the index holds the user.
user_at_this_hours = views[at_boundary_hours].index[0]

# Most frequent hour and daytime label. mode() returns a Series, so the
# first entry is picked out where these are printed.
hour_mode = views['hour'].mode()
daytime_mode = views['daytime'].mode()

print(max_night_hour, min_morning_hour, user_at_this_hours, sep='\n')

3
8
konstantin


In [61]:
# Entry 0 of the hour mode Series.
print(hour_mode.loc[0])

22


In [62]:
# Entry 0 of the daytime mode Series.
print(daytime_mode.loc[0])

evening


In [72]:
# Three smallest hours among the views labelled 'morning', returned as a
# one-column frame indexed by user.
is_morning = views['daytime'] == 'morning'
earliest_morning = views.loc[is_morning, ['hour']].nsmallest(3, 'hour')

print(earliest_morning)

           hour
user           
alexander     8
alexander     8
alexander     9


In [None]:
# Three largest hours over the whole frame, returned as a one-column frame
# indexed by user.
largest_hours = views[['hour']].nlargest(3, columns='hour')

print(largest_hours)

           hour
user           
ekaterina    23
ekaterina    23
ekaterina    23


## Describe()

In [None]:
# Summary statistics table produced by describe() for the frame.
stats = views.describe()

# First and third quartile of the hour column, and their difference (IQR).
q1, q3 = stats.at['25%', 'hour'], stats.at['75%', 'hour']
iqr = q3 - q1

# Only the third quartile is shown; iqr is computed but not printed here.
print(q3)

22.0
