##### Load libs:

In [1]:
import pandas as pd

##### Read log-file:

In [2]:
# Parse the semicolon-delimited feed-view log into a DataFrame and echo it.
log_path = '../data/feed-views-semicolon.log'
views = pd.read_csv(log_path, sep=';')
print(views)

                       date_time       user
0     2020-04-17 12:01:08.463179      artem
1     2020-04-17 12:35:44.884757      artem
2     2020-04-17 12:35:52.735016      artem
3     2020-04-17 12:36:21.401412     oksana
4     2020-04-17 12:36:22.023355     oksana
...                          ...        ...
1067  2020-05-21 16:36:40.915488  ekaterina
1068  2020-05-21 17:49:36.429237      maxim
1069  2020-05-21 18:45:20.441142  valentina
1070  2020-05-21 23:03:06.457819      maxim
1071  2020-05-21 23:23:49.995349      pavel

[1072 rows x 2 columns]


##### 1. Convert to datetime:

In [3]:
# Parse the raw timestamp strings, then expose each calendar/clock component
# as its own column, created in the order listed below.
views['date_time'] = pd.to_datetime(views['date_time'])
for part in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    views[part] = getattr(views['date_time'].dt, part)
print(views)

                      date_time       user  year  month  day  hour  minute  \
0    2020-04-17 12:01:08.463179      artem  2020      4   17    12       1   
1    2020-04-17 12:35:44.884757      artem  2020      4   17    12      35   
2    2020-04-17 12:35:52.735016      artem  2020      4   17    12      35   
3    2020-04-17 12:36:21.401412     oksana  2020      4   17    12      36   
4    2020-04-17 12:36:22.023355     oksana  2020      4   17    12      36   
...                         ...        ...   ...    ...  ...   ...     ...   
1067 2020-05-21 16:36:40.915488  ekaterina  2020      5   21    16      36   
1068 2020-05-21 17:49:36.429237      maxim  2020      5   21    17      49   
1069 2020-05-21 18:45:20.441142  valentina  2020      5   21    18      45   
1070 2020-05-21 23:03:06.457819      maxim  2020      5   21    23       3   
1071 2020-05-21 23:23:49.995349      pavel  2020      5   21    23      23   

      second  
0          8  
1         44  
2         52  
3  

##### 2. Create new column "datetime":

In [4]:
# Bucket each visit's hour into a named part of the day. With right=False every
# bin is half-open [start, end), so e.g. hour 4 falls into 'early morning'.
day_part_edges = [0, 4, 7, 11, 17, 20, 24]
day_part_names = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views['datetime'] = pd.cut(
    views['hour'],
    bins=day_part_edges,
    labels=day_part_names,
    right=False,
)

# Re-index the frame by user name; this mutates `views`, and 'user' stops
# being a regular column afterwards.
views.set_index('user', inplace=True)
print(views)

                           date_time  year  month  day  hour  minute  second  \
user                                                                           
artem     2020-04-17 12:01:08.463179  2020      4   17    12       1       8   
artem     2020-04-17 12:35:44.884757  2020      4   17    12      35      44   
artem     2020-04-17 12:35:52.735016  2020      4   17    12      35      52   
oksana    2020-04-17 12:36:21.401412  2020      4   17    12      36      21   
oksana    2020-04-17 12:36:22.023355  2020      4   17    12      36      22   
...                              ...   ...    ...  ...   ...     ...     ...   
ekaterina 2020-05-21 16:36:40.915488  2020      5   21    16      36      40   
maxim     2020-05-21 17:49:36.429237  2020      5   21    17      49      36   
valentina 2020-05-21 18:45:20.441142  2020      5   21    18      45      20   
maxim     2020-05-21 23:03:06.457819  2020      5   21    23       3       6   
pavel     2020-05-21 23:23:49.995349  20

##### 3. Count elements:

In [5]:
# count() yields the non-null count per column; the largest one is reported
# as the total number of elements.
per_column_counts = views.count()
print(f"Count all elements: {max(per_column_counts)}")

# Interpolating the value_counts() Series puts its header right after "Elements in ".
day_part_counts = views['datetime'].value_counts()
print(f"Elements in {day_part_counts}")

Count all elements: 1072
Elements in datetime
evening          508
afternoon        250
early evening    145
night            129
morning           35
early morning      5
Name: count, dtype: int64


##### 4. Sort DataFrame:

In [6]:
# Order visits by time of day (the date is not part of the key), sorting
# `views` in place.
time_of_day_keys = ['hour', 'minute', 'second']
views.sort_values(by=time_of_day_keys, inplace=True)
print(views)

                           date_time  year  month  day  hour  minute  second  \
user                                                                           
valentina 2020-05-15 00:00:13.222265  2020      5   15     0       0      13   
valentina 2020-05-15 00:01:05.153738  2020      5   15     0       1       5   
pavel     2020-05-12 00:01:27.764025  2020      5   12     0       1      27   
pavel     2020-05-12 00:01:38.444917  2020      5   12     0       1      38   
pavel     2020-05-12 00:01:55.395042  2020      5   12     0       1      55   
...                              ...   ...    ...  ...   ...     ...     ...   
artem     2020-04-29 23:48:14.208828  2020      4   29    23      48      14   
anatoliy  2020-05-09 23:53:55.599821  2020      5    9    23      53      55   
pavel     2020-05-09 23:54:54.260791  2020      5    9    23      54      54   
valentina 2020-05-14 23:58:56.754866  2020      5   14    23      58      56   
alexander 2020-05-14 23:59:38.758438  20

##### 5. Max and min hours by time of day:

In [7]:
# Latest hour labelled 'night' and earliest hour labelled 'morning'.
latest_night_hour = views.loc[views['datetime'] == 'night', 'hour'].max()
earliest_morning_hour = views.loc[views['datetime'] == 'morning', 'hour'].min()
print(f"Max hour night: {latest_night_hour}")
print(f"Min hour morning: {earliest_morning_hour}")

# First index label (the user) among the rows at each of those hours, taken in
# the frame's current row order.
night_hour_rows = views[views['hour'] == latest_night_hour]
morning_hour_rows = views[views['hour'] == earliest_morning_hour]
print(f"Visit in max hour night: {night_hour_rows.index.to_list()[0]}")
print(f"Visit in min hour morning: {morning_hour_rows.index.to_list()[0]}")

# Most frequent hour; mode() may return several values, the first is shown.
hour_modes = views['hour'].mode()
print(f"Mode: {hour_modes.iloc[0]}")

Max hour night: 3
Min hour morning: 8
Visit in max hour night: konstantin
Visit in min hour morning: alexander
Mode: 22


##### 6. Earliest and latest hours:

In [8]:
# Three smallest and three largest values of the hour column; repeated hours
# are kept, so the lists may contain duplicates.
hours = views['hour']
earliest_three = hours.nsmallest(3).to_list()
latest_three = hours.nlargest(3).to_list()
print(f"Early hour: {earliest_three}")
print(f"Late hour: {latest_three}")

Early hour: [0, 0, 0]
Late hour: [23, 23, 23]


##### 7. Use describe():

In [9]:
# Summary statistics for the clock components; the hour IQR is the 75th
# percentile minus the 25th percentile.
clock_columns = ['hour', 'minute', 'second']
stats = views[clock_columns].describe()
hour_q1, hour_q3 = stats.loc['25%', 'hour'], stats.loc['75%', 'hour']
iqr = hour_q3 - hour_q1
print(f"Interquartile range: {iqr}")

Interquartile range: 9.0


In [10]:
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called directly: wrapping it in print() would append a stray "None"
# line after the report.
views.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1072 entries, valentina to alexander
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date_time  1072 non-null   datetime64[ns]
 1   year       1072 non-null   int32         
 2   month      1072 non-null   int32         
 3   day        1072 non-null   int32         
 4   hour       1072 non-null   int32         
 5   minute     1072 non-null   int32         
 6   second     1072 non-null   int32         
 7   datetime   1072 non-null   category      
dtypes: category(1), datetime64[ns](1), int32(6)
memory usage: 75.4+ KB
None


In [11]:
# Non-null value count for every column of the frame.
non_null_counts = views.count()
print(non_null_counts)

date_time    1072
year         1072
month        1072
day          1072
hour         1072
minute       1072
second       1072
datetime     1072
dtype: int64


In [14]:
# Series.mode() returns every value tied for the highest frequency, so the
# result can hold more than one entry.
timestamp_modes = views['date_time'].mode()
print(timestamp_modes)

0      2020-04-17 12:01:08.463179
1      2020-04-17 12:35:44.884757
2      2020-04-17 12:35:52.735016
3      2020-04-17 12:36:21.401412
4      2020-04-17 12:36:22.023355
                  ...            
1067   2020-05-21 16:36:40.915488
1068   2020-05-21 17:49:36.429237
1069   2020-05-21 18:45:20.441142
1070   2020-05-21 23:03:06.457819
1071   2020-05-21 23:23:49.995349
Name: date_time, Length: 1072, dtype: datetime64[ns]
