## create a dataframe views with two columns: datetime and user by reading feed-views.log

In [3]:
import pandas as pd

df = pd.read_csv("../ex00/data/feed-views.log", sep='\t',  header=None, names=["datetime", "user"], skiprows=[2,3], skipfooter=2, engine='python')
df.set_index("datetime", inplace=True)
df.index = pd.to_datetime(df["user"].index)
df.index
df['year'] = df.index.year
df['month'] = df.index.month
df['day'] = df.index.day
df['hour'] = df.index.hour
df['minute'] = df.index.minute
df['second'] = df.index.second
df

Unnamed: 0_level_0,user,year,month,day,hour,minute,second
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
2020-04-17 12:36:21.401412,oksana,2020,4,17,12,36,21
2020-04-17 12:36:22.023355,oksana,2020,4,17,12,36,22
...,...,...,...,...,...,...,...
2020-05-21 16:36:40.915488,ekaterina,2020,5,21,16,36,40
2020-05-21 17:49:36.429237,maxim,2020,5,21,17,49,36
2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6


## create the new column daytime

In [5]:
# night 00:00 03:59
# early morning 04:00 06:59
# morning 07:00 10:59
# afternoon 11:00 16:59
# early evening 17:00 19:59
# evening 20:00 23:59
bins = [0, 4, 7, 11, 17, 20, 24]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
df['daytime'] = pd.cut(df.hour, bins, labels=labels, right=False)
df.set_index("user", inplace=True)
df

Unnamed: 0_level_0,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
artem,2020,4,17,12,1,8,afternoon
artem,2020,4,17,12,1,23,afternoon
artem,2020,4,17,12,35,52,afternoon
oksana,2020,4,17,12,36,21,afternoon
oksana,2020,4,17,12,36,22,afternoon
...,...,...,...,...,...,...,...
ekaterina,2020,5,21,16,36,40,afternoon
maxim,2020,5,21,17,49,36,early evening
valentina,2020,5,21,18,45,20,early evening
maxim,2020,5,21,23,3,6,evening


## calculate the number of elements in your dataframe

In [269]:
df.count()[0] #number of elements

1072

In [270]:
df['daytime'].value_counts() #

evening          508
afternoon        250
early evening    145
night            129
morning           35
early morning      5
Name: daytime, dtype: int64

## sort values in your dataframe by hour, minute, and second in ascending order (simultaneously and not one by one)

In [6]:
df.sort_values(by=['hour', 'minute', 'second'], inplace=True)
df

Unnamed: 0_level_0,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
valentina,2020,5,15,0,0,13,night
valentina,2020,5,15,0,1,5,night
pavel,2020,5,12,0,1,27,night
pavel,2020,5,12,0,1,38,night
pavel,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...
artem,2020,4,29,23,48,14,evening
anatoliy,2020,5,9,23,53,55,evening
pavel,2020,5,9,23,54,54,evening
valentina,2020,5,14,23,58,56,evening


## calculate the minimum and maximum for the hours and the mode for the daytime categories

In [8]:
print("max hour:", df['hour'].max())
print("min hour:", df['hour'].min())
print("mode of daytime is:", df['daytime'].mode())
print("max hour where daytime is night:", df[(df['daytime'] == 'night')]['hour'].max())
print("min hour where daytime is morning:", df[(df['daytime'] == 'morning')]['hour'].min())
print("Who visited the site at those hours:")
print(pd.DataFrame(df[(df['hour'] == 3) | (df['hour'] == 8)].index).drop_duplicates())
print("Mode of hour:", df['hour'].mode()[0])
print("Mode of daytime:", df['daytime'].mode()[0])

max hour: 23
min hour: 0
mode of daytime is: 0    evening
Name: daytime, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' < 'evening']
max hour where daytime is night: 3
min hour where daytime is morning: 8
Who visited the site at those hours:
         user
0  konstantin
3   alexander
Mode of hour: 22
Mode of daytime: evening


## show the 3 earliest hours in the morning and the corresponding usernames and the 3 latest hours and the usernames using nsmallest() and nlargest()

In [11]:
print(df.nsmallest(3 ,['hour'], keep='first')[['hour']])
print(df.nlargest(3 ,['hour'], keep='first')[['hour']])

           hour
user           
valentina     0
valentina     0
pavel         0
           hour
user           
ekaterina    23
ekaterina    23
ekaterina    23


## use the method describe() to get the basic statistics for the columns

In [12]:
print("Most popular interval of visiting the page:", df.describe()['hour'][4], '-', df.describe()['hour'][6])
iqr = df.describe()['hour'][6]- df.describe()['hour'][4]
print('iqr:', iqr)

Most popular interval of visiting the page: 13.0 - 22.0
iqr: 9.0
