## Импорт библиотеки

In [100]:
import pandas as pd

## Читаем файл и создаём датафрейм

In [104]:
# Read the tab-separated feed log. Column names are passed explicitly,
# so pandas treats the first line of the file as data, not as a header.
views = pd.read_csv(
    "../data/feed-views.log",
    sep="\t",
    names=["datetime", "user"],
)

## Преобразовываем столбец datetime в тип datetime64[ns]

In [105]:
# Parse the textual timestamps into datetime values, then print the frame
# summary so the resulting dtype can be checked.
parsed_timestamps = pd.to_datetime(views["datetime"])
views["datetime"] = parsed_timestamps
views.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1076 entries, 0 to 1075
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   datetime  1076 non-null   datetime64[ns]
 1   user      1076 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 16.9+ KB


## Добавление новых столбцов 

In [106]:
# Split the timestamp into one column per calendar/clock component.
# Keys are the new column names; values are the matching `.dt` accessor fields.
component_by_column = {
    "years": "year",
    "months": "month",
    "days": "day",
    "hours": "hour",
    "minutes": "minute",
    "seconds": "second",
}
for column, component in component_by_column.items():
    views[column] = getattr(views["datetime"].dt, component)
views



Unnamed: 0,datetime,user,years,months,days,hours,minutes,seconds
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22


## Создаем столбец daytime

In [107]:

# Hour boundaries of the six day parts. With right=False every bin is
# half-open [start, end), so hour 4 is "early morning" and hour 23 is "evening".
bins = [0, 4, 7, 11, 17, 20, 24]
labels = ["night", "early morning", "morning", "afternoon", "early evening", "evening"]

views["daytime"] = pd.cut(views["hours"], bins=bins, labels=labels, right=False)

# Move "user" into the index only while it is still a column: after the first
# run it already is the index, and an unconditional set_index("user") would
# raise KeyError when this cell is executed again.
if "user" in views.columns:
    views = views.set_index("user")
print(views['daytime'])



user
artem            afternoon
artem            afternoon
artem            afternoon
artem            afternoon
artem            afternoon
                 ...      
valentina    early evening
maxim              evening
pavel              evening
artem              evening
artem              morning
Name: daytime, Length: 1076, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' < 'evening']


## Количество непустых значений в каждом столбце датафрейма

In [108]:
# Number of non-null values in each column.
views.notna().sum()

datetime    1076
years       1076
months      1076
days        1076
hours       1076
minutes     1076
seconds     1076
daytime     1076
dtype: int64

## Кол-во записей по времени суток

In [109]:
# Count rows per day part (value_counts orders them most frequent first)
# and label the resulting Series "daytime" before printing it.
daytime_counts = views["daytime"].value_counts().rename("daytime")
print(daytime_counts)

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: daytime, dtype: int64


## Сортировка по часам, минутам, секундам одновременно

In [110]:
# Order the rows by time of day (hour, then minute, then second);
# the calendar date plays no part in the ordering.
time_of_day_keys = ["hours", "minutes", "seconds"]
views = views.sort_values(by=time_of_day_keys)
views

Unnamed: 0_level_0,datetime,years,months,days,hours,minutes,seconds,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


## Рассчитываем минимум, максимум и моду

In [112]:
# Hours of the visits that fall into the "night" and "morning" day parts.
night_hours = views.loc[views["daytime"] == "night", "hours"]
morning_hours = views.loc[views["daytime"] == "morning", "hours"]

max_night = night_hours.max()
min_morning = morning_hours.min()
# mode() returns a Series (there can be ties); take its first entry.
mode_hour = views['hours'].mode()[0]
mode_daytime = views['daytime'].mode()[0]

# idxmax()/idxmin() return only the label of the first matching row, so they
# name a single user even when several users visited at that hour. Collect
# every distinct index label (the user) whose visit falls on the extreme hour.
visitors_max = ", ".join(map(str, night_hours[night_hours == max_night].index.unique()))
visitors_min = ", ".join(map(str, morning_hours[morning_hours == min_morning].index.unique()))

print(f"Max hour at night: {max_night}")
print(f"Min hour in morning: {min_morning}")
print(f"Mode hour: {mode_hour}")
print(f"Mode daytime: {mode_daytime}")
print(f"Visitors at max night hour: {visitors_max}")
print(f"Visitors at min morning hour: {visitors_min}")

Max hour at night: 3
Min hour in morning: 8
Mode hour: 22
Mode daytime: evening
Visitors at max night hour: konstantin
Visitors at min morning hour: alexander


## 3 самых ранних часа утром + пользователи

In [113]:
# Restrict to morning visits, take the three rows with the smallest hour,
# and print their hours; the index label of each row is the visiting user.
is_morning = views["daytime"] == "morning"
earliest_morning = views.loc[is_morning].nsmallest(3, "hours")
print(earliest_morning["hours"])

user
alexander    8
alexander    8
alexander    9
Name: hours, dtype: int32


## 3 самых поздних часа ночью + пользователи

In [114]:
# Restrict to night visits, take the three rows with the largest hour,
# and print their hours; the index label of each row is the visiting user.
is_night = views["daytime"] == "night"
latest_night = views.loc[is_night].nlargest(3, "hours")
print(latest_night["hours"])

user
konstantin    3
konstantin    3
konstantin    3
Name: hours, dtype: int32


## describe() и межквартильный размах

In [115]:
# Summary statistics of the visit hour; the interquartile range is the
# distance between the 75% and 25% rows of that summary.
desc = views["hours"].describe()
lower_quartile, upper_quartile = desc.loc["25%"], desc.loc["75%"]
iqr = upper_quartile - lower_quartile
print("Interquartile range (IQR):", iqr)

Interquartile range (IQR): 9.0
