## Импорты

In [21]:
import pandas as pd

## Загрузка данных

In [22]:
path = "/content/feed-views.log"

# The log is tab-separated and has no header row, so the two fields are
# named explicitly here.
df = pd.read_csv(
    path,
    header=None,
    names=["datetime", "user"],
    sep="\t",
    engine="python",
)


## Преобразования и манипуляции

In [23]:
# Convert the raw timestamp strings into pandas datetimes.
df["datetime"] = df["datetime"].pipe(pd.to_datetime)

In [24]:
# Split the timestamp into one column per calendar/clock component.
timestamp = df["datetime"].dt
for part in ("year", "month", "day", "hour", "minute", "second"):
    df[part] = getattr(timestamp, part)

In [25]:
# Each daytime label paired with the first hour it covers. Buckets are
# left-closed / right-open (right=False), so "night" is hours 0-3 and
# "evening" is hours 20-23.
daytime_starts = [
    ("night", 0),
    ("early morning", 4),
    ("morning", 7),
    ("afternoon", 11),
    ("early evening", 17),
    ("evening", 20),
]
labels = [name for name, _ in daytime_starts]
bins = [start for _, start in daytime_starts] + [24]

df["daytime"] = pd.cut(
    df["hour"],
    bins=bins,
    labels=labels,
    right=False,
    include_lowest=True,
)

In [26]:
# Use the user name as the row index (labels repeat: one row per view).
df = df.set_index("user")

In [27]:
# Number of non-null values in every column (the "user" index is not counted).
count_total = df.count()
count_total

Unnamed: 0,0
datetime,1076
year,1076
month,1076
day,1076
hour,1076
minute,1076
second,1076
daytime,1076


In [28]:
# Number of views per daytime bucket, listed from most to least frequent.
daytime_counts = df["daytime"].value_counts()
daytime_counts

Unnamed: 0_level_0,count
daytime,Unnamed: 1_level_1
evening,509
afternoon,252
early evening,145
night,129
morning,36
early morning,5


In [29]:
# Order the views by time of day only (hour, then minute, then second),
# ignoring the calendar date.
df_sorted = df.sort_values(["hour", "minute", "second"])
df_sorted

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


In [30]:
# Overall range of hours present in the log.
min_hour = df["hour"].min()
max_hour = df["hour"].max()

# Latest hour that still falls into the "night" bucket, and the first user
# (in frame order) with a view during that hour.
# A single .loc[mask, column] lookup replaces the chained df[mask]["hour"].
is_night = df["daytime"] == "night"
night_max_hour = df.loc[is_night, "hour"].max()
night_user = df.loc[df["hour"] == night_max_hour].index[0]
print(night_max_hour, night_user)


3 konstantin


In [31]:
# Earliest hour inside the "morning" bucket, and the first user (in frame
# order) with a view during that hour.
is_morning = df["daytime"] == "morning"
morning_min_hour = df.loc[is_morning, "hour"].min()
morning_user = df.loc[df["hour"] == morning_min_hour].index[0]
print(morning_min_hour, morning_user)

8 alexander


In [32]:
# mode() returns a Series of the most frequent value(s); keep the first entry.
hour_mode = df["hour"].mode().iloc[0]
daytime_mode = df["daytime"].mode().iloc[0]
daytime_mode

'evening'

In [33]:
# The three rows with the smallest hour, reduced to the hour column.
earliest = df.nsmallest(n=3, columns="hour").loc[:, ["hour"]]
earliest

Unnamed: 0_level_0,hour
user,Unnamed: 1_level_1
artem,0
konstantin,0
konstantin,0


In [34]:
# The three rows with the largest hour, reduced to the hour column.
latest = df.nlargest(n=3, columns="hour").loc[:, ["hour"]]
latest

Unnamed: 0_level_0,hour
user,Unnamed: 1_level_1
konstantin,23
artem,23
artem,23


In [35]:
# Summary statistics (count, mean, min, quartiles, max, std) for the
# datetime and numeric columns.
desc_stats = df.describe()
desc_stats

Unnamed: 0,datetime,year,month,day,hour,minute,second
count,1076,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020-05-10 09:00:41.211420672,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
min,2020-04-17 12:01:08.463179,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020-05-10 01:13:49.857472,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020-05-11 22:48:35.302552832,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020-05-14 14:44:34.749530624,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020-05-22 10:36:14.662600,2020.0,5.0,30.0,23.0,59.0,59.0
std,,0.0,0.335557,4.906567,6.95549,17.689388,17.405506


In [36]:
# Interquartile range of the viewing hour, read from the describe() table.
hour_stats = desc_stats["hour"]
q25 = hour_stats["25%"]
q75 = hour_stats["75%"]
iqr = q75 - q25
iqr

np.float64(9.0)

## Результаты

In [37]:
# Preview the first five rows of the final frame.
df.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon


In [38]:
# Column labels of the final frame ("user" is the index, so it is not listed).
df.columns

Index(['datetime', 'year', 'month', 'day', 'hour', 'minute', 'second',
       'daytime'],
      dtype='object')

In [39]:
# Data type of each column.
df.dtypes

Unnamed: 0,0
datetime,datetime64[ns]
year,int32
month,int32
day,int32
hour,int32
minute,int32
second,int32
daytime,category


In [40]:
# Plain-text summary of every statistic computed in the cells above.
print(f"Total count:\n {count_total}")
print(f"\nDaytime counts:\n {daytime_counts}")
print(f"\nMin hour: {min_hour}")
print(f"Max hour: {max_hour}")
print(f"Mode daytime: {daytime_mode}")
print(f"Max hour in night: {night_max_hour} User: {night_user}")
print(f"Min hour in morning: {morning_min_hour} User: {morning_user}")
print(f"Hour mode: {hour_mode}")
print(f"3 earliest hours:\n {earliest}")
print(f"3 latest hours:\n {latest}")
print(f"IQR for hour: {iqr}")

Total count:
 datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

Daytime counts:
 daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
Name: count, dtype: int64

Min hour: 0
Max hour: 23
Mode daytime: evening
Max hour in night: 3 User: konstantin
Min hour in morning: 8 User: alexander
Hour mode: 22
3 earliest hours:
             hour
user            
artem          0
konstantin     0
konstantin     0
3 latest hours:
             hour
user            
konstantin    23
artem         23
artem         23
IQR for hour: 9.0
