# Basic Operations
Этот ноутбук загружает файл журнала посещений, извлекает из отметок времени компоненты даты и времени, определяет время суток для каждой записи и вычисляет описательные статистики.

### 1. 📝 Загружаем данные

In [8]:

import pandas as pd

# Load the tab-separated view log. The file has no header row, so the two
# column names are supplied explicitly.
df = pd.read_csv(
    "../ex00/feed-views.log",
    sep="\t",
    header=None,
    names=["datetime", "user"],
)
df.head()

Unnamed: 0,datetime,user
0,2020-04-17 12:01:08.463179,artem
1,2020-04-17 12:01:23.743946,artem
2,2020-04-17 12:27:30.646665,artem
3,2020-04-17 12:35:44.884757,artem
4,2020-04-17 12:35:52.735016,artem


### 2. ⏱️ Преобразование времени

In [9]:
# Parse the timestamp strings into datetime64 values so that the .dt accessor
# can be used on the column in the following cells.
parsed_timestamps = pd.to_datetime(df["datetime"])
df["datetime"] = parsed_timestamps
df.dtypes

datetime    datetime64[ns]
user                object
dtype: object

### 3. 📆 Извлечение компонентов даты и времени

In [11]:
# Split every timestamp into its calendar and clock parts, one column per part.
# The tuple order determines the order in which the new columns are appended.
timestamp_parts = df["datetime"].dt
for part in ("year", "month", "day", "hour", "minute", "second"):
    df[part] = getattr(timestamp_parts, part)

df.head()

Unnamed: 0,datetime,user,year,month,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52


### 4. ☀️ 🌒 Определение времени суток

In [12]:
# Day parts and the hour boundaries that delimit them. With right=False every
# bin is closed on the left and open on the right: [0, 4) is "night",
# [4, 7) is "early morning", ..., [20, 24) is "evening".
time_labels = ["night", "early morning", "morning", "afternoon", "early evening", "evening"]
time_bins = [0, 4, 7, 11, 17, 20, 24]

visit_hour = df["hour"]
df["daytime"] = pd.cut(visit_hour, time_bins, right=False, labels=time_labels)

df.head()

Unnamed: 0,datetime,user,year,month,day,hour,minute,second,daytime
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8,afternoon
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23,afternoon
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30,afternoon
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44,afternoon
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52,afternoon


### 5. 🫵 Установка индекса

In [13]:
# Move the "user" column into the row index.
df = df.set_index("user")
df.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon


### 6. 🔄 Подсчет элементов

In [14]:
# Number of views per part of the day (most frequent first) ...
daytime_column = df["daytime"]
daytime_counts = daytime_column.value_counts()

# ... and the number of non-null entries in each column.
total_elements = df.count()

total_elements, daytime_counts

(datetime    1076
 year        1076
 month       1076
 day         1076
 hour        1076
 minute      1076
 second      1076
 daytime     1076
 dtype: int64,
 daytime
 evening          509
 afternoon        252
 early evening    145
 night            129
 morning           36
 early morning      5
 Name: count, dtype: int64)

### 7. ✄ Сортировка по времени

In [15]:
# Order the views by clock time only: hour first, then minute, then second.
# The calendar date does not take part in the ordering.
clock_columns = ["hour", "minute", "second"]
df_sorted = df.sort_values(clock_columns)

df_sorted.head()

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night


### 8. 📊 Вычисление статистик

⚫️ Минимальный и максимальный час посещений

⚫️ Мода (наиболее частое значение) для времени суток

In [17]:
# Earliest and latest hour of the day at which any view was recorded.
visit_hours = df["hour"]
min_hour, max_hour = visit_hours.min(), visit_hours.max()

# mode() returns a Series (several values can tie); take its first entry.
mode_daytime = df["daytime"].mode().iloc[0]

min_hour, max_hour, mode_daytime

(np.int32(0), np.int32(23), 'evening')

### 9. 🧐 Анализ по времени суток

⚫️ Максимальный час для ночных посещений

⚫️ Минимальный час для утренних посещений

⚫️ Кто заходил в это время

In [18]:
# Latest hour among the views labelled "night".
night_hours = df.loc[df["daytime"] == "night", "hour"]
max_hour_night = night_hours.max()
# The user is looked up in the whole frame by hour value; index[0] keeps only
# the first matching row, so other users with the same hour are not reported.
user_max_hour_night = df.loc[df["hour"] == max_hour_night].index[0]

# Earliest hour among the views labelled "morning", with the same lookup.
morning_hours = df.loc[df["daytime"] == "morning", "hour"]
min_hour_morning = morning_hours.min()
user_min_hour_morning = df.loc[df["hour"] == min_hour_morning].index[0]

max_hour_night, user_max_hour_night, min_hour_morning, user_min_hour_morning

(np.int32(3), 'konstantin', np.int32(8), 'alexander')

⚫️ Три самых ранних и три самых поздних часа посещений

In [19]:
# Three rows with the smallest and three with the largest hour value, reduced
# to the "hour" column; the user names remain visible as the row index.
shown_columns = ["hour"]
earliest_morning = df.nsmallest(n=3, columns="hour").loc[:, shown_columns]
latest_evening = df.nlargest(n=3, columns="hour").loc[:, shown_columns]

earliest_morning, latest_evening

(            hour
 user            
 artem          0
 konstantin     0
 konstantin     0,
             hour
 user            
 konstantin    23
 artem         23
 artem         23)

### 10. Описание данных

In [20]:
# Summary statistics (count, mean, min, quartiles, max, std) for the datetime
# and numeric columns; the categorical "daytime" column is not included.
df.describe()

Unnamed: 0,datetime,year,month,day,hour,minute,second
count,1076,1076.0,1076.0,1076.0,1076.0,1076.0,1076.0
mean,2020-05-10 09:00:41.211420672,2020.0,4.870818,13.552974,16.249071,29.629182,29.500929
min,2020-04-17 12:01:08.463179,2020.0,4.0,1.0,0.0,0.0,0.0
25%,2020-05-10 01:13:49.857472,2020.0,5.0,11.0,13.0,14.0,14.0
50%,2020-05-11 22:48:35.302552832,2020.0,5.0,13.0,19.0,29.0,30.0
75%,2020-05-14 14:44:34.749530624,2020.0,5.0,15.0,22.0,46.0,45.0
max,2020-05-22 10:36:14.662600,2020.0,5.0,30.0,23.0,59.0,59.0
std,,0.0,0.335557,4.906567,6.95549,17.689388,17.405506


### 11. Вычисление межквартильного размаха (IQR)

In [21]:
# Interquartile range of the visit hour: the distance between the 75th and
# 25th percentiles as reported by describe().
stats = df.describe()
hour_summary = stats["hour"]
iqr = hour_summary["75%"] - hour_summary["25%"]
iqr

np.float64(9.0)

### 12. Сохраним в виде таблицы

In [6]:
# Write the time-sorted frame to a CSV file in the current working directory;
# with the default settings the "user" index is written as the first column.
df_sorted.to_csv("df_sorted.csv")