## ⅍ A/B-testing

#### 1. 🔗 Подключение к базе данных

In [35]:
import pandas as pd
import sqlite3

# Path to the SQLite file with the experiment data (relative to the working directory).
db_path = "checking-logs.sqlite"
# Open through a URI with mode=rw so that a wrong path raises
# sqlite3.OperationalError right here. A plain sqlite3.connect(db_path) would
# silently create an empty database, and the mistake would only surface later
# as a confusing "no such table" error (or as empty PRAGMA output).
# NOTE: db_path is interpolated into the URI as-is, so it must not contain
# the characters '?', '#' or '%'.
conn = sqlite3.connect(f"file:{db_path}?mode=rw", uri=True)

#### 2. Получение схемы таблиц

In [36]:
# Print the column layout of every table used in the analysis.
# PRAGMA table_info yields one row per column: cid, name, type, notnull,
# dflt_value, pk. The table names are fixed literals, so interpolating them
# into the statement is safe.
for table_name in ("deadlines", "test", "control"):
    schema_query = f"PRAGMA table_info({table_name});"
    # pd.read_sql is the public entry point for the same function that was
    # previously reached through the internal pd.io.sql module path.
    schema = pd.read_sql(schema_query, conn)
    print(f"\nСхема таблицы {table_name}:")
    print(schema)


Схема таблицы deadlines:
   cid       name     type  notnull dflt_value  pk
0    0      index  INTEGER        0       None   0
1    1       labs     TEXT        0       None   0
2    2  deadlines  INTEGER        0       None   0

Схема таблицы test:
   cid             name       type  notnull dflt_value  pk
0    0              uid       TEXT        0       None   0
1    1          labname       TEXT        0       None   0
2    2  first_commit_ts  TIMESTAMP        0       None   0
3    3    first_view_ts  TIMESTAMP        0       None   0

Схема таблицы control:
   cid             name       type  notnull dflt_value  pk
0    0              uid       TEXT        0       None   0
1    1          labname       TEXT        0       None   0
2    2  first_commit_ts  TIMESTAMP        0       None   0
3    3    first_view_ts  TIMESTAMP        0       None   0


#### 3. Получение первых 10 строк таблиц

In [37]:
# Show up to 10 rows of every table to get a feel for the data.
# The table names are fixed literals, so interpolating them into the SQL text
# is safe.
for table_name in ("deadlines", "test", "control"):
    preview_query = f"SELECT * FROM {table_name} LIMIT 10;"
    # pd.read_sql is the public entry point for the same function that was
    # previously reached through the internal pd.io.sql module path.
    preview = pd.read_sql(preview_query, conn)
    print(f"\nПервые 10 строк таблицы {table_name}:")
    print(preview)


Первые 10 строк таблицы deadlines:
   index      labs   deadlines
0      0    laba04  1587945599
1      1   laba04s  1587945599
2      2    laba05  1588550399
3      4    laba06  1590364799
4      5   laba06s  1590364799
5      3  project1  1589673599

Первые 10 строк таблицы test:
       uid   labname             first_commit_ts               first_view_ts
0   user_1    laba04  2020-04-26 17:06:18.462708  2020-04-26 21:53:59.624136
1   user_1   laba04s  2020-04-26 17:12:11.843671  2020-04-26 21:53:59.624136
2   user_1    laba05  2020-05-02 19:15:18.540185  2020-04-26 21:53:59.624136
3   user_1    laba06  2020-05-17 16:26:35.268534  2020-04-26 21:53:59.624136
4   user_1   laba06s  2020-05-20 12:23:37.289724  2020-04-26 21:53:59.624136
5   user_1  project1  2020-05-14 20:56:08.898880  2020-04-26 21:53:59.624136
6  user_10    laba04  2020-04-25 08:24:52.696624  2020-04-18 12:19:50.182714
7  user_10   laba04s  2020-04-25 08:37:54.604222  2020-04-18 12:19:50.182714
8  user_10    laba05  2

#### 4. SQL-запрос для тестовой группы

In [38]:
# Test group: average gap between a lab's first commit and its deadline.
#   * julianday() differences are in days, so "* 24" converts the gap to hours;
#     a negative value means the commit happened before the deadline.
#   * 'before' rows: first commit made earlier than first_view_ts;
#     'after' rows: first commit made at or after first_view_ts.
#   * 'project1' is excluded from both halves.
query_test = """
SELECT 'before' AS time, 
       AVG((julianday(t.first_commit_ts) - julianday(datetime(d.deadlines, 'unixepoch'))) * 24) AS avg_diff
FROM test AS t
JOIN deadlines AS d ON t.labname = d.labs
WHERE t.first_commit_ts < t.first_view_ts
AND t.labname != 'project1'
UNION ALL
SELECT 'after' AS time, 
       AVG((julianday(t.first_commit_ts) - julianday(datetime(d.deadlines, 'unixepoch'))) * 24) AS avg_diff
FROM test AS t
JOIN deadlines AS d ON t.labname = d.labs
WHERE t.first_commit_ts >= t.first_view_ts
AND t.labname != 'project1';
"""

# Two-row frame (time, avg_diff) that the interpretation step reads later.
test_results = pd.read_sql(sql=query_test, con=conn)
print(test_results.head(n=5))


     time    avg_diff
0  before  -61.156438
1   after -103.953310


#### 5. SQL-запрос для контрольной группы

In [39]:
# Control group: the same metric as for the test group, read from `control`.
#   * julianday() differences are in days, so "* 24" converts the gap to hours;
#     a negative value means the commit happened before the deadline.
#   * 'before' rows: first commit made earlier than first_view_ts;
#     'after' rows: first commit made at or after first_view_ts.
#   * 'project1' is excluded from both halves.
query_control = """
SELECT 'before' AS time, 
       AVG((julianday(c.first_commit_ts) - julianday(datetime(d.deadlines, 'unixepoch'))) * 24) AS avg_diff
FROM control AS c
JOIN deadlines AS d ON c.labname = d.labs
WHERE c.first_commit_ts < c.first_view_ts
AND c.labname != 'project1'
UNION ALL
SELECT 'after' AS time, 
       AVG((julianday(c.first_commit_ts) - julianday(datetime(d.deadlines, 'unixepoch'))) * 24) AS avg_diff
FROM control AS c
JOIN deadlines AS d ON c.labname = d.labs
WHERE c.first_commit_ts >= c.first_view_ts
AND c.labname != 'project1';
"""

# Two-row frame (time, avg_diff) that the interpretation step reads later.
control_results = pd.read_sql(sql=query_control, con=conn)
print(control_results.head(n=5))

# Last query of the notebook: release the database connection.
conn.close()

     time    avg_diff
0  before  -99.901295
1   after -113.232196


#### 6. Интерпретация результатов

In [40]:
def _avg_diff(results, period):
    """Return the `avg_diff` value of the row whose `time` equals *period*.

    Raises:
        ValueError: if there is no such row or its average is NULL/NaN.
            Without this check a NaN would make the comparison below
            evaluate to False and silently report "not confirmed".
    """
    matches = results.loc[results['time'] == period, 'avg_diff']
    if matches.empty or pd.isna(matches.iloc[0]):
        raise ValueError(f"no average delta available for period {period!r}")
    return matches.iloc[0]


delta_before_test = _avg_diff(test_results, 'before')
delta_after_test = _avg_diff(test_results, 'after')
delta_before_control = _avg_diff(control_results, 'before')
delta_after_control = _avg_diff(control_results, 'after')

# avg_diff is (first commit - deadline) in hours, so a more negative value
# means an earlier commit. "before - after" is therefore positive when the
# group started committing earlier in the 'after' period.
shift_test = delta_before_test - delta_after_test
shift_control = delta_before_control - delta_after_control

print("\nИзменение времени в тестовой группе:", shift_test)
print("Изменение времени в контрольной группе:", shift_control)

# The hypothesis is treated as confirmed only when the test group moved
# earlier by more than the control group did.
if shift_test > shift_control:
    print("\nГипотеза ПОДТВЕРЖДЕНА: Страница новостей повлияла на студентов, и они начали работать раньше.")
else:
    print("\nГипотеза НЕ ПОДТВЕРЖДЕНА: Страница новостей не оказала значительного влияния на поведение студентов.")


Изменение времени в тестовой группе: 42.79687207436655
Изменение времени в контрольной группе: 13.330901912455545

Гипотеза ПОДТВЕРЖДЕНА: Страница новостей повлияла на студентов, и они начали работать раньше.
