In [135]:
import pandas as pd
import sqlite3

## создаем подключение к базе данных с помощью библиотеки sqlite3

In [136]:
# Path to the SQLite database file, relative to the current working directory.
DB_PATH = '../data/checking-logs.sqlite'

# check_same_thread=False lifts sqlite3's restriction that a connection may
# only be used from the thread that created it.
conn = sqlite3.connect(DB_PATH, check_same_thread=False)

## Используя только один запрос для каждой из групп, создайте два фрейма данных: test_results и control_results со столбцами time и avg_diff и только двумя строками.

In [137]:
# Test group: average gap between first_commit_ts and the lab deadline, split
# by whether the commit happened before or after first_view_ts.
#   users_with_any_period - uids having at least one row in BOTH periods
#                           (labname 'project1' is excluded)
#   commit_differences    - per-uid, per-period average of
#                           (first_commit_ts as epoch seconds - d.deadlines) / 3600
#   final SELECT          - averages those per-uid values for each period,
#                           giving one row per period ('after' / 'before')
# Rows are classified as 'before' when first_commit_ts <= first_view_ts and as
# 'after' otherwise.
# NOTE(review): d.deadlines is presumably stored as integer epoch seconds; in
# that case "/ 3600" is integer division in SQLite and every gap is truncated
# to whole hours before averaging - confirm this is intended. The control-group
# query uses the same expression, so change both together or not at all.
query = """
WITH users_with_any_period AS (
    SELECT 
        uid,
        SUM(CASE 
                WHEN  first_commit_ts <= first_view_ts THEN 1 
                ELSE 0 
            END) AS before_count,
        SUM(CASE 
                WHEN  first_commit_ts > first_view_ts THEN 1 
                ELSE 0 
            END) AS after_count
    FROM test
    WHERE labname != 'project1'
    GROUP BY uid
    HAVING before_count > 0 AND after_count > 0
),

commit_differences AS (
    SELECT 
        t.uid,
        CASE 
            WHEN t.first_commit_ts <= t.first_view_ts THEN 'before' 
            ELSE 'after' 
        END AS time_period,
        AVG((CAST(strftime('%s', t.first_commit_ts) AS INT) - d.deadlines) / 3600) AS avg_diff
    FROM test t
    INNER JOIN deadlines d ON lower(t.labname) = lower(d.labs)
    WHERE 
        t.labname != 'project1' 
        
        AND t.uid IN (SELECT uid FROM users_with_any_period)
    GROUP BY t.uid, time_period
)

SELECT 
    time_period AS time,
    AVG(avg_diff) AS avg_diff
FROM commit_differences
GROUP BY time_period;
"""
# Use the documented top-level pandas entry point (same call as the control
# cell) instead of reaching into the pandas.io.sql submodule.
test_results = pd.read_sql(query, conn)
test_results

Unnamed: 0,time,avg_diff
0,after,-99.52381
1,before,-66.047619


In [138]:
# Control group: same computation as for the test group, but reading from the
# `control` table.
#   users_with_any_period - uids having at least one row in BOTH periods
#                           (labname 'project1' is excluded)
#   commit_differences    - per-uid, per-period average of
#                           (first_commit_ts as epoch seconds - d.deadlines) / 3600
#   final SELECT          - averages those per-uid values for each period,
#                           giving one row per period ('after' / 'before')
# NOTE(review): d.deadlines is presumably stored as integer epoch seconds; in
# that case "/ 3600" is integer division in SQLite and every gap is truncated
# to whole hours before averaging - confirm this is intended. The test-group
# query uses the same expression, so change both together or not at all.
query2 = """

WITH users_with_any_period AS (
    SELECT 
        uid,
        SUM(CASE 
                WHEN  first_commit_ts <= first_view_ts THEN 1 
                ELSE 0 
            END) AS before_count,
        SUM(CASE 
                WHEN  first_commit_ts > first_view_ts THEN 1 
                ELSE 0 
            END) AS after_count
    FROM control
    WHERE labname != 'project1'
    GROUP BY uid
    HAVING before_count > 0 AND after_count > 0
),

commit_differences AS (
    SELECT 
        c.uid,
        CASE 
            WHEN c.first_commit_ts <= c.first_view_ts THEN 'before' 
            ELSE 'after' 
        END AS time_period,
        AVG((CAST(strftime('%s', c.first_commit_ts) AS INT) - d.deadlines) / 3600) AS avg_diff
    FROM control c
    INNER JOIN deadlines d ON lower(c.labname) = lower(d.labs)
    WHERE 
        c.labname != 'project1' 
        
        AND c.uid IN (SELECT uid FROM users_with_any_period)
    GROUP BY c.uid, time_period
)

SELECT 
    time_period AS time,
    AVG(avg_diff) AS avg_diff
FROM commit_differences
GROUP BY time_period;
"""
# Load the two-row (time, avg_diff) result for the control group.
control_results = pd.read_sql(query2, conn)
control_results

Unnamed: 0,time,avg_diff
0,after,-99.322222
1,before,-98.033333


## закрыть соединение

In [139]:
# Both result frames were already read into pandas above, so the connection
# is no longer needed by the remaining cells.
conn.close()

## Ответ: подтвердилась ли гипотеза — действительно ли страница влияет на поведение студентов?

In [140]:
# Вывод интерпретации
before_test = test_results.query("time == 'before'")['avg_diff'].values[0]
after_test = test_results.query("time == 'after'")['avg_diff'].values[0]
before_control = control_results.query("time == 'before'")['avg_diff'].values[0]
after_control = control_results.query("time == 'after'")['avg_diff'].values[0]


In [141]:
print("\nВывод:")

# Shift per group, in the units of avg_diff: positive when the "after" average
# is lower (i.e. earlier relative to the deadline) than the "before" average.
delta_test = before_test - after_test
delta_control = before_control - after_control
print(delta_test, delta_control)

# The hypothesis counts as confirmed only when the test group shifted by more
# than the control group did.
verdict = (
    "✅ Да, гипотеза подтвердилась: лента новостей помогла студентам раньше начинать выполнение лабораторных работ."
    if delta_test > delta_control
    else "❌ Нет, гипотеза не подтвердилась: значимого эффекта не наблюдается."
)
print(verdict)


Вывод:
33.47619047619047 1.2888888888888914
✅ Да, гипотеза подтвердилась: лента новостей помогла студентам раньше начинать выполнение лабораторных работ.
