## Импорты

In [1]:
import pandas as pd
import sqlite3

## Подключаемся к БД

In [2]:
# Location of the SQLite database file that holds the checking logs.
DB_PATH = "/content/checking-logs.sqlite"

# Single connection shared by every cell below.
conn = sqlite3.connect(DB_PATH)

## Создание таблицы datamart через коррелированный подзапрос

In [3]:
# Build the `datamart` table: one row per (uid, labname) pair.
#   first_commit_ts - earliest `checker.timestamp` within the group
#   first_view_ts   - earliest `pageviews.datetime` for the same uid, taken by a
#                     correlated subquery; NULL when the uid has no pageviews
# Only `checker` rows with status 'ready', numTrials = 1, one of the listed
# labs and a uid matching 'user_%' are considered.
query = '''
CREATE TABLE datamart AS
SELECT
    c.uid,
    c.labname,
    MIN(c.timestamp) AS first_commit_ts,
    (
        SELECT MIN(p.datetime)
        FROM pageviews p
        WHERE p.uid = c.uid AND p.uid LIKE 'user_%'
    ) AS first_view_ts
FROM checker c
WHERE c.status = 'ready'
  AND c.numTrials = 1
  AND c.labname IN ('laba04', 'laba04s', 'laba05', 'laba06', 'laba06s', 'project1')
  AND c.uid LIKE 'user_%'
GROUP BY c.uid, c.labname
'''

# Drop any previous copy first so that re-running this cell always rebuilds the
# table from the current query and data. With CREATE TABLE IF NOT EXISTS a table
# left over from an earlier run would be kept silently, even after the query or
# the source tables changed.
conn.execute("DROP TABLE IF EXISTS datamart")
conn.execute(query)
conn.commit()

## Таблица datamart, парсинг дат

In [4]:
# Read the whole datamart into pandas and parse both timestamp columns
# as datetimes instead of leaving them as strings.
df = pd.read_sql(
    sql="SELECT * FROM datamart;",
    con=conn,
    parse_dates=["first_commit_ts", "first_view_ts"],
)
df.head()

Unnamed: 0,uid,labname,first_commit_ts,first_view_ts
0,user_1,laba04,2020-04-26 17:06:18.462708,2020-04-26 21:53:59.624136
1,user_1,laba04s,2020-04-26 17:12:11.843671,2020-04-26 21:53:59.624136
2,user_1,laba05,2020-05-02 19:15:18.540185,2020-04-26 21:53:59.624136
3,user_1,laba06,2020-05-17 16:26:35.268534,2020-04-26 21:53:59.624136
4,user_1,laba06s,2020-05-20 12:23:37.289724,2020-04-26 21:53:59.624136


## Создание двух фреймов данных: test и control

In [5]:
# Rows that have a first pageview timestamp form the test group;
# rows where it is missing form the control group.
has_view = df["first_view_ts"].notna()
test = df[has_view].copy()
control = df[~has_view].copy()

## Замена пропущенных значений на среднее

In [6]:
# Average first-view timestamp across the rows of the test group.
mean_view_ts = test["first_view_ts"].mean()

# Every control row receives that same average as its first_view_ts.
control = control.assign(first_view_ts=mean_view_ts)

## Сохранение test и control в базу данных

In [7]:
# Write the test group to table "test", replacing any existing table of that
# name; the DataFrame index is not stored. The call's return value is shown
# as the cell output.
test.to_sql(name="test", con=conn, if_exists="replace", index=False)

59

In [8]:

# Write the control group to table "control", replacing any existing table of
# that name; the DataFrame index is not stored. The call's return value is
# shown as the cell output.
control.to_sql(name="control", con=conn, if_exists="replace", index=False)

81

## Закрываем соединение с БД

In [9]:
# Close the SQLite connection; `conn` cannot be used for queries after this.
conn.close()