## 1. Connect


In [None]:
import pandas as pd
import sqlite3

# Open the SQLite database used by every query below. The path is relative to
# the current working directory; sqlite3 creates the file if it is missing.
conn = sqlite3.connect(database='../data/checking-logs.sqlite')


## 2. Schema


In [None]:
# Show the column definitions of the `test` table (one row per column).
pd.read_sql(sql='PRAGMA table_info(test);', con=conn)


## 3. Sample rows


In [None]:
# Preview up to ten rows of `test`; no ORDER BY, so the row choice is up to SQLite.
pd.read_sql(sql='SELECT * FROM test LIMIT 10;', con=conn)


## 4. Min delta


In [None]:
# Smallest gap, in hours, between a lab's deadline and a user's first commit
# (deadline minus commit, so a negative value means the commit came after the
# deadline). Rows for 'project1' are excluded.
# NOTE(review): assumes deadlines.deadlines holds Unix epoch seconds, the same
# unit strftime('%s', ...) produces -- confirm against the deadlines table.
#
# SQLite sorts NULL before every other value in ascending order, so rows whose
# delta cannot be computed (NULL deadline, NULL or unparseable timestamp) are
# filtered out; otherwise one of them would win the LIMIT 1 and be reported as
# the minimum.
df_min = pd.io.sql.read_sql(
    """
    SELECT
        t.uid,
        (d.deadlines - strftime('%s', t.first_commit_ts)) / 3600.0 AS min_diff
    FROM test t
    JOIN deadlines d ON t.labname = d.labs
    WHERE t.labname != 'project1'
      AND d.deadlines IS NOT NULL
      AND strftime('%s', t.first_commit_ts) IS NOT NULL
    ORDER BY min_diff ASC
    LIMIT 1;
    """,
    conn
)
df_min


## 5. Max delta


In [None]:
# Largest gap, in hours, between a lab's deadline and a user's first commit
# (deadline minus commit). Rows for 'project1' are excluded; the single row
# with the highest value is kept.
df_max = pd.read_sql(
    sql="""
    SELECT
        t.uid,
        (d.deadlines - strftime('%s', t.first_commit_ts)) / 3600.0 AS max_diff
    FROM test t
    JOIN deadlines d ON t.labname = d.labs
    WHERE t.labname != 'project1'
    ORDER BY max_diff DESC
    LIMIT 1;
    """,
    con=conn,
)
df_max


## 6. Avg delta


In [None]:
# Mean gap, in hours, between deadline and first commit across all
# (user, lab) rows of `test`, leaving out 'project1'.
df_avg = pd.read_sql(
    sql="""
    SELECT
        AVG((d.deadlines - strftime('%s', t.first_commit_ts)) / 3600.0) AS avg_diff
    FROM test t
    JOIN deadlines d ON t.labname = d.labs
    WHERE t.labname != 'project1';
    """,
    con=conn,
)
df_avg


## 7. Correlation


In [None]:
# Per-user table with two measures: the user's average deadline-minus-commit
# gap in hours (excluding 'project1') and the user's total row count in the
# `pageviews` table. Users without any pageviews rows drop out via the inner join.
views_diff = pd.read_sql(
    sql="""
    SELECT
        t.uid,
        AVG((d.deadlines - strftime('%s', t.first_commit_ts)) / 3600.0) AS avg_diff,
        pv.pageviews
    FROM test t
    JOIN deadlines d ON t.labname = d.labs
    JOIN (
        SELECT uid, COUNT(*) AS pageviews
        FROM pageviews
        GROUP BY uid
    ) pv ON t.uid = pv.uid
    WHERE t.labname != 'project1'
    GROUP BY t.uid;
    """,
    con=conn,
)
views_diff
# Correlation matrix between the two numeric measures.
views_diff.loc[:, ['avg_diff', 'pageviews']].corr()


## 8. Close


In [None]:
# Release the SQLite connection opened at the top of the notebook; no further
# queries can run through `conn` after this.
conn.close()
