In [12]:
import collections
import itertools

import attr
import csv
import sqlite3

In [13]:
%run common.ipynb

In [32]:
@attr.s
class OkpdtrOkz:
    TABLE = 'okz_okpdtr'
    CSV = 'okz_to_okpdtr.csv'
    okpdtr = attr.ib()
    okz = attr.ib()
    name = attr.ib()
    
@attr.s
class PsOkz:
    TABLE = 'ps_okz'
    CSV = 'ps_to_okz.csv'
    ps = attr.ib()
    okz = attr.ib()

@attr.s
class Program:
    TABLE = 'programs'
    CSV = 'programs.csv'
    program_code = attr.ib()
    name = attr.ib()

    
@attr.s
class ProgramProfstandards:
    TABLE = 'program_ps'
    CSV = 'program_to_profstandard.csv'
    program = attr.ib()
    ps = attr.ib()

In [33]:
def make_reader(cls):
    filename = cls.CSV
    def read():
        with open(filename, 'r') as f:
            reader = csv.DictReader(f)
            rows = (cls(**dict(row)) for row in reader)
            yield from rows
    read.__name__ = 'read_' + cls.__name__
    return read

In [34]:
def cls_to_insert_query(cls):
    placeholders = ','.join(('?' for x in attr.fields(cls)))
    return f'INSERT INTO {cls.TABLE} VALUES ({placeholders});'

def cls_to_create_table(cls):
    fields = attr.fields(cls)
    fields = (f.name + ' text' for f in fields)
    fields = ', '.join(fields)
    return f'CREATE TABLE {cls.TABLE} ({fields});'

In [39]:
def join_queries():
    yield """CREATE TABLE program_okpdtr AS
    SELECT DISTINCT p.program_code AS "program", okz_okpdtr.okpdtr AS "okpdtr"
    FROM programs p
    INNER JOIN program_ps p_ps ON p.program_code = p_ps.program
    INNER JOIN ps_okz ON p_ps.ps = ps_okz.ps
    INNER JOIN okz_okpdtr ON ps_okz.okz = okz_okpdtr.okz;
    """
    yield """
    CREATE TABLE links AS
    SELECT DISTINCT
    p.program_code AS "program",
    okz_okpdtr.okpdtr AS "okpdtr",
    p_ps.ps as "profstandard",
    ps_okz.okz as "okz"
    FROM programs p
    INNER JOIN program_ps p_ps ON p.program_code = p_ps.program
    INNER JOIN ps_okz ON p_ps.ps = ps_okz.ps
    INNER JOIN okz_okpdtr ON ps_okz.okz = okz_okpdtr.okz;
    """
    yield """CREATE TABLE cnt_program_okpdtrs AS
    SELECT program, p.name, COUNT(1) AS "cnt"
    FROM program_okpdtr
    INNER JOIN programs p ON p.program_code = program
    GROUP BY program_okpdtr.okpdtr;
    """

In [40]:
def feed(c):
    for cls in [
            OkpdtrOkz,
            PsOkz,
            Program,
            ProgramProfstandards]:
        reader = make_reader(cls)
        c.execute(cls_to_create_table(cls))
        data = tqdm.tqdm(reader())
        data = map(attr.astuple, data)
        con.executemany(cls_to_insert_query(cls), data)


try:
    if os.path.exists('programs.db'):
        os.remove('programs.db')
    con = sqlite3.connect('programs.db')
    feed(con)
    con.commit()
    for q in join_queries():
        con.execute(q)
    print(next(con.execute('SELECT COUNT(1) FROM program_okpdtr;')))
    print(pd.read_sql_query('SELECT * FROM cnt_program_okpdtrs WHERE cnt == 0 OR cnt > 10;', con))
finally:
    con.close()




0it [00:00, ?it/s][A[A[A


5026it [00:00, 50253.58it/s][A[A[A


7993it [00:00, 49901.42it/s][A[A[A


0it [00:00, ?it/s][A[A[A


335it [00:00, 41851.84it/s][A[A[A


0it [00:00, ?it/s][A[A[A


459it [00:00, 54090.40it/s][A[A[A


0it [00:00, ?it/s][A[A[A


365it [00:00, 58073.02it/s][A[A[A

(4407,)
     program                                 name  cnt
0   01.03.01                           Математика   22
1   01.03.01                           Математика   22
2   01.03.01                           Математика   22
3   01.03.02  Прикладная математика и информатика   12
4   01.03.02  Прикладная математика и информатика   12
5   01.03.02  Прикладная математика и информатика   12
6   01.03.01                           Математика   22
7   01.03.01                           Математика   22
8   01.03.02  Прикладная математика и информатика   12
9   01.03.01                           Математика   22
10  01.03.01                           Математика   22
11  01.03.01                           Математика   22
12  01.03.01                           Математика   22
13  01.03.01                           Математика   22
14  01.03.01                           Математика   22
15  01.03.01                           Математика   22
16  01.03.01                           Математика   22
17