In [66]:
import itertools
import re
import numpy as np
from tqdm.notebook import tqdm
from tabDatabase import *
import matplotlib.pyplot as plt
import pandas as pd
import json
from sqlalchemy import Connection, select, update, bindparam

# Hard-coded row counts. `total_entries` is passed to tqdm as a progress-bar
# total further down; `caselist_total` is presumably used the same way
# elsewhere in the notebook.
total_entries = 615_613
caselist_total = 338_521

# `metadata` and `tabEngine` come from the `tabDatabase` star import;
# presumably this creates any tables that do not exist yet.
metadata.create_all(tabEngine)

# Show every column when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# pd.set_option("display.max_rows", 20)

# Register tqdm's pandas integration.
tqdm.pandas()

# Create Teams

In [44]:
def get_team_values(row_group):
    """Turn the rows of one entry into a dict of team column values.

    Args:
        row_group: iterable of indexable rows; only the element at index 2 of
            each row is read (presumably the student id). It is iterated
            exactly once, so a one-shot iterator is fine.

    Returns:
        dict with keys ``debater1`` .. ``debater5`` (``None`` for unused
        slots), ``other_debaters`` (comma-joined string of every debater after
        the fifth, or ``None`` if there are at most five) and ``num_debaters``
        (the number of rows received).
    """
    roster = []
    for row in row_group:
        roster.append(row[2])
    headcount = len(roster)

    # Pad short rosters up to the five named slots; longer rosters are left as is.
    shortfall = 5 - headcount
    if shortfall > 0:
        roster.extend([None] * shortfall)

    team = {}
    for slot in range(1, 6):
        team[f"debater{slot}"] = roster[slot - 1]

    if len(roster) > 5:
        team["other_debaters"] = ','.join(str(extra) for extra in roster[5:])
    else:
        team["other_debaters"] = None
    team["num_debaters"] = headcount
    return team

In [45]:
def chunked_iterable(iterable, size):
    """Lazily split ``iterable`` into consecutive lists of at most ``size`` items.

    Args:
        iterable: any iterable; it is consumed once, front to back.
        size: maximum number of items per yielded list.

    Yields:
        Non-empty lists in source order. Every list has ``size`` items except
        possibly the last one. Nothing is yielded for an empty iterable.
    """
    source = iter(iterable)
    # An empty batch is falsy, which ends the loop once the source is exhausted.
    while batch := list(itertools.islice(source, size)):
        yield batch

In [None]:
from sqlalchemy.dialects.sqlite import insert

# Build one team row per entry and link each entry to its team.
#
# Step 1: read every entry/student link. The rows are fetched with .all()
# *inside* the `with` block: a lazily consumed result must not be iterated
# after its connection has been closed / returned to the pool.
with tabEngine.connect() as conn:
    query = entry_student_table.select().order_by('entry', 'student')
    entry_groups = conn.execute(query).all()

# Step 2: group the rows by the value at index 1 (the grouping relies on the
# ORDER BY above keeping rows of the same entry adjacent) and turn each group
# into the team column values. Lazy, so the progress bar advances as chunks
# are pulled below.
data = tqdm((
    (entry, get_team_values(group))
    for entry, group in itertools.groupby(entry_groups, lambda x: x[1])
), total=total_entries)

# Step 3: both statements are identical for every chunk, so build them once.
# The upsert does a dummy update on conflict so that RETURNING also yields the
# id of an already existing team. `sort_by_parameter_order=True` makes the
# returned ids line up with the submitted parameter sets, which the positional
# pairing below depends on (requires SQLAlchemy >= 2.0.10).
upsert_team = (
    insert(team_table)
    .on_conflict_do_update(index_elements=team_unique, set_=team_table.c)
    .returning(team_table.c.id, sort_by_parameter_order=True)
)
# Bind names are prefixed with "b_" so they cannot clash with column names.
assign_team = (
    update(entry_table)
    .where(entry_table.c.id == bindparam('b_entry_id'))
    .values({'team': bindparam('b_team_id')})
)

chunk_size = 10000
for chunk in chunked_iterable(data, chunk_size):
    # One transaction per chunk: upsert the teams, then point the entries at them.
    with tabEngine.begin() as conn:
        values = [record[1] for record in chunk]
        team_ids = [row[0] for row in conn.execute(upsert_team, values)]

        # zip() would silently drop entries on a short result; fail loudly
        # instead (raising here rolls the chunk's transaction back).
        if len(team_ids) != len(chunk):
            raise RuntimeError(
                f"expected {len(chunk)} team ids from the upsert, got {len(team_ids)}"
            )

        # Single executemany instead of one UPDATE round-trip per entry.
        conn.execute(assign_team, [
            {'b_entry_id': record[0], 'b_team_id': team_id}
            for record, team_id in zip(chunk, team_ids)
        ])


In [40]:
# with engine.begin() as con:
#     con.execute(team_table.delete())
#     con.execute(entry_table.update().values({'team': None}))

# Add short codes and schools

In [3]:
# Fill `short_code` and `school_name` for every entry whose `code` ends in a
# whitespace character followed by exactly two ASCII letters.
with tabEngine.begin() as conn:
    q = select(entry_table.c.id, entry_table.c.code).where(entry_table.c.code.regexp_match(r'.*\s[a-zA-Z]{2}$'))
    code_info = [
        # The last two characters are the short code (upper-cased); cutting the
        # last three characters also removes the one whitespace before it.
        {'entry_id': i, 'short_code': code[-2:].upper(), 'school_name': code[:-3]}
        for i, code in tqdm(conn.execute(q))
    ]

    # Despite its name this is an UPDATE; it is executed once per dict in
    # `code_info` (executemany), matching rows on `entry_id`.
    insert_q = update(entry_table)\
        .where(entry_table.c.id == bindparam('entry_id'))\
        .values({'short_code': bindparam('short_code'), 'school_name': bindparam('school_name')})

    # An empty list is not treated as an executemany: the statement would be
    # run once with no parameters and fail because the bind values are
    # required. Nothing matched means there is nothing to update.
    if code_info:
        conn.execute(insert_q, code_info)

0it [00:00, ?it/s]