In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import logging

# Log lines carry: timestamp, process id, padded level name, logger name and
# line number, followed by a tab and the message itself.
logging.basicConfig(
    format="%(asctime)s [%(process)d] %(levelname)-8s "
           "%(name)s,%(lineno)s\t%(message)s",
)

# Set the level on the root logger directly: basicConfig() does nothing when
# the root logger already has handlers, so the level is applied separately.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from tqdm import tqdm_notebook as tqdm

In [None]:
# Read information to connect to the database and put it in environment variables
import os
# ENVVARS.txt is expected to contain one KEY=VALUE pair per line.
# Split on the FIRST '=' only, so that values which themselves contain '='
# (e.g. passwords or base64 tokens) are kept intact instead of the whole line
# being silently ignored. Lines without '=' or without a key are skipped.
with open('ENVVARS.txt') as env_file:
    for line in env_file:
        key, separator, value = line.partition('=')
        key = key.strip()
        if separator and key:
            # strip() also removes the trailing newline from the value
            os.environ[key] = value.strip()

In [None]:
# Name of the database to connect to (alternative: 'ticclat_test').
# It is exported as the 'dbname' environment variable as well.
db_name = 'ticclat'
os.environ.update(dbname=db_name)

In [None]:
from ticclat.ticclat_schema import Lexicon, Wordform, Anahash, Document, Corpus, WordformLink, WordformLinkSource, lexical_source_wordform, TextAttestation

from ticclat.dbutils import get_session, session_scope

# Create the session factory from the connection settings stored in the
# environment (user, password and database name, in that order).
Session = get_session(
    *(os.environ[setting] for setting in ('user', 'password', 'dbname'))
)

In [None]:
# Show what the session is bound to, as a quick check of the connection setup.
with session_scope(Session) as session:
    bind = session.get_bind()
    print(bind)

In [None]:
from sqlalchemy import Table, Column, BigInteger, Integer, Unicode
from sqlalchemy import select
from sqlalchemy import text
from sqlalchemy.sql import func, desc, and_

from sqlalchemy_views import CreateView

from ticclat.ticclat_schema import Base

class wordform_frequencies(Base):
    """ORM mapping for the ``wordform_frequency`` table.

    Each row holds a wordform, the id of that wordform and a frequency count.
    In this notebook the table is filled with the summed ``TextAttestation``
    frequency per wordform.
    """
    __tablename__ = 'wordform_frequency'
    
    # Primary key of this table; BigInteger, with an Integer variant for SQLite.
    wf_id = Column(BigInteger().with_variant(Integer, 'sqlite'), primary_key=True)
    # Id of the wordform this row describes.
    # NOTE(review): no ForeignKey is declared; presumably this refers to
    # Wordform.wordform_id — confirm.
    wordform_id = Column(BigInteger().with_variant(Integer, 'sqlite'))
    # The wordform text (length 255), indexed.
    wordform = Column(Unicode(255, convert_unicode=False), index=True)
    # Frequency count stored for the wordform.
    frequency = Column(BigInteger())

# Emit CREATE TABLE for the tables registered on Base.metadata, using the
# engine the session is bound to.
with session_scope(Session) as session:
    engine = session.get_bind()
    Base.metadata.create_all(engine)

In [None]:
%%time

from ticclat.sacoreutils import sql_insert

def iterate_results(result):
    """Yield one insertable record per row of a query result.

    All rows are fetched from ``result`` with ``fetchall()`` and iterated
    through ``tqdm``. Every row must expose the attributes ``wordform``,
    ``wordform_id`` and ``freq``.

    Yields:
        dict: with the keys ``wordform``, ``wordform_id`` and ``frequency``
        (the latter taken from the row's ``freq`` attribute).
    """
    fetched_rows = result.fetchall()
    for record in tqdm(fetched_rows):
        yield dict(
            wordform=record.wordform,
            wordform_id=record.wordform_id,
            frequency=record.freq,
        )

# Sum the TextAttestation frequencies per wordform and insert the totals into
# the wordform_frequency table.
with session_scope(Session) as session:
    attested_wordforms = Wordform.__table__.join(TextAttestation)
    total_frequency = func.sum(TextAttestation.frequency).label('freq')
    frequency_query = (
        select([Wordform, total_frequency])
        .select_from(attested_wordforms)
        .group_by(Wordform.wordform_id)
    )
    frequency_rows = session.execute(frequency_query)
    sql_insert(session, wordform_frequencies, iterate_results(frequency_rows))

In [None]:
%%time

from ticclat.dbutils import create_wf_frequencies_table

# Build the wordform frequencies table with the helper from ticclat.dbutils.
# NOTE(review): presumably this covers the same work as the manual
# select/insert cell earlier in this notebook — confirm before running both.
with session_scope(Session) as session:
    # The helper imported in this cell is create_wf_frequencies_table; calling
    # the undefined name create_word_frequencies_table raises a NameError.
    create_wf_frequencies_table(session)