In [6]:
from flask_sqlalchemy import SQLAlchemy
from flask import Flask
from datetime import datetime
import os
import re

basedir = os.path.abspath('..')
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql+psycopg2://localhost/test_db'
app.config['SQLALCHEMY_COMMIT_ON_TEARDOWN'] = True
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
app.config['TESTING_FOLDER'] = os.path.join(basedir, 'utility/testing')
db = SQLAlchemy(app)

In [7]:
class Article(db.Model):
    __tablename__ = 'articles'

    id = db.Column(db.Integer, primary_key=True)
    article = db.Column(db.String(256))

    def __init__(self, **kwargs):
        super(Article, self).__init__(**kwargs)

    def __repr__(self):
        return '<Article %r>' % self.id

class Sentence(db.Model):
    __tablename__ = 'sentences'
    id = db.Column(db.Integer, primary_key=True)
    sentence = db.Column(db.String(256))
    translation = db.Column(db.String(256))
    article_id = db.Column(db.Integer, db.ForeignKey('articles.id'))
    article = db.relationship('Article', backref='sentences')

    def __init__(self, **kwargs):
        super(Sentence, self).__init__(**kwargs)

    def __repr__(self):
        return '<Sentence %r>' % self.id

class Word(db.Model):
    __tablename__ = 'words'
    id = db.Column(db.Integer, primary_key=True)
    word = db.Column(db.String(64), unique=True)
    translation = db.Column(db.String(256))

    def __init__(self, **kwargs):
        super(Word, self).__init__(**kwargs)

    def __repr__(self):
        return '<Word %r>' % self.id

class SentenceWord(db.Model):
    __tablename__ = 'sentencewords'
    id = db.Column(db.Integer, primary_key=True)
    sentence_id = db.Column(db.Integer, db.ForeignKey('sentences.id'))
    word_id = db.Column(db.Integer, db.ForeignKey('words.id'))
    sentence = db.relationship('Sentence', backref='sentencewords')
    word = db.relationship('Word', backref='sentencewords')

    def __init__(self, **kwargs):
        super(SentenceWord, self).__init__(**kwargs)

    def __repr__(self):
        return '<SentenceWord %r>' % self.id

class Review(db.Model):
    __tablename__ = 'reviews'
    id = db.Column(db.Integer, primary_key=True)
    word_id = db.Column(db.Integer, db.ForeignKey('words.id'))
    noshow = db.Column(db.Boolean, default=False)
    known = db.Column(db.Boolean, default=False)
    unknown = db.Column(db.Boolean, default=True)
    blurry = db.Column(db.Boolean, default=False)
    review_timestamp = db.Column(db.DateTime, default=datetime.utcnow,onupdate=datetime.now())
    word = db.relationship('Word', backref='review')

    def __init__(self, **kwargs):
        super(Review, self).__init__(**kwargs)

    def __repr__(self):
        return '<Review %r>' % self.id

class Mydict(db.Model):
    __tablename__ = 'mydict'
    id = db.Column(db.Integer, primary_key=True)
    word = db.Column(db.String(64), unique=True)
    timestamp = db.Column(db.DateTime, default=datetime.utcnow)

    def __init__(self, **kwargs):
        super(Mydict, self).__init__(**kwargs)

    def __repr__(self):
        return '<Mydict %r>' % self.id

In [8]:
def get_file(filetype):
    sourcedir = app.config.get('TESTING_FOLDER')
    for basename in os.listdir(sourcedir):
        file = os.path.join(sourcedir, basename)
        basename = os.path.basename(file)
        extention = basename.split('.')[1]
        if extention == filetype:
            return file

In [9]:
def import_articles(file):
    basename = os.path.basename(file)
    filename = basename.split('.')[0]
    a1 = Article(article=filename)
    print(a1)
    db.session.add(a1)
    db.session.commit()


In [12]:
def drop_everything():
    """(On a live db) drops all foreign key constraints before dropping all tables.
    Workaround for SQLAlchemy not doing DROP ## CASCADE for drop_all()
    (https://github.com/pallets/flask-sqlalchemy/issues/722)
    """
    from sqlalchemy.engine.reflection import Inspector
    from sqlalchemy.schema import DropConstraint, DropTable, MetaData, Table

    con = db.engine.connect()
    print(con)
    trans = con.begin()
    inspector = Inspector.from_engine(db.engine)

    # We need to re-create a minimal metadata with only the required things to
    # successfully emit drop constraints and tables commands for postgres (based
    # on the actual schema of the running instance)
    meta = MetaData()
    tables = []
    all_fkeys = []

    for table_name in inspector.get_table_names():
        fkeys = []

        for fkey in inspector.get_foreign_keys(table_name):
            if not fkey["name"]:
                continue

            fkeys.append(db.ForeignKeyConstraint((), (), name=fkey["name"]))

        tables.append(Table(table_name, meta, *fkeys))
        all_fkeys.extend(fkeys)

    for fkey in all_fkeys:
        con.execute(DropConstraint(fkey))

    for table in tables:
        con.execute(DropTable(table))

    trans.commit()

In [13]:
drop_everything()

<sqlalchemy.engine.base.Connection object at 0x7fadb020e6d8>


In [14]:
db.create_all()

In [15]:
import_articles(get_file('txt'))

<Article None>


In [16]:
def get_sentence(file):
    with open(file, 'r') as f:
        text = f.read()
    pttn = re.compile(r"[a-zA-Z].*", re.I)
    sentences = re.findall(pttn, text)
    return sentences

def get_tokens(text):
    tokens = re.findall('[a-z]+', text.lower())
    print(len(tokens))
    token = list(dict.fromkeys(tokens))
    print(len(token))
    return token

def read_text(filename):
    with open(filename, 'r') as f:
        text = f.read()
    return text

In [17]:
db.session.remove()
tokens = get_tokens(read_text(get_file('txt')))
words = [Word(word=t) for t in tokens]
db.session.add_all(words)
db.session.commit()


3460
772


In [55]:
sentences = get_sentence(get_file('txt'))
sentence = Sentence(sentence=sentences[0])
tokens = get_tokens(sentences[0])
words = [Word(word=t) for t in tokens]
sw = [SentenceWord(sentence=sentence, word=w) for w in words]

db.session.add_all(sw)
db.session.new
db.session.commit()

6
5


IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "words_word_key"
DETAIL:  Key (word)=(the) already exists.

[SQL: INSERT INTO words (word, translation) VALUES (%(word)s, %(translation)s) RETURNING words.id]
[parameters: {'word': 'the', 'translation': None}]
(Background on this error at: http://sqlalche.me/e/gkpj)

In [60]:
db.session.remove()
sentence = 'this is a one'
tokens = get_tokens(sentence)
a = db.session.query(Article).first()
w1 = db.session.query(Word).filter(Word.word == tokens[0]).first()
w2 = db.session.query(Word).filter(Word.word == tokens[1]).first()
w3 = db.session.query(Word).filter(Word.word == tokens[2]).first()
w4 = db.session.query(Word).filter(Word.word == tokens[3]).first()
s = Sentence(sentence=sentence, article=a)
sw1 = SentenceWord(word=w1)
sw2 = SentenceWord(word=w2)
sw3 = SentenceWord(word=w3)
sw4 = SentenceWord(word=w4)
s.sentencewords.append(sw1)
s.sentencewords.append(sw2)
s.sentencewords.append(sw3)
s.sentencewords.append(sw4)
db.session.add(s)
db.session.commit()

4
4


In [18]:
db.session.remove()
article = db.session.query(Article).first()

sl = []
sentences = get_sentence(get_file('txt'))
for sentence in sentences:
    tokens = get_tokens(sentence)
    s = Sentence(sentence=sentence, article=article)
    w = [db.session.query(Word).filter(Word.word == t).first() for t in tokens ]
    sw = [SentenceWord(word=i) for i in w]
    s.sentencewords = sw
    sl.append(s)
db.session.add_all(sl)
db.session.commit()

6
5
8
8
4
4
4
4
2
2
6
5
5
5
13
13
2
2
1
1
2
2
11
11
3
3
14
14
3
3
3
3
5
5
11
9
23
22
19
16
5
5
2
2
3
3
30
25
22
19
3
3
2
2
2
2
5
5
27
21
2
2
13
11
12
12
10
10
17
15
6
6
6
6
2
2
7
7
9
9
7
7
3
3
10
9
6
6
10
9
13
13
2
2
4
4
6
6
4
4
2
2
2
2
2
2
7
7
5
5
2
2
1
1
5
5
11
9
7
4
9
9
1
1
6
6
9
9
13
13
8
7
3
3
4
4
5
5
4
4
4
4
1
1
1
1
4
4
10
10
4
2
24
18
6
6
4
4
4
4
2
2
23
19
2
2
1
1
4
4
7
7
2
2
2
2
8
8
13
12
6
6
4
4
16
13
7
7
8
6
15
14
2
2
11
11
8
8
14
14
6
6
2
2
19
15
12
11
2
2
12
12
12
11
2
2
2
2
24
10
5
5
3
3
11
9
2
2
11
9
8
8
6
6
5
5
2
2
15
14
4
3
4
4
4
4
6
3
15
14
6
6
6
6
12
11
9
9
5
5
13
8
7
7
10
9
2
2
10
9
4
4
4
4
7
7
8
8
10
10
7
6
1
1
6
4
8
8
6
6
6
5
1
1
13
12
9
9
7
7
2
2
1
1
8
8
10
9
16
15
14
13
2
2
24
24
13
11
3
3
15
13
22
21
15
14
5
5
9
8
5
5
8
7
5
4
6
6
7
7
10
10
15
15
14
14
17
16
3
3
4
4
8
8
9
9
36
30
5
5
3
3
2
2
3
3
6
6
7
6
8
7
5
5
8
7
8
8
7
7
6
6
4
4
2
2
3
3
10
10
2
2
5
5
4
4
5
5
5
5
8
7
8
8
1
1
3
3
4
4
23
18
3
3
7
7
15
15
2
2
5
5
7
7
4
4
5
5
7
7
9
7
3
3
5
5
17
12
1
1
2
2
17
16
4
4
