In [1]:
!pip install psycopg2
import csv
import os
import time
from io import StringIO

import pandas as pd
from sqlalchemy import create_engine, text



In [2]:
def connect_engine(engine, sql, timeout_ms=60000):
    """Run a query on a fresh connection and return the result as a DataFrame.

    Centralises the connect / set-timeout / read steps so the individual
    query helpers do not have to repeat them.

    Parameters
    ----------
    engine
        Object whose ``connect()`` returns a context-managed connection
        (the SQLAlchemy engine built with ``create_engine``).
    sql
        Query to run: either a plain SQL string or a SQLAlchemy ``text()``
        construct (optionally carrying bound parameters).
    timeout_ms : int, default 60000
        Value for PostgreSQL's ``statement_timeout``, in milliseconds
        (the default is one minute).

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``sql``.
    """
    with engine.connect() as conn:
        # Wrap the statement in text(): passing a bare string to
        # Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected
        # in 2.0. int() guarantees that only a number reaches the SQL.
        conn.execute(text(f"SET statement_timeout = {int(timeout_ms)};"))
        df = pd.read_sql(sql, conn)

    return df
    
def find_author(engine, name):
    """Return up to 10 rows of "sentimenttest" written by ``name``.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    name : str
        Exact value to match against the ``author`` column.

    Returns
    -------
    pandas.DataFrame
        At most 10 matching rows, all columns. The query has no ORDER BY,
        so which rows are picked is left to the database.
    """
    # The author name is sent as a bound parameter instead of being pasted
    # into the SQL text, so quotes in the name cannot break or alter the query.
    sql = text('''
        SELECT *
        FROM "sentimenttest"
        WHERE author = :name
        LIMIT 10;
    ''').bindparams(name=name)

    return connect_engine(engine, sql)

def find_word(engine, word, author=None):
    """Return up to 10 rows of "sentimenttest" whose aspect equals ``word``.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    word : str
        Exact value to match against the ``aspect`` column.
    author : str, optional
        When given, only rows by this author are returned.

    Returns
    -------
    pandas.DataFrame
        At most 10 matching rows, all columns.
    """
    # An equality test never matches NULL, so no separate IS NOT NULL is needed.
    conditions = ['aspect = :word']
    params = {'word': word}

    # Filter by author inside the query. Filtering the DataFrame afterwards
    # would only look at the first 10 rows for the word and could miss (or
    # entirely lose) the requested author's rows.
    if author is not None:
        conditions.append('author = :author')
        params['author'] = author

    # Only fixed condition strings are joined into the SQL text; every
    # caller-supplied value travels as a bound parameter.
    where_clause = ' AND '.join(conditions)
    sql = text(f'''
        SELECT *
        FROM "sentimenttest"
        WHERE {where_clause}
        LIMIT 10;
    ''').bindparams(**params)

    return connect_engine(engine, sql)



def avg_sentiment(engine, author='all'):
    """Count each aspect and average its confidence, overall or for one author.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    author : str, default 'all'
        ``'all'`` aggregates over every row; any other value restricts the
        aggregation to rows whose ``author`` column equals it.

    Returns
    -------
    pandas.DataFrame
        Columns ``aspect``, ``num_words`` (row count) and ``avg_sentiment``
        (mean of ``confidence``), preceded by ``author`` when a specific
        author was requested. Sorted by ``num_words`` descending.
    """
    if author == 'all':
        # The pattern is passed as a bound value so the query does not depend
        # on the driver turning a doubled '%%' back into '%'.
        # NOTE(review): PostgreSQL LIKE has no bracket character classes, so
        # this only excludes aspects containing the literal text "[^0-9]".
        # If the goal was to drop numeric/punctuation aspects, a regex
        # operator (e.g. !~) is probably what was meant - confirm intent.
        sql = text('''
            SELECT aspect, COUNT(aspect) AS num_words, AVG(confidence) AS avg_sentiment
            FROM "sentimenttest"
            WHERE aspect IS NOT NULL AND aspect NOT LIKE :excluded_pattern
            GROUP BY aspect
            ORDER BY num_words DESC;
        ''').bindparams(excluded_pattern='%[^0-9]%')
    else:
        # Per-author variant; the author is a bound parameter rather than
        # being interpolated into the SQL text.
        sql = text('''
            SELECT author, aspect, COUNT(aspect) AS num_words, AVG(confidence) AS avg_sentiment
            FROM "sentimenttest"
            WHERE author = :author AND aspect IS NOT NULL
            GROUP BY aspect, author
            ORDER BY num_words DESC;
        ''').bindparams(author=author)

    return connect_engine(engine, sql)


def books_per_author(engine):
    """Return, per author, the number of books and of non-null aspects.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.

    Returns
    -------
    pandas.DataFrame
        Columns ``author``, ``num_books`` and ``num_words``, sorted by
        ``num_books`` and then ``num_words``, both descending.
    """
    # num_books counts distinct titles. COUNT(author) would count every row
    # of the author (one per extracted aspect), not books.
    # Assumes `title` identifies a book, as words_per_book does - confirm.
    sql = '''
        SELECT author, COUNT(DISTINCT title) AS num_books, COUNT(aspect) AS num_words
        FROM "sentimenttest"
        GROUP BY author
        ORDER BY num_books DESC, num_words DESC
    '''

    return connect_engine(engine, sql)

def highest_words(engine, author='all'):
    """Count how often each aspect occurs, overall or for one author.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    author : str, default 'all'
        ``'all'`` counts over every row; any other value restricts the count
        to rows whose ``author`` column equals it. The default mirrors
        ``avg_sentiment``.

    Returns
    -------
    pandas.DataFrame
        Columns ``aspect`` and ``num_words`` (preceded by ``author`` when a
        specific author was requested), sorted by ``num_words`` descending.
    """
    # Passed as a bound value so the query does not depend on the driver
    # turning a doubled '%%' back into '%'.
    # NOTE(review): PostgreSQL LIKE has no bracket character classes, so this
    # only excludes aspects containing the literal text "[^0-9]". If the goal
    # was to drop numeric/punctuation aspects, a regex operator (e.g. !~) is
    # probably what was meant - confirm intent.
    excluded_pattern = '%[^0-9]%'

    if author == 'all':
        sql = text('''
            SELECT aspect, COUNT(aspect) AS num_words
            FROM "sentimenttest"
            WHERE aspect IS NOT NULL AND aspect NOT LIKE :excluded_pattern
            GROUP BY aspect
            ORDER BY num_words DESC
        ''').bindparams(excluded_pattern=excluded_pattern)
    else:
        # The author is a bound parameter rather than being interpolated
        # into the SQL text.
        sql = text('''
            SELECT author, aspect, COUNT(aspect) AS num_words
            FROM "sentimenttest"
            WHERE author = :author AND aspect IS NOT NULL AND aspect NOT LIKE :excluded_pattern
            GROUP BY aspect, author
            ORDER BY num_words DESC
        ''').bindparams(author=author, excluded_pattern=excluded_pattern)

    return connect_engine(engine, sql)

def words_per_book(engine, book, group='num_words'):
    """Count each aspect in one book and average its confidence.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    book : str
        Exact value to match against the ``title`` column.
    group : str, default 'num_words'
        Sort key: ``'num_words'`` sorts by occurrence count; any other value
        sorts by average confidence. Both orders are descending.

    Returns
    -------
    pandas.DataFrame
        Columns ``aspect``, ``num_words`` and ``avg_sentiment``.
    """
    # A column name cannot be sent as a bound parameter, so it is chosen from
    # two fixed literals; caller input never reaches the SQL text.
    order_column = 'num_words' if group == 'num_words' else 'avg_sentiment'

    sql = text(f'''
        SELECT aspect, COUNT(aspect) AS num_words, AVG(confidence) AS avg_sentiment
        FROM "sentimenttest"
        WHERE aspect IS NOT NULL AND title = :book
        GROUP BY aspect, title
        ORDER BY {order_column} DESC
    ''').bindparams(book=book)

    return connect_engine(engine, sql)

def find_similar(engine, specific):
    """Return up to 10 rows of "sentimenttest" whose aspect contains ``specific``.

    Parameters
    ----------
    engine
        Engine handed on to ``connect_engine``.
    specific : str
        Substring to look for in the ``aspect`` column. LIKE wildcards
        (``%`` and ``_``) inside it are still interpreted as wildcards.

    Returns
    -------
    pandas.DataFrame
        At most 10 matching rows, all columns.
    """
    # The whole pattern is sent as a bound value, so quotes in `specific`
    # cannot break out of the string literal. LIKE never matches NULL, so no
    # separate IS NOT NULL test is needed.
    sql = text('''
        SELECT *
        FROM "sentimenttest"
        WHERE aspect LIKE :pattern
        LIMIT 10;
    ''').bindparams(pattern=f'%{specific}%')

    return connect_engine(engine, sql)

In [3]:
def arrange(df, ascending=True):
    """Return ``df`` sorted by its ``confidence`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort; it is not modified.
    ascending : bool, default True
        Sort direction passed to ``sort_values``.

    Returns
    -------
    pandas.DataFrame or None
        A sorted copy, or ``None`` (after printing a notice) when ``df`` has
        no ``confidence`` column.
    """
    # Guard clause: nothing to sort by.
    if 'confidence' not in df.columns:
        print("No confidence column found")
        return None

    return df.sort_values(by=['confidence'], ascending=ascending)

In [4]:
# The connection URL (which embeds the database password) is read from the
# environment so that credentials are never stored in the notebook, e.g.
#   export PG_STRING='postgresql://<user>:<password>@<host>:5432/<database>'
# NOTE(review): a password was previously committed in this cell; treat it as
# compromised and rotate it.
PG_STRING = os.environ.get('PG_STRING')
if not PG_STRING:
    raise RuntimeError(
        "Set the PG_STRING environment variable to the PostgreSQL connection "
        "URL (postgresql://<user>:<password>@<host>:<port>/<database>) "
        "before running this cell."
    )

engine = create_engine(PG_STRING, pool_pre_ping=True)

In [5]:
# Look up rows whose aspect is exactly this phrase, keeping only Aristotle's
find_word(engine, word='μετεωρολογικων α', author='Aristotle')

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Section ID,Text Section,title,author,Translated Text,treepath,aspect,confidence
0,1958,1958,910,910,910,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-0-0,\n\t\t\tΜΕΤΕΩΡΟΛΟΓΙΚΩΝ Α.\n\t\t,Meteorologica,Aristotle,,11-1-0-0-1-0-0,μετεωρολογικων α,0.995833


In [6]:
# Rows returned for Aristotle, ordered from highest to lowest confidence
arrange(df=find_author(engine, name='Aristotle'), ascending=False)

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Section ID,Text Section,title,author,Translated Text,treepath,aspect,confidence
0,1958,1958,910,910,910,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-0-0,\n\t\t\tΜΕΤΕΩΡΟΛΟΓΙΚΩΝ Α.\n\t\t,Meteorologica,Aristotle,,11-1-0-0-1-0-0,μετεωρολογικων α,0.995833
1,1959,1959,911,911,911,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-0-1,\n\t\t\tΠΕΡΙ μὲν οὖν τῶν πρώτων αἰτίων τῆς φύσ...,Meteorologica,Aristotle,,11-1-0-0-1-0-1,ουν των πρωτων αιτιων της φυσεως,0.929122
4,1962,1962,913,913,913,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-3-0,\n\t\t\tἈναλαβόντες οὖν τὰς ἐξ ἀρχῆς θέσεις κα...,Meteorologica,Aristotle,,11-1-0-0-1-3-0,ουν τας εξ αρχης θεσεις και τους ειρημενους,0.918239
7,1965,1965,914,914,914,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-4-0,"\n\t\t\tΤούτων δὲ διωρισμένων, λέγωμεν διὰ τίν...",Meteorologica,Aristotle,,11-1-0-0-1-4-0,"σ μ ε ν ω ν , λ ε γ ω μ ε ν",0.663776
5,1963,1963,914,914,914,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-4-0,"\n\t\t\tΤούτων δὲ διωρισμένων, λέγωμεν διὰ τίν...",Meteorologica,Aristotle,,11-1-0-0-1-4-0,[ τ ο υ τ ω ν δ ε,-0.585432
6,1964,1964,914,914,914,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-4-0,"\n\t\t\tΤούτων δὲ διωρισμένων, λέγωμεν διὰ τίν...",Meteorologica,Aristotle,,11-1-0-0-1-4-0,ι,-0.856718
9,1967,1967,914,914,914,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-4-0,"\n\t\t\tΤούτων δὲ διωρισμένων, λέγωμεν διὰ τίν...",Meteorologica,Aristotle,,11-1-0-0-1-4-0,ι,-0.856718
8,1966,1966,914,914,914,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-4-0,"\n\t\t\tΤούτων δὲ διωρισμένων, λέγωμεν διὰ τίν...",Meteorologica,Aristotle,,11-1-0-0-1-4-0,α,-0.858921
2,1960,1960,912,912,912,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-2-0,\n\t\t\tἘπεὶ γὰρ διώρισται πρότερον ἡμῖν μία μ...,Meteorologica,Aristotle,,11-1-0-0-1-2-0,ημιν,-0.97574
3,1961,1961,912,912,912,Data/tlg0086tlg0261st1K-grc1_clean.xml1-0-0-1-2-0,\n\t\t\tἘπεὶ γὰρ διώρισται πρότερον ἡμῖν μία μ...,Meteorologica,Aristotle,,11-1-0-0-1-2-0,αρχη των,-0.977066


In [7]:
# Occurrence count and mean confidence per aspect across all authors
avg_sentiment(engine, author='all')

Unnamed: 0,aspect,num_words,avg_sentiment
0,ι,219,-0.860373
1,ar,165,0.930496
2,α,115,-0.856661
3,gitsm,108,0.975206
4,.,81,-0.498442
...,...,...,...
12136,βροτεων ωμων,1,-0.925707
12137,πλοιου αλιευτικου,1,-0.586475
12138,. 128,1,0.996704
12139,πολλοι πλουσιοι,1,-0.945301


In [8]:
# Most frequent aspects in Aristotle's rows
highest_words(engine, author='Aristotle')

Unnamed: 0,author,aspect,num_words
0,Aristotle,ι,25
1,Aristotle,α,9
2,Aristotle,ε,8
3,Aristotle,",",5
4,Aristotle,σ,5
...,...,...,...
144,Aristotle,[ ο δ η λ ι ο ς,1
145,Aristotle,[ ο σ,1
146,Aristotle,[ ο τ ι δ ’ ο υ τ ε κ υ κ λ ο ν,1
147,Aristotle,[ ο υ,1


In [9]:
# First rows whose aspect contains the letter alpha
find_similar(engine, specific='α')

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Section ID,Text Section,title,author,Translated Text,treepath,aspect,confidence
0,0,0,0,0,0,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-1,\n\t\t\tΤΟΞΑΡΙΣ Η ΦΙΛΙΑ\n\t\t,Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-1,τοξαρις η φιλια,0.995794
1,2,2,2,2,2,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-0-1,"\n\t\t\t1 Τί φής, ὦ Τόξαρι; θύετε Ὀρέστῃ καὶ Π...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-0-1,ω τοξαρι,-0.953741
2,3,3,2,2,2,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-0-1,"\n\t\t\t1 Τί φής, ὦ Τόξαρι; θύετε Ὀρέστῃ καὶ Π...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-0-1,ορεστη και πυλαδη υμεις οι σκυθαι και θεους,-0.912967
3,4,4,2,2,2,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-0-1,"\n\t\t\t1 Τί φής, ὦ Τόξαρι; θύετε Ὀρέστῃ καὶ Π...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-0-1,αυτους,-0.950146
4,5,5,3,3,3,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-1-0,\n\t\t\tΤΟΞΑΡΙΣ\n\t\t,Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-1-0,τοξαρις,0.995223
5,8,8,4,4,4,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-1-1,"\n\t\t\tΘύομεν, ὦ Μνήσιππε, θύομεν, οὐ μὴν θεο...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-1-1,αλλα ανδρας αγαθους,-0.9555
6,10,10,6,6,6,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-2-1,\n\t\t\tΝόμος δὲ ὑμῖν καὶ ἀνδράσιν ἀγαθοῖς ἀπο...,Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-2-1,νομος δε υμιν και ανδρασιν αγαθοις,-0.962246
7,12,12,7,7,7,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-3-0,\n\t\t\tΤΟΞΑΡΙΣ\n\t\t,Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-3-0,τοξαρις,0.995223
8,14,14,8,8,8,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-3-1,"\n\t\t\tΟὐ μόνον, ἀλλὰ καὶ ἑορταῖς καὶ πανηγύρ...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-3-1,αλλα και εορταις και πανηγυρεσιν,0.857286
9,15,15,8,8,8,Data/tlg0062tlg0441st1K-grc1_clean.xml1-0-0-2-3-1,"\n\t\t\tΟὐ μόνον, ἀλλὰ καὶ ἑορταῖς καὶ πανηγύρ...",Toxaris vel amicitia,Lucianus Samosatenus,,0-1-0-0-2-3-1,αυτους,-0.851374
