In [None]:
import numpy as np
import logging
from time import time
import codecs

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
#tf.sysconfig.get_build_info()

from reader import get_vocab, read_trainingData
from train import train
from argParseDummy import args
from pathlib import Path


# Call the imported `args` object. The return value is not kept; the rest of
# the notebook reads its settings as attributes of `args` itself
# (e.g. args.out_dir_path).
# NOTE(review): presumably this call fills in those attributes - confirm in argParseDummy.
args()

# Timestamped INFO-level logging, plus a logger for this notebook.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


def loadAspectWords(args, saveName, k):
    """Read the words listed for every aspect in a model's ``aspect.log``.

    The log is opened at ``args.out_dir_path / (saveName + str(k)) / 'aspect.log'``
    and consumed in groups of three lines. Only the second line of each
    complete group is used: it is split on whitespace and every token is cut
    at its first ``":"``, keeping the part in front of it.

    Args:
        args: Object exposing ``out_dir_path`` (must support ``/`` joining
            and ``.open``).
        saveName: Name prefix of the model's output directory.
        k: Value appended to ``saveName`` to form the directory name.

    Returns:
        A list holding one list of words per complete three-line group.
    """
    logPath = args.out_dir_path / (saveName + str(k)) / 'aspect.log'
    with logPath.open(encoding="utf-8") as logFile:
        lines = [rawLine.strip() for rawLine in logFile.readlines()]

    aspectWords = []
    # Trailing lines that do not fill a whole group of three are ignored.
    for groupNr in range(int(len(lines) / 3)):
        tokens = lines[1 + 3 * groupNr].split()
        aspectWords.append([token.split(":")[0] for token in tokens])
    return aspectWords

#asks user to input inferred aspects
def infAspectsUserIO(aspectWords):
    """Interactively ask the user to name each aspect.

    For every word list the aspect index and the words are printed and the
    user is prompted for a label. Each answer is appended to the file
    ``tmpAspectsLog`` in the current working directory straight away, so the
    labels typed so far are on disk even if the session is interrupted
    before all aspects have been answered.

    Args:
        aspectWords: Iterable of word lists, one list per aspect.

    Returns:
        List of the labels entered by the user, in the order of ``aspectWords``.
    """
    infAspects = []
    for i, wordList in enumerate(aspectWords):
        print()
        print("Aspect: ", str(i))
        print(wordList)
        aspect = input("aspect: ")

        # Re-open in append mode for each answer so every label is flushed and
        # closed immediately; the context manager also closes the handle if
        # the write fails. newline="" writes "\n" untranslated, matching the
        # bytes the previous codecs.open-based code produced.
        with open(Path("tmpAspectsLog"), "a", encoding="utf-8", newline="") as tmpFile:
            tmpFile.write(aspect + "\n")

        infAspects.append(aspect)
        print()
    return infAspects

#loads representative words, asks user to input aspects and saves them to a file
def getAndSafeInfAspects(args, saveName, k=15):
    """Load a model's aspect words, ask the user to label them and save the labels.

    The words come from ``loadAspectWords`` and the labels from
    ``infAspectsUserIO``. The labels are written to
    ``args.out_dir_path / (saveName + str(k)) / 'infAspects'``, one line per
    aspect in the form ``"<index>: <label>"``. An existing file is overwritten.

    Args:
        args: Object exposing ``out_dir_path``.
        saveName: Name prefix of the model's output directory.
        k: Value appended to ``saveName`` to form the directory name.
    """
    aspectWords = loadAspectWords(args, saveName, k)
    infAspects = infAspectsUserIO(aspectWords)
    path = args.out_dir_path / (saveName + str(k)) / 'infAspects'
    # The context manager guarantees the labels are flushed and the handle is
    # closed before returning (the file used to be left open). newline=''
    # keeps "\n" untranslated, as the previous codecs.open-based code did.
    with open(path, 'w', encoding='utf-8', newline='') as infAspect_file:
        for i, aspect in enumerate(infAspects):
            infAspect_file.write(str(i) + ": " + aspect + '\n')


def batchInfAsp(emb="cord", kLs=(15, 30, 60), trainStrs=("extr", "extOth")):
    """Run the interactive aspect labelling for a batch of trained models.

    For every combination of ``k`` and ``trainStr`` the save name
    ``emb + "Model" + trainStr`` is built, announced on stdout and handed to
    ``getAndSafeInfAspects`` together with ``k``.

    The notebook-level ``args`` object is used; it is not a parameter.

    NOTE(review): the save name is used verbatim as part of a directory name.
    The training cells of this notebook use names such as ``cordModelExtr``
    (capital "E"), so on a case-sensitive filesystem ``trainStrs`` has to be
    passed with the same capitalisation - confirm against ``train``.

    Args:
        emb: Embedding part of the save name.
        kLs: Iterable of ``k`` values to process.
        trainStrs: Iterable of training-data parts of the save name. It is
            iterated once per ``k``, so pass a re-iterable (list/tuple).
    """
    # Defaults are tuples rather than lists so they cannot be mutated between calls.
    for k in kLs:
        for trainStr in trainStrs:
            saveName = emb + "Model" + trainStr
            print("--------------------------------------------------")
            print("inferring aspects for", saveName + str(k))
            getAndSafeInfAspects(args, saveName=saveName, k=k)

In [None]:
# Train on the extracted (supp-sen) sentences, once for each embedding
# (trained on cord, on extracted and on extracted+other) and each k in kToTrain.
#
# saveName: embData + "Model" + trainingData, e.g. cordModelExtr, extrModelExtr
kToTrain = [15, 30, 60]
embStrs = ["cord", "extr", "extOth"]
embPaths = [args.cordEmb_path, args.extrEmb_path, args.extOthEmb_path]
vocabPaths = [
    args.cordVocabDict_path,
    args.extrVocabDict_path,
    args.extOthVocabDict_path,
]

# embStrs, embPaths and vocabPaths are parallel lists; walk them together.
for embNr, (embStr, embPath, vocabPath) in enumerate(zip(embStrs, embPaths, vocabPaths)):
    vocab = get_vocab(vocab_path=vocabPath)
    name = embStr + "ModelExtr"
    # read_trainingData receives this embedding's vocab, so the data is read
    # again for every embedding.
    trainExtr_x, _ = read_trainingData(
        source=args.extractedProcessed_path,
        vocab=vocab,
        maxlen=args.maxlen,
        numLines=-1,
    )
    for k in kToTrain:
        train(args, vocab, trainExtr_x, emb_path=embPath, k=k, saveName=name)

In [None]:
# Train on the extracted+other (all-sen) sentences. The embedding trained on
# extracted only is left out: combining it with extracted+other training data
# is pointless.
#
# saveName: embData + "Model" + trainingData, e.g. cordModelExtr, extrModelExtr
kToTrain = [15, 30, 60]
embStrs = ["cord", "extOth"]
embPaths = [args.cordEmb_path, args.extOthEmb_path]
vocabPaths = [args.cordVocabDict_path, args.extOthVocabDict_path]

# embStrs, embPaths and vocabPaths are parallel lists; walk them together.
for embNr, (embStr, embPath, vocabPath) in enumerate(zip(embStrs, embPaths, vocabPaths)):
    vocab = get_vocab(vocab_path=vocabPath)
    name = embStr + "ModelExtOth"
    # NOTE: despite its name, trainExtr_x holds the extracted+other data in
    # this cell (it is read from args.extOthProcessed_path).
    trainExtr_x, _ = read_trainingData(
        source=args.extOthProcessed_path,
        vocab=vocab,
        maxlen=args.maxlen,
        numLines=-1,
    )
    for k in kToTrain:
        train(args, vocab, trainExtr_x, emb_path=embPath, k=k, saveName=name)

In [None]:
# Infer aspects for the models that use the cord embedding.
# The training-data part is capitalised ("Extr", "ExtOth") so the save names
# match the ones the training cells use (cordModelExtr, cordModelExtOth);
# loadAspectWords opens <saveName><k>/aspect.log, which is case-sensitive on
# most non-Windows filesystems.
batchInfAsp("cord", kLs=[15, 30, 60], trainStrs=["Extr", "ExtOth"])

In [None]:
# Infer aspects for the models that use the extOth (all-sen) embedding.
# The training-data part is capitalised ("Extr", "ExtOth") so the save names
# match the ones the training cells use (extOthModelExtr, extOthModelExtOth);
# loadAspectWords opens <saveName><k>/aspect.log, which is case-sensitive on
# most non-Windows filesystems.
batchInfAsp("extOth", kLs=[15, 30, 60], trainStrs=["Extr", "ExtOth"])

In [None]:
# Infer aspects for the models that use the extr (supp-sen) embedding.
# The training-data part is capitalised ("Extr") so the save name matches the
# one the training cell uses (extrModelExtr); loadAspectWords opens
# <saveName><k>/aspect.log, which is case-sensitive on most non-Windows
# filesystems.
batchInfAsp("extr", kLs=[15, 30, 60], trainStrs=["Extr"])