In [None]:
from DeepSentiment.Preprocessing.CleanText import CleanText
from DeepSentiment.Dataset import Sentiment140
from DeepSentiment.Consts import (
    Global as Global, 
    Glove as Glove, 
    Paths as Paths, 
    Preprocessing as Preprocessing, 
    Training as Training 
)
from DeepSentiment.Networks.Tensorflow.Model import Model as TFModel
from DeepSentiment.Networks.Transformer.Model import Model as STModel
from pathlib import Path

In [None]:
from DeepSentiment.Preprocessing.CleanText import CleanText
from DeepSentiment.Dataset import Sentiment140
from DeepSentiment.Preprocessing.CleanText import CleanText
from DeepSentiment.Consts import (
    Global as Global, 
    Glove as Glove, 
    Paths as Paths, 
    Preprocessing as Preprocessing, 
    Training as Training 
)

trainArgs = Training.trainArgs
trainRatio = trainArgs["train_size_ratio"]

# Both embedding variants start from an identically configured Sentiment140 dataset.
datasetKwargs = dict(
    path=Paths.SENTIMENT140_DATASET,
    parsedPath=Paths.SENTIMENT140_DATASET_PARSED,
    embeddingDim=Glove.GLOVE_DIM,
    MAX_SEQUENCE_LENGTH=Preprocessing.MAX_SEQUENCE_LENGTH,
    args=trainArgs,
)
s140_W2V = Sentiment140.Dataset(**datasetKwargs)
s140_GloVe = Sentiment140.Dataset(**datasetKwargs)

# The two loads differ only in padInput: off for W2V, on for GloVe.
train_data_W2V, test_data_W2V, labelDecoder_W2V = s140_W2V.load(
    padInput=False,
    DEBUG=True,
    cleanFN=CleanText().cleanText,
    BERT=False,  # original note: Bert = False = TransformLabels!
)

train_data_GloVe, test_data_GloVe, labelDecoder_GloVe = s140_GloVe.load(
    padInput=True,
    DEBUG=True,
    cleanFN=CleanText().cleanText,
    BERT=False,  # original note: Bert = False = TransformLabels!
)


def _unwrapLabels(split):
    """Return (inputs, labels) where every label row is reduced to its first element ([1] -> 1)."""
    return split[0], [row[0] for row in split[1]]


test_data_GloVe_fixed = _unwrapLabels(test_data_GloVe)
train_data_GloVe_fixed = _unwrapLabels(train_data_GloVe)

test_data_W2V_fixed = _unwrapLabels(test_data_W2V)
train_data_W2V_fixed = _unwrapLabels(train_data_W2V)

In [None]:
#from ImdbSentimentDataset import ImdbSentimentDataset
#from clean_text import CleanText
#imdbDS = ImdbSentimentDataset(Paths.IMDB_DATASET, Paths.IMDB_DATASET_Parsed)

#imdbDF, imdbGloVeDF = imdbDS.load(CleanText().cleanText), imdbDS.load(CleanText().cleanText)
#imdbDS.removeCache()

#imdbGloVeDF['text'] = imdbGloVeDF['text'].apply(s140_GloVe.padInput)

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd 

def encodeLabel(y, threshhold=0.4):
    """Turn a raw score into a class label.

    Returns 0 (negative) when y is below the threshold, 1 (positive) when it
    is above, and 0.5 (neutral) when neither comparison holds, i.e. when y
    equals the threshold.
    """
    return 0 if y < threshhold else (1 if y > threshhold else 0.5)

def getMetrics(df):
    """Compute binary classification metrics from true and predicted labels.

    Args:
        df: object with columns 'y' (true labels) and 'predictions'
            (predicted labels). Assumes both are encoded as 0/1, matching
            what encodeLabel emits -- TODO confirm for 'y'.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall", "F1" and the
        raw confusion-matrix counts "tn", "fp", "fn", "tp". A ratio whose
        denominator is zero is reported as 0.0.
    """
    # Pin the label order so the matrix is always 2x2. Without it, a split in
    # which only one class occurs yields a 1x1 matrix and the four-way
    # unpacking below raises ValueError.
    confusionMatrix = confusion_matrix(df['y'], df['predictions'], labels=[0, 1])
    tn, fp, fn, tp = confusionMatrix.ravel()
    total = tn + fp + fn + tp

    def ratio(numerator, denominator):
        # Zero denominators (e.g. no positive predictions) would otherwise
        # produce nan together with a RuntimeWarning.
        return numerator / denominator if denominator else 0.0

    Accuracy = ratio(tp + tn, total)
    Precision = ratio(tp, tp + fp)
    Recall = ratio(tp, tp + fn)
    F1 = ratio(2 * Recall * Precision, Recall + Precision)

    return {
        "Accuracy" : Accuracy,
        "Precision" : Precision,
        "Recall" : Recall,
        "F1" : F1,
        "tn" : tn,
        "fp" : fp,
        "fn" : fn,
        "tp" : tp
    }


def evaluateModel(model, data, verbose=2, use_multiprocessing=True):
    """Run a TF model on a data split and return its binary metrics.

    Args:
        model: object exposing predict(x, batch_size=..., verbose=...,
            use_multiprocessing=...).
        data: pair (inputs, labels).
        verbose: forwarded to model.predict.
        use_multiprocessing: forwarded to model.predict.

    Returns:
        The metrics dict produced by getMetrics.
    """
    inputs, labels = data

    # Batch size is read from the notebook-level trainArgs.
    rawScores = model.predict(
        inputs,
        batch_size=trainArgs['train_batch_size'],
        verbose=verbose,
        use_multiprocessing=use_multiprocessing,
    )

    # getMetrics reads the columns 'y' and 'predictions'.
    scored = pd.DataFrame().assign(
        y=labels,
        predictions=list(map(encodeLabel, rawScores)),
    )
    return getMetrics(scored)

def metricCSVRow(metric):
    """Render the values of a metrics dict as one ';'-separated row."""
    return ";".join(map(str, metric.values()))

def metricHeaderRow(metric):
    """Render the keys of a metrics dict as one ';'-separated header row.

    Args:
        metric: mapping of metric name to value (as returned by getMetrics).

    Returns:
        The keys joined with ';', in the mapping's iteration order.
    """
    # The original body read an undefined name (metricDict) instead of the
    # parameter, so every call raised NameError. str() keeps this in line
    # with metricCSVRow, which also stringifies its items.
    return ";".join(str(k) for k in metric.keys())

In [None]:
import os

# os.listdir returns entries in arbitrary order; sorting makes every run
# evaluate and report the models in the same sequence.
modelNames = sorted(os.listdir(Paths.MODEL))
modelNames

In [None]:
# Display the model directory configured in Paths.MODEL.
Paths.MODEL

In [None]:
### TF model

In [None]:
# One entry per model: [modelName, trainMetrics, testMetrics], or
# [modelName, "ERROR", "ERROR"] when loading or evaluating the model failed.
results = []

for modelName in modelNames:
    splittedName = modelName.split("_")
    GLOVE = "GLOVE" in modelName
    # NOTE(review): the dataset is chosen with a substring test on the whole
    # name, while loadModel below receives a whole-token test -- confirm the
    # two always agree for the stored model names.
    if GLOVE:
        train_data, test_data = train_data_GloVe_fixed, test_data_GloVe_fixed
    else:
        train_data, test_data = train_data_W2V_fixed, test_data_W2V_fixed

    try:
        # A switch is on exactly when its tag is one of the "_"-separated name parts.
        layerFlags = {
            flag: tag in splittedName
            for flag, tag in (
                ("GLOVE", "GLOVE"),
                ("CNN_LAYER", "CNN"),
                ("POOLING_LAYER", "POOLING"),
                ("GRU_LAYER", "GRU"),
                ("BiLSTM_Layer", "BiLSTM"),
                ("LSTM_Layer", "LSTM"),
                ("DENSE_LAYER", "DENSE"),
            )
        }
        model = TFModel().loadModel(**layerFlags)
        trainResult = evaluateModel(model, train_data, verbose=1)
        testResult = evaluateModel(model, test_data, verbose=1)
    except Exception as err:
        # Keep going with the remaining models; the failure is logged and
        # recorded with "ERROR" placeholders.
        print(f"Modelname: {modelName}, EXCEPTION: ")
        print(err)
        print(*[f"---------{modelName}---------"] * 3, sep="\n")
        results.append([modelName, "ERROR", "ERROR"])
    else:
        results.append([modelName, trainResult, testResult])
        

In [None]:
# A row is an error row exactly when the evaluation loop stored the "ERROR"
# sentinel for both the train and the test result. The previous check
# (`x[1] == x[2] and x[1] in 'ERROR'`) was a substring test that raised
# TypeError whenever a model's train and test metric dicts compared equal.
errorResults = [x for x in results if x[1] == "ERROR" and x[2] == "ERROR"]
validResults = [x for x in results if not (x[1] == "ERROR" and x[2] == "ERROR")]

In [None]:
# Display the rows whose evaluation failed (stored with "ERROR" placeholders).
errorResults

In [None]:
# Names of the models that were evaluated successfully.
[row[0] for row in validResults]

In [None]:
import io
import os
from datetime import datetime


def _prefixMetrics(metrics, prefix):
    """Return a copy of a metrics dict whose keys carry the given prefix."""
    return {prefix + str(name): value for name, value in metrics.items()}


# One flat record per successfully evaluated model: the model name followed by
# the train metrics ("train" + key) and the test metrics ("test" + key).
# Building the frame from records replaces the earlier ';'-joined string that
# was parsed back with read_csv. That round trip broke on names containing
# ';', turned names such as "NA" into NaN, and raised IndexError when
# validResults was empty. An empty validResults now gives an empty frame.
resultRows = [
    {
        "modelName": modelName,
        **_prefixMetrics(trainMetrics, "train"),
        **_prefixMetrics(testMetrics, "test"),
    }
    for modelName, trainMetrics, testMetrics in validResults
]
resultDF = pd.DataFrame(resultRows)

# Timestamped file name so repeated runs never overwrite earlier results.
resultFileName = "result_" + datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + ".csv"
resultPath = os.path.join(Paths.RESULTS_BASE, resultFileName)
resultDF.to_csv(resultPath, encoding="utf-8")

In [None]:
# Display the rows whose evaluation failed (stored with "ERROR" placeholders).
errorResults

In [None]:
# Names of the models that were evaluated successfully.
[row[0] for row in validResults]