Team 10 NLP Project

Dataset: One Million Post Corpus
Dataset Language: German

Goal: Classify and predict User-Post category


In [None]:
##############################################################
################### Create Posts_Annotated ###################
##############################################################
import sqlite3

def _annotation_values(annotations, categories):
    """Return one label per entry of *categories*, using -1 where none exists.

    *annotations* is a sequence of ``(Category, Value)`` rows. A row belongs
    to a category when the category name occurs in the row's ``Category``
    text; the first such row wins.
    """
    return [
        next((value for name, value in annotations if category in name), -1)
        for category in categories
    ]


def getData(db_name):
    """Create and fill the ``Posts_Annotated`` table in the SQLite file *db_name*.

    The table holds one row per post that appears in
    ``Annotations_consolidated``: the post's ID, status, headline, body and
    vote counts taken from ``Posts``, followed by one column per annotation
    category. A category without an annotation for the post is stored as -1.

    If ``Posts_Annotated`` already contains rows, nothing is inserted.
    The connection is closed on every exit path, including errors.
    """
    # Column order of the annotation part of Posts_Annotated
    categories = (
        "ArgumentsUsed",
        "Discriminating",
        "Inappropriate",
        "OffTopic",
        "PersonalStories",
        "PossiblyFeedback",
        "SentimentNegative",
        "SentimentNeutral",
        "SentimentPositive",
    )

    # open and (if not exists) create database file
    con = sqlite3.connect(db_name)
    try:
        cur = con.cursor()

        # create table
        cur.execute("CREATE TABLE IF NOT EXISTS Posts_Annotated (" +
                    "ID_Post           INTEGER PRIMARY KEY," +
                    "Status            INTEGER," +
                    "Headline          TEXT," +
                    "Body              TEXT," +
                    "PositiveVotes     INTEGER," +
                    "NegativeVotes     INTEGER," +
                    "ArgumentsUsed     INTEGER," +
                    "Discriminating    INTEGER," +
                    "Inappropriate     INTEGER," +
                    "OffTopic          INTEGER," +
                    "PersonalStories   INTEGER," +
                    "PossiblyFeedback  INTEGER," +
                    "SentimentNegative INTEGER," +
                    "SentimentNeutral  INTEGER," +
                    "SentimentPositive INTEGER" +
                    ");")

        # check if we need to add data: any existing row means the table was
        # already populated, and inserting again would violate the primary key
        cur.execute('SELECT Count(*) FROM Posts_Annotated')
        if cur.fetchone()[0] > 0:
            return

        # insert statement
        insert_table = "INSERT INTO Posts_Annotated ( ID_Post, Status, Headline, Body, PositiveVotes, NegativeVotes, ArgumentsUsed, Discriminating, Inappropriate, OffTopic, PersonalStories, PossiblyFeedback, SentimentNegative, SentimentNeutral, SentimentPositive ) values ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )"

        # select annotation
        get_annotation = "SELECT Category, Value FROM Annotations_consolidated WHERE ID_Post = ? ORDER BY Category"

        # get all posts we need
        cur.execute('SELECT ID_Post, ' +
                    'Status, ' +
                    'Headline, ' +
                    'Body, ' +
                    'PositiveVotes, ' +
                    'NegativeVotes ' +
                    'FROM Posts ' +
                    'WHERE ID_Post IN ( ' +
                    'SELECT ID_Post ' +
                    'FROM Annotations_consolidated);')
        rows = cur.fetchall()

        for row in rows:
            # get annotations of this post (row[0] is ID_Post)
            cur.execute(get_annotation, [row[0]])
            annotations = cur.fetchall()

            next_row = list(row) + _annotation_values(annotations, categories)
            cur.execute(insert_table, next_row)

        # Save (commit) the changes
        con.commit()
    finally:
        # close the database connection
        con.close()

# Build Posts_Annotated inside the corpus database file
getData(db_name="corpus.sqlite3")

In [None]:
##############################################################
############## Create Posts_Annotated_combined ###############
##############################################################
import sqlite3
import pandas as pd
import numpy as np
import spacy
from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
from spacy.tokens import Doc

def remove_punct_and_numbers(corpus):
    """Parse *corpus* and return a new Doc without punctuation, numbers and symbols.

    The text is run through the module-level ``nlp`` pipeline. Tokens whose
    coarse part-of-speech tag is PUNCT, NUM or SYM are dropped; the remaining
    tokens are placed in a fresh ``Doc`` that carries over the LOWER, POS,
    ENT_TYPE and IS_ALPHA attributes of the parsed document.
    """
    # code from https://stackoverflow.com/questions/45375488/how-to-filter-tokens-from-spacy-document
    global nlp
    copied_attrs = [LOWER, POS, ENT_TYPE, IS_ALPHA]
    doc = nlp(corpus)

    # positions of the tokens that have to go
    indexes = [
        position
        for position, token in enumerate(doc)
        if token.pos_ in ('PUNCT', 'NUM', 'SYM')
    ]

    # attribute rows of the tokens that stay
    kept_rows = np.delete(doc.to_array(copied_attrs), indexes, axis=0)
    kept_words = [
        token.text
        for position, token in enumerate(doc)
        if position not in indexes
    ]

    doc2 = Doc(doc.vocab, words=kept_words)
    doc2.from_array(copied_attrs, kept_rows)
    return doc2

def combine_text(headline, body):
    """Merge a post's headline and body into one cleaned string.

    Either argument may be ``None``. If both are ``None`` the empty string is
    returned. If only one is given, that one is cleaned on its own. If both
    are given, a trailing ellipsis on the headline and a leading ellipsis on
    the body (three dots, or sometimes only two) are removed, exactly one
    space is ensured between the two parts, and the result is cleaned.

    "Cleaned" means passed through ``remove_punct_and_numbers`` and re-joined
    from the remaining tokens' ``text_with_ws``.
    """
    if headline is None and body is None:
        return ""

    if headline is None:
        text = body
    elif body is None:
        text = headline
    else:
        # remove the three-dot continuation marker at the seam
        if headline[-3:] == "...":
            headline = headline[:-3]
        if body[:3] == "...":
            body = body[3:]
        # sometimes the marker is only two dots; compare and cut exactly two
        # characters (a three-character slice can never equal "..")
        if headline[-2:] == "..":
            headline = headline[:-2]
        if body[:2] == "..":
            body = body[2:]

        # add missing space, to avoid combining words
        if headline[-1:] != " ":
            if body[:1] != " ":
                headline += " "
        else:
            # headline already ends with a space: avoid a double space
            if body[:1] == " ":
                body = body[1:]

        text = headline + body

    return "".join(token.text_with_ws for token in remove_punct_and_numbers(text))

# Load the annotated posts from the corpus database
con = sqlite3.connect('corpus.sqlite3')
Posts_Annotated_df = pd.read_sql_query("SELECT * FROM Posts_Annotated", con)

# German spaCy pipeline; read as a global by remove_punct_and_numbers
nlp = spacy.load("de_core_news_lg")

# Add the merged, cleaned text as third column, then drop its two sources
Posts_Annotated_df.insert(
    loc=2,
    column="combined_text",
    value=Posts_Annotated_df.apply(
        lambda post: combine_text(post["Headline"], post["Body"]),
        axis=1,
    ),
)
Posts_Annotated_df.drop(columns=['Headline', 'Body'], inplace=True)

# Persist the result; if_exists='fail' refuses to overwrite an existing table
Posts_Annotated_df.to_sql(name="Posts_Annotated_combined", con=con, if_exists='fail')

In [None]:
##############################################################
################# Create Dataframe for Test ##################
##############################################################
import sqlite3
import pandas as pd
import numpy as np
import spacy

con = sqlite3.connect('corpus.sqlite3')
Posts_Annotated_df = pd.read_sql_query(
    "SELECT * FROM Posts_Annotated_combined WHERE ID_Post IN (SELECT ID_Post FROM Posts_Annotated)",
    con,
)
nlp = spacy.load("de_core_news_lg")
# Parse a dummy text once, only to learn the width of a document vector
shape = nlp("test")

# create vector representation for all inputs
# Posts: one row per post, one column per vector component
combined_text = np.empty((len(Posts_Annotated_df["combined_text"]), shape.vector.shape[0]))
for pos, text in enumerate(Posts_Annotated_df["combined_text"]):
    combined_text[pos] = nlp(text).vector

In [None]:
##############################################################
################# Classifier Parameter Test ##################
##############################################################
import pandas as pd
import numpy as np
import spacy
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
        
def trainModel(text_vector, label, act="relu", solv="adam", l_rate="constant", l_rate_init=0.001, mom=0.9, iter_change=10):
    """Train an MLPClassifier on a 70/30 split and print its test metrics.

    text_vector -- feature matrix, one row per sample
    label       -- labels aligned with text_vector; must offer ``to_numpy()``
                   after splitting (e.g. a pandas Series)
    act, solv, l_rate, l_rate_init, mom, iter_change -- forwarded to
                   MLPClassifier as activation, solver, learning_rate,
                   learning_rate_init, momentum and n_iter_no_change

    Prints accuracy, recall, precision and F1 (each multiplied by 100, the
    last three for positive label 1) and the confusion matrix.
    """
    # Fixed seed so every parameter combination is judged on the same split
    text_train, text_test, label_train, label_test = train_test_split(
        text_vector,
        label,
        test_size=0.30,
        random_state=101,
        shuffle=True,
    )

    # Model Training (MLPClassifier)
    mlp_classifier = MLPClassifier(
        random_state=1,
        max_iter=5000,  # increase iter
        activation=act,
        solver=solv,
        learning_rate=l_rate,
        learning_rate_init=l_rate_init,
        momentum=mom,
        n_iter_no_change=iter_change,
    )
    mlp_classifier.fit(text_train, label_train)

    # Testing
    model_predictions = mlp_classifier.predict(text_test)

    # Evaluation
    expected = label_test.to_numpy()
    print("Accuracy:", metrics.accuracy_score(expected, model_predictions) * 100)
    binary_scores = (
        ("Recall:", metrics.recall_score),
        ("Precision:", metrics.precision_score),
        ("F1-Score:", metrics.f1_score),
    )
    for title, scorer in binary_scores:
        print(title, scorer(expected, model_predictions, average="binary", pos_label=1) * 100)
    print("Confusion Matrix:", metrics.confusion_matrix(expected, model_predictions))

#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "identity", "lbfgs")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "identity", "sgd")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "identity", "adam")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "logistic", "lbfgs")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "logistic", "sgd")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "logistic", "adam")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "tanh", "lbfgs")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "tanh", "sgd")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "tanh", "adam")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "lbfgs")
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.001) #76,667
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.002) #76,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.003) #77,685
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.01) #77,314
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.02) #76,203
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.03) #76,667
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.1) #76,296
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.2) #73,703
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "constant", 0.3) #73,703
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.001) #65,462
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.002) #68,056
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.003) #70,277
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.01) #75,556
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.02) #76,203
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.03) #76,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.1) #77,592
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.2) #76,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "invscaling", 0.3) #74,444
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.001) #76,667
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.002) #76,574
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.003) #77,5
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.01) #77,685
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.02) #76,296
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.03) #76,111
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.04) #76,203
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.05) #77,96
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06) #77,96
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.1) #76,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.2) #76,481
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.3) #76,481
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.4) #77,870
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.5) #76,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.6) #75,833
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.7) #76,944
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.8) #76,759
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9) #77,962
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9, 11) #77,962
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9, 15) #78,240
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9, 20) #78,240 <=== best
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9, 50) #78,240
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.06, 0.9, 100) #78,240
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.07) #77,129
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.08) #76,296
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.09) #77,037
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.1) #77,777
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.11) #75,277
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.2) #73,703
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "sgd", "adaptive", 0.3) #71,389
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "relu", "adam")

In [None]:
##############################################################
#################### Test Classification #####################
##############################################################
import pandas as pd
import numpy as np
import spacy
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
        
def trainModel(text_vector, label, label_name, sampling="n", act="relu", solv="adam", l_rate="constant", l_rate_init=0.001, mom=0.9, iter_change=10):
    """Train an MLPClassifier for one label and save an evaluation report as PDF.

    text_vector -- feature matrix, one row per sample
    label       -- binary labels (0/1) aligned with text_vector
    label_name  -- name used in the plot title and in the output file name
    sampling    -- "o" randomly oversamples the minority class of the
                   training split, "u" randomly undersamples the majority
                   class, anything else trains on the split unchanged
    act, solv, l_rate, l_rate_init, mom, iter_change -- forwarded to
                   MLPClassifier as activation, solver, learning_rate,
                   learning_rate_init, momentum and n_iter_no_change

    The report contains a normalized and an absolute confusion matrix, a bar
    chart of the label distribution (training split after resampling plus
    test split) and the F1/accuracy/precision/recall scores. It is written
    to "<label_name>_<model_name>.pdf" with " | " and "/" replaced by "_".
    """
    # split data
    text_train, text_test, label_train, label_test = train_test_split(text_vector, label, test_size=0.30, random_state=101, shuffle=True)

    # Resample the training split only; the test split keeps its distribution.
    # The samplers are seeded so repeated runs produce the same report.
    if sampling == "o":
        model_name = "MLPClassifier | Oversample"
        oversample = RandomOverSampler(sampling_strategy='minority', random_state=101)
        text_train, label_train = oversample.fit_resample(text_train, label_train)
    elif sampling == "u":
        model_name = "MLPClassifier | Undersample"
        undersample = RandomUnderSampler(sampling_strategy='majority', random_state=101)
        text_train, label_train = undersample.fit_resample(text_train, label_train)
    else:
        model_name = "MLPClassifier"

    # Model Training (MLPClassifier)
    mlp_classifier = MLPClassifier(random_state=1, max_iter=5000, activation=act, solver=solv, learning_rate=l_rate, learning_rate_init=l_rate_init, momentum=mom, n_iter_no_change=iter_change) #increase iter
    mlp_classifier.fit(text_train, label_train)

    # Testing
    model_predictions = mlp_classifier.predict(text_test)

    # Evaluation
    # Series.append no longer exists in pandas 2.x; concat is the replacement
    count_of_labels = pd.concat([pd.Series(label_train), pd.Series(label_test)])
    val_counts = count_of_labels.value_counts()

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
    fig.suptitle(("Label: {}; Model: {}".format(label_name, model_name)))
    # .get avoids a KeyError when one of the two classes does not occur
    classifier_evaluation = ("F1_Score: " + str(metrics.f1_score(label_test, model_predictions)) + "\n" +
                             "Accuracy: " + str(metrics.accuracy_score(label_test, model_predictions)) + "\n" +
                             "Precision: " + str(metrics.precision_score(label_test, model_predictions)) + "\n" +
                             "Recall: " + str(metrics.recall_score(label_test, model_predictions)) + "\n" +
                             "Instances of value 0: " + str(val_counts.get(0, 0)) + "\n" +
                             "Instances of value 1: " + str(val_counts.get(1, 0)))
    plt.figtext(0.99, 0.01, classifier_evaluation, horizontalalignment='left', verticalalignment="bottom", fontsize=15)
    ConfusionMatrixDisplay.from_predictions(label_test, model_predictions, normalize="true", ax=ax1)
    ConfusionMatrixDisplay.from_predictions(label_test, model_predictions, ax=ax2)
    val_counts.plot.bar(ax=ax3, title = "Verteilung des Labels")
    fig.tight_layout()

    # Make the names usable as a file name
    label_name = label_name.replace(" | ", "_")
    label_name = label_name.replace("/", "_")
    model_name = model_name.replace(" | ", "_")
    fig.savefig(label_name + "_" + model_name + ".pdf", bbox_inches='tight')
    plt.close(fig)

#add up and downvotes
# Variant 2: text vector followed by up- and downvotes
# (np.pad returns a new array, so combined_text itself is not modified)
combined_text_2 = np.pad(combined_text, pad_width=((0, 0), (0, 2)), mode='constant')
for x, vec in enumerate(combined_text_2):
    vec[-2] = Posts_Annotated_df["PositiveVotes"][x]
    vec[-1] = Posts_Annotated_df["NegativeVotes"][x]

# Variant 3: text vector followed by the status
# (1 = online, 0 = deleted, 99 = neither word found in the status)
combined_text_3 = np.pad(combined_text, pad_width=((0, 0), (0, 1)), mode='constant')
for x, vec in enumerate(combined_text_3):
    status = Posts_Annotated_df["Status"][x]
    vec[-1] = 1 if "online" in status else 0 if "deleted" in status else 99
    if vec[-1] == 99:
        print("There was no status")

# Variant 4: text vector followed by up-, downvotes and the status
combined_text_4 = np.pad(combined_text, pad_width=((0, 0), (0, 3)), mode='constant')
for x, vec in enumerate(combined_text_4):
    vec[-3] = Posts_Annotated_df["PositiveVotes"][x]
    vec[-2] = Posts_Annotated_df["NegativeVotes"][x]
    status = Posts_Annotated_df["Status"][x]
    vec[-1] = 1 if "online" in status else 0 if "deleted" in status else 99
    if vec[-1] == 99:
        print("There was no status")

# Copy data
# Private copies: features with votes, plus the full frame
ArgumentsUsedDF = Posts_Annotated_df.copy()
ArgumentsUsedText = combined_text_2.copy()

# Indices of posts whose ArgumentsUsed label is -1 (no annotation stored)
index_to_delete = ArgumentsUsedDF.index[ArgumentsUsedDF["ArgumentsUsed"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
ArgumentsUsedText = np.delete(ArgumentsUsedText, index_to_delete, axis=0)
ArgumentsUsedDF = ArgumentsUsedDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["ArgumentsUsed"], "ArgumentsUsed", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    ArgumentsUsedText,
    ArgumentsUsedDF["ArgumentsUsed"],
    "ArgumentsUsed | Votes",
    sampling="n",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(ArgumentsUsedText, ArgumentsUsedDF["ArgumentsUsed"], "ArgumentsUsed | Votes", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(ArgumentsUsedText, ArgumentsUsedDF["ArgumentsUsed"], "ArgumentsUsed | Votes", "u", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_3, Posts_Annotated_df["ArgumentsUsed"], "ArgumentsUsed | Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_4, Posts_Annotated_df["ArgumentsUsed"], "ArgumentsUsed | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data


# Copy data
# Private copies: features with status, plus the full frame
DiscriminatingDF = Posts_Annotated_df.copy()
DiscriminatingText = combined_text_3.copy()

# Indices of posts whose Discriminating label is -1 (no annotation stored)
index_to_delete = DiscriminatingDF.index[DiscriminatingDF["Discriminating"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
DiscriminatingText = np.delete(DiscriminatingText, index_to_delete, axis=0)
DiscriminatingDF = DiscriminatingDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["Discriminating"], "Discriminating", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["Discriminating"], "Discriminating | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(DiscriminatingText, DiscriminatingDF["Discriminating"], "Discriminating | Status", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(DiscriminatingText, DiscriminatingDF["Discriminating"], "Discriminating | Status", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    DiscriminatingText,
    DiscriminatingDF["Discriminating"],
    "Discriminating | Status",
    sampling="u",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(combined_text_4, Posts_Annotated_df["Discriminating"], "Discriminating | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data


# Copy data
# Private copies: features with votes and status, plus the full frame
InappropriateDF = Posts_Annotated_df.copy()
InappropriateText = combined_text_4.copy()

# Indices of posts whose Inappropriate label is -1 (no annotation stored)
index_to_delete = InappropriateDF.index[InappropriateDF["Inappropriate"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
InappropriateText = np.delete(InappropriateText, index_to_delete, axis=0)
InappropriateDF = InappropriateDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["Inappropriate"], "Inappropriate", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["Inappropriate"], "Inappropriate | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_3, Posts_Annotated_df["Inappropriate"], "Inappropriate | Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(InappropriateText, InappropriateDF["Inappropriate"], "Inappropriate | Votes/Status", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(InappropriateText, InappropriateDF["Inappropriate"], "Inappropriate | Votes/Status", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    InappropriateText,
    InappropriateDF["Inappropriate"],
    "Inappropriate | Votes/Status",
    sampling="u",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
# Used already before all known data


# Copy data
# Private copies: features with status, plus the full frame
OffTopicDF = Posts_Annotated_df.copy()
OffTopicText = combined_text_3.copy()

# Indices of posts whose OffTopic label is -1 (no annotation stored)
index_to_delete = OffTopicDF.index[OffTopicDF["OffTopic"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
OffTopicText = np.delete(OffTopicText, index_to_delete, axis=0)
OffTopicDF = OffTopicDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["OffTopic"], "OffTopic", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["OffTopic"], "OffTopic | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(OffTopicText, OffTopicDF["OffTopic"], "OffTopic | Status", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(OffTopicText, OffTopicDF["OffTopic"], "OffTopic | Status", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    OffTopicText,
    OffTopicDF["OffTopic"],
    "OffTopic | Status",
    sampling="u",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(combined_text_4, Posts_Annotated_df["OffTopic"], "OffTopic | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data


# Copy data
# Private copies: features with status, plus the full frame
PersonalStoriesDF = Posts_Annotated_df.copy()
PersonalStoriesText = combined_text_3.copy()

# Indices of posts whose PersonalStories label is -1 (no annotation stored)
index_to_delete = PersonalStoriesDF.index[PersonalStoriesDF["PersonalStories"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
PersonalStoriesText = np.delete(PersonalStoriesText, index_to_delete, axis=0)
PersonalStoriesDF = PersonalStoriesDF.drop(index=index_to_delete)

# Train and test Model
##trainModel(combined_text, Posts_Annotated_df["PersonalStories"], "PersonalStories", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["PersonalStories"], "PersonalStories | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(PersonalStoriesText, PersonalStoriesDF["PersonalStories"], "PersonalStories | Status", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    PersonalStoriesText,
    PersonalStoriesDF["PersonalStories"],
    "PersonalStories | Status",
    sampling="o",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(PersonalStoriesText, PersonalStoriesDF["PersonalStories"], "PersonalStories | Status", "u", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_4, Posts_Annotated_df["PersonalStories"], "PersonalStories | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Has improved a lot with more data


# Copy data
# Private copies: features with votes, plus the full frame
PossiblyFeedbackDF = Posts_Annotated_df.copy()
PossiblyFeedbackText = combined_text_2.copy()

# Indices of posts whose PossiblyFeedback label is -1 (no annotation stored)
index_to_delete = PossiblyFeedbackDF.index[PossiblyFeedbackDF["PossiblyFeedback"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
PossiblyFeedbackText = np.delete(PossiblyFeedbackText, index_to_delete, axis=0)
PossiblyFeedbackDF = PossiblyFeedbackDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["PossiblyFeedback"], "PossiblyFeedback", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    PossiblyFeedbackText,
    PossiblyFeedbackDF["PossiblyFeedback"],
    "PossiblyFeedback | Votes",
    sampling="n",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(PossiblyFeedbackText, PossiblyFeedbackDF["PossiblyFeedback"], "PossiblyFeedback | Votes", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(PossiblyFeedbackText, PossiblyFeedbackDF["PossiblyFeedback"], "PossiblyFeedback | Votes", "u", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_3, Posts_Annotated_df["PossiblyFeedback"], "PossiblyFeedback | Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_4, Posts_Annotated_df["PossiblyFeedback"], "PossiblyFeedback | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Has improved a lot with more data


# Copy data
# Private copies: features with status, plus the full frame
SentimentNegativeDF = Posts_Annotated_df.copy()
SentimentNegativeText = combined_text_3.copy()

# Indices of posts whose SentimentNegative label is -1 (no annotation stored)
index_to_delete = SentimentNegativeDF.index[SentimentNegativeDF["SentimentNegative"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
SentimentNegativeText = np.delete(SentimentNegativeText, index_to_delete, axis=0)
SentimentNegativeDF = SentimentNegativeDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["SentimentNegative"], "SentimentNegative", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["SentimentNegative"], "SentimentNegative | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(SentimentNegativeText, SentimentNegativeDF["SentimentNegative"], "SentimentNegative | Status", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(SentimentNegativeText, SentimentNegativeDF["SentimentNegative"], "SentimentNegative | Status", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    SentimentNegativeText,
    SentimentNegativeDF["SentimentNegative"],
    "SentimentNegative | Status",
    sampling="u",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(combined_text_4, Posts_Annotated_df["SentimentNegative"], "SentimentNegative | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data


# Copy data
# Private copies: features with votes and status, plus the full frame
SentimentNeutralDF = Posts_Annotated_df.copy()
SentimentNeutralText = combined_text_4.copy()

# Indices of posts whose SentimentNeutral label is -1 (no annotation stored)
index_to_delete = SentimentNeutralDF.index[SentimentNeutralDF["SentimentNeutral"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
SentimentNeutralText = np.delete(SentimentNeutralText, index_to_delete, axis=0)
SentimentNeutralDF = SentimentNeutralDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["SentimentNeutral"], "SentimentNeutral", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_2, Posts_Annotated_df["SentimentNeutral"], "SentimentNeutral | Votes", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_3, Posts_Annotated_df["SentimentNeutral"], "SentimentNeutral | Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    SentimentNeutralText,
    SentimentNeutralDF["SentimentNeutral"],
    "SentimentNeutral | Votes/Status",
    sampling="n",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(SentimentNeutralText, SentimentNeutralDF["SentimentNeutral"], "SentimentNeutral | Votes/Status", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(SentimentNeutralText, SentimentNeutralDF["SentimentNeutral"], "SentimentNeutral | Votes/Status", "u", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data


# Copy data
# Private copies: features with votes, plus the full frame
SentimentPositiveDF = Posts_Annotated_df.copy()
SentimentPositiveText = combined_text_2.copy()

# Indices of posts whose SentimentPositive label is -1 (no annotation stored)
index_to_delete = SentimentPositiveDF.index[SentimentPositiveDF["SentimentPositive"] == -1].tolist()

# Remove those posts from the feature matrix and from the frame
SentimentPositiveText = np.delete(SentimentPositiveText, index_to_delete, axis=0)
SentimentPositiveDF = SentimentPositiveDF.drop(index=index_to_delete)

# Train and test Model
#trainModel(combined_text, Posts_Annotated_df["SentimentPositive"], "SentimentPositive", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(SentimentPositiveText, SentimentPositiveDF["SentimentPositive"], "SentimentPositive | Votes", "n", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(SentimentPositiveText, SentimentPositiveDF["SentimentPositive"], "SentimentPositive | Votes", "o", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
trainModel(
    SentimentPositiveText,
    SentimentPositiveDF["SentimentPositive"],
    "SentimentPositive | Votes",
    sampling="u",
    act="relu",
    solv="sgd",
    l_rate="adaptive",
    l_rate_init=0.06,
    mom=0.9,
    iter_change=20,
)
#trainModel(combined_text_3, Posts_Annotated_df["SentimentPositive"], "SentimentPositive | Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
#trainModel(combined_text_4, Posts_Annotated_df["SentimentPositive"], "SentimentPositive | Votes/Status", "relu", "sgd", "adaptive", 0.06, 0.9, 20)
# Used already before all known data