In [7]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import twokenize
from nltk.tokenize import TweetTokenizer
from FeatureFunctions import getfeatures
from FeatureFunctions import helper

import sklearn
import csv


### Import the classifier
import sys
sys.path.insert(0, 'libsvm')

from svmutil import *

In [8]:
# whichtask = input("Which task to you want to do (A/B): ")
whichtask = "A"

In [9]:
### Reading train and test files

datafile = "datasets/train/SemEval2018-T3-train-task"+whichtask+"_emoji.txt"
trainingdata = pd.read_csv(datafile, delimiter = "\t",  quoting=csv.QUOTE_NONE, header=0)
trainingdata = trainingdata[['Label','Tweet text']]

testfile = 'datasets/goldtest_Task'+whichtask+'/SemEval2018-T3_gold_test_task'+whichtask+'_emoji.txt'
testdata = pd.read_csv(testfile, sep="\t",  quoting=csv.QUOTE_NONE, header=0)
testdata = testdata[['Label','Tweet text']]

In [10]:
### Get lexical features
# training_features
lexical_training_features, unicount_vect, bicount_vect, tricount_vect, fourcount_vect = getfeatures.getlexical(trainingdata, 'Tweet text')
x_small = lexical_training_features[['PunctuationFlood', 'CharFlood', 'CapitalizedCount', 'HashtagCount', 'Hashtag2WordRatio', 'TweetCharLength', 'TweetWordLength', 'EmojiCount', 'FinalPunctuation']].values
x_lexical = np.array(lexical_training_features.apply(lambda row: sum([row['CharFourgramVector'], row['CharTrigramVector'],row['BigramVector']], []), axis=1).values.tolist())
x_lexical = np.hstack((x_small, x_lexical))

# train_bow = lexical_training_features['UnigramVector'].values.tolist()

# test_features
lexical_test_features, unicount_vect, bicount_vect, tricount_vect, fourcount_vect = getfeatures.getlexical(testdata, 'Tweet text', unicount_vect, bicount_vect, tricount_vect, fourcount_vect)
test_x_small = lexical_test_features[['PunctuationFlood', 'CharFlood', 'CapitalizedCount', 'HashtagCount', 'Hashtag2WordRatio', 'TweetCharLength', 'TweetWordLength', 'EmojiCount', 'FinalPunctuation']].values
test_lexical_x = np.array(lexical_test_features.apply(lambda row: sum([row['CharFourgramVector'], row['CharTrigramVector'],row['BigramVector']], []), axis=1).values.tolist())
test_lexical_x = np.hstack((test_x_small, test_lexical_x))

train_bow = np.array(lexical_training_features['UnigramVector'].values.tolist())
test_bow = np.array(lexical_test_features['UnigramVector'].values.tolist())

lexical_training_features = []
lexical_test_features = []




In [11]:
from importlib import reload  # Python 3.4+ only.
reload(getfeatures)

### Get sentiment features
train_sentiment_x = getfeatures.getaffinfeats(trainingdata['Tweet text'])
test_sentiment_x = getfeatures.getaffinfeats(testdata['Tweet text'])


In [99]:
def getindices(trainingdata, train_split=9, test_split=1):
    ### Creating 50-50% balance
    ## Train
    
    ### Count the amount of samples
    amount_nonirony_train = sum(trainingdata["Label"] == 0)
    amount_irony_train = sum(trainingdata["Label"] > 0)
    amount_train_amount = min(amount_nonirony_train, amount_irony_train)
    
    

    ### Sample indices
    nonirony_index = trainingdata[trainingdata["Label"] == 0].index.to_series()
    irony_index = trainingdata[trainingdata["Label"] > 0].index.to_series()

    total_nonirony_samples = nonirony_index.sample(amount_train_amount).tolist()
    total_irony_samples = irony_index.sample(amount_train_amount).tolist()
    
    train_amount = round(amount_train_amount / (train_split + test_split) * train_split)    
    test_amount = round(amount_train_amount / (train_split + test_split) * test_split)    
    
    print(len(total_nonirony_samples))
    print(len(total_irony_samples))

    resulting_train_index = total_nonirony_samples[:train_amount] + total_irony_samples[:train_amount]
    resulting_test_index = total_nonirony_samples[train_amount+1:] + total_irony_samples[train_amount+1:]

    return resulting_train_index, resulting_test_index


In [129]:
### Combining features
iterations = int(input("How many iterations: "))
heldoutset = int(input("Do you want to use a heldout set (1=heldout, 0=crossvalidate): "))
whichtype = int(input("Which type to test: "))
results = []

for i in range(iterations):
    print()
    print("-------------------------------------------------------------------")
    print("Iteration " + str(i+1) + "/" + str(iterations))
    print("-------------------------------------------------------------------")
    print()
    
    ### Selecting the actual training samples (Do some crossover here )
    if heldoutset:
        trainingdata_result = trainingdata
        testdata_result = testdata
        resulting_train_index = range(len(trainingdata))
        resulting_test_index = range(len(testdata_result))
    else:
        ## Crossvalidating
        resulting_train_index, resulting_test_index = getindices(trainingdata, 4, 1) # Get 80-20
        trainingdata_result = trainingdata.loc[resulting_train_index]
        testdata_result = trainingdata.loc[resulting_test_index]
    
    final_train_x = []
    final_test_x = []

    if heldoutset:
        ## BoW
        if whichtype == 1:
            final_train_x = train_bow[resulting_train_index]
            final_test_x = test_bow[resulting_test_index]
        ## Lexical
        if whichtype == 2:
            final_train_x = np.hstack((train_bow[resulting_train_index], x_lexical[resulting_train_index]))
            final_test_x = np.hstack((test_bow[resulting_test_index], test_lexical_x[resulting_test_index]))
        ## Sentiment
        if whichtype == 4:
            final_train_x = train_sentiment_x[resulting_train_index]
            final_test_x = test_sentiment_x[resulting_test_index]
        ## Combined
        if whichtype == 6:
            final_train_x = np.hstack((train_bow[resulting_train_index], train_sentiment_x[resulting_train_index], x_lexical[resulting_train_index]))
            final_test_x = np.hstack((test_bow[resulting_test_index], test_sentiment_x[resulting_test_index], test_lexical_x[resulting_test_index]))
    else:
        ## Crossvalidating
        ## BoW
        if whichtype == 1:
            final_train_x = train_bow[resulting_train_index]
            final_test_x = train_bow[resulting_test_index]
        ## Lexical
        if whichtype == 2:
            final_train_x = np.hstack((train_bow[resulting_train_index], x_lexical[resulting_train_index]))
            final_test_x = np.hstack((train_bow[resulting_test_index], x_lexical[resulting_test_index]))
        ## Sentiment
        if whichtype == 4:
            final_train_x = train_sentiment_x[resulting_train_index]
            final_test_x = train_sentiment_x[resulting_test_index]
        ## Combined
        if whichtype == 6:
            final_train_x = np.hstack((train_bow[resulting_train_index], train_sentiment_x[resulting_train_index], x_lexical[resulting_train_index]))
            final_test_x = np.hstack((train_bow[resulting_test_index], train_sentiment_x[resulting_test_index], x_lexical[resulting_test_index]))


    print("###########")
    print("Training:")
    print(final_train_x.shape)
    print()
    ### Train and get train error
    y = trainingdata_result['Label'].tolist()
    prob  = svm_problem(y, final_train_x)
    param = svm_parameter('-t 2 -c 8 -g ' + str(2**-11))
    m = svm_train(prob, param)
    p_label, p_acc, p_val = svm_predict(y, final_train_x, m)
    ACC, MSE, SCC = evaluations(y, p_label)

    print()
    print("###########")
    print("Testing:")
    print(final_test_x.shape)
    print()


    ### Get the test
    test_y = testdata_result['Label'].tolist()

    test_p_label, test_p_acc, test_p_val = svm_predict(test_y, final_test_x, m)
    test_ACC, test_MSE, test_SCC = evaluations(test_y, test_p_label)
    print(sklearn.metrics.classification_report(test_y, test_p_label, digits=4))

    if whichtask == "A":
        p, r, f = helper.precision_recall_fscore(test_y, test_p_label, beta=1, labels=[0,1], pos_label=1)
    elif whichtask == "B":
        p, r, f = helper.precision_recall_fscore(test_y, test_p_label, beta=1, labels=[0,1,2,3])

    print("Precision: " + str(p))
    print("Recall: " + str(r))
    print("F1-score: " + str(f))
    
    results.append([test_ACC, p*100, r*100, f*100])

    y_actu = pd.Series(test_y, name='Actual')
    y_pred = pd.Series(test_p_label, name='Predicted')
    df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)

    print()
    print()
    print(df_confusion)
    

### Write output
mean_results = np.mean(np.array(results), axis=0)
std_results = 2*np.std(np.array(results), axis=0)
min_results = np.min(np.array(results), axis=0)
max_results = np.max(np.array(results), axis=0)

if whichtype == 1:
    sys.stdout.write("BoW results")
elif whichtype == 2:
    sys.stdout.write("Lexical results")
elif whichtype == 4:
    sys.stdout.write("Semantic results")
elif whichtype == 6:
    sys.stdout.write("Combined results")
    
### stdout
print(" with " + str(iterations) + " iterations: ")
for result_i in range(len(mean_results)):
    sys.stdout.write("{:.2f}".format(mean_results[result_i]))
    sys.stdout.write(" ±" + "{:.2f}".format(std_results[result_i]))
    sys.stdout.write(" -" + "{:.2f}".format(min_results[result_i]))
    sys.stdout.write(" +" + "{:.2f}".format(max_results[result_i]) + "\t")
    
    
### file out
with open("Results/resultswrite.txt", "a") as outfile:
    if whichtype == 1:
        outfile.write("BoW results")
    elif whichtype == 2:
        outfile.write("Lexical results")
    elif whichtype == 4:
        outfile.write("Semantic results")
    elif whichtype == 6:
        outfile.write("Combined results")

    ### stdout
    outfile.write(" with " + str(iterations) + " iterations: \n")
    for result_i in range(len(mean_results)):
        outfile.write("{:.2f}".format(mean_results[result_i]))
        outfile.write(" ±" + "{:.2f}".format(std_results[result_i]))
        outfile.write(" -" + "{:.2f}".format(min_results[result_i]))
        outfile.write(" +" + "{:.2f}".format(max_results[result_i]) + "\t")
        
    outfile.write("\n\n")
print()

How many iterations: 50
Do you want to use a heldout set (1=heldout, 0=crossvalidate): 0
Which type to test: 2

-------------------------------------------------------------------
Iteration 1/50
-------------------------------------------------------------------

1911
1911
###########
Training:
(3058, 63411)

Accuracy = 90.9091% (2780/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 68.6352% (523/762) (classification)
              precision    recall  f1-score   support

           0     0.6972    0.6588    0.6775       381
           1     0.6766    0.7139    0.6948       381

   micro avg     0.6864    0.6864    0.6864       762
   macro avg     0.6869    0.6864    0.6861       762
weighted avg     0.6869    0.6864    0.6861       762

Precision: 0.6766169154228856
Recall: 0.7139107611548556
F1-score: 0.6947637292464879


Predicted  0.0  1.0  All
Actual                  
0          251  130  381
1          109  272  381
All        360  402  762

----------------

Accuracy = 69.4226% (529/762) (classification)
              precision    recall  f1-score   support

           0     0.7139    0.6483    0.6795       381
           1     0.6779    0.7402    0.7077       381

   micro avg     0.6942    0.6942    0.6942       762
   macro avg     0.6959    0.6942    0.6936       762
weighted avg     0.6959    0.6942    0.6936       762

Precision: 0.6778846153846154
Recall: 0.7401574803149606
F1-score: 0.7076537013801758


Predicted  0.0  1.0  All
Actual                  
0          247  134  381
1           99  282  381
All        346  416  762

-------------------------------------------------------------------
Iteration 11/50
-------------------------------------------------------------------

1911
1911
###########
Training:
(3058, 63411)

Accuracy = 90.9745% (2782/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 71.1286% (542/762) (classification)
              precision    recall  f1-score   support

           0     0.7230  

###########
Training:
(3058, 63411)

Accuracy = 91.3996% (2795/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 68.2415% (520/762) (classification)
              precision    recall  f1-score   support

           0     0.6915    0.6588    0.6747       381
           1     0.6742    0.7060    0.6897       381

   micro avg     0.6824    0.6824    0.6824       762
   macro avg     0.6828    0.6824    0.6822       762
weighted avg     0.6828    0.6824    0.6822       762

Precision: 0.6741854636591479
Recall: 0.7060367454068242
F1-score: 0.6897435897435898


Predicted  0.0  1.0  All
Actual                  
0          251  130  381
1          112  269  381
All        363  399  762

-------------------------------------------------------------------
Iteration 21/50
-------------------------------------------------------------------

1911
1911
###########
Training:
(3058, 63411)

Accuracy = 90.9418% (2781/3058) (classification)

###########
Testing:
(762, 63411)

Accur

###########
Training:
(3058, 63411)

Accuracy = 90.8437% (2778/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 71.1286% (542/762) (classification)
              precision    recall  f1-score   support

           0     0.7307    0.6693    0.6986       381
           1     0.6949    0.7533    0.7229       381

   micro avg     0.7113    0.7113    0.7113       762
   macro avg     0.7128    0.7113    0.7108       762
weighted avg     0.7128    0.7113    0.7108       762

Precision: 0.6949152542372882
Recall: 0.7532808398950132
F1-score: 0.7229219143576827


Predicted  0.0  1.0  All
Actual                  
0          255  126  381
1           94  287  381
All        349  413  762

-------------------------------------------------------------------
Iteration 31/50
-------------------------------------------------------------------

1911
1911
###########
Training:
(3058, 63411)

Accuracy = 90.2878% (2761/3058) (classification)

###########
Testing:
(762, 63411)

Accur

###########
Training:
(3058, 63411)

Accuracy = 91.0399% (2784/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 70.8661% (540/762) (classification)
              precision    recall  f1-score   support

           0     0.7239    0.6745    0.6984       381
           1     0.6953    0.7428    0.7183       381

   micro avg     0.7087    0.7087    0.7087       762
   macro avg     0.7096    0.7087    0.7083       762
weighted avg     0.7096    0.7087    0.7083       762

Precision: 0.6953316953316954
Recall: 0.7427821522309711
F1-score: 0.7182741116751269


Predicted  0.0  1.0  All
Actual                  
0          257  124  381
1           98  283  381
All        355  407  762

-------------------------------------------------------------------
Iteration 41/50
-------------------------------------------------------------------

1911
1911
###########
Training:
(3058, 63411)

Accuracy = 90.3532% (2763/3058) (classification)

###########
Testing:
(762, 63411)

Accur

###########
Training:
(3058, 63411)

Accuracy = 90.5494% (2769/3058) (classification)

###########
Testing:
(762, 63411)

Accuracy = 69.685% (531/762) (classification)
              precision    recall  f1-score   support

           0     0.7060    0.6745    0.6899       381
           1     0.6884    0.7192    0.7035       381

   micro avg     0.6969    0.6969    0.6969       762
   macro avg     0.6972    0.6969    0.6967       762
weighted avg     0.6972    0.6969    0.6967       762

Precision: 0.6884422110552764
Recall: 0.7191601049868767
F1-score: 0.7034659820282413


Predicted  0.0  1.0  All
Actual                  
0          257  124  381
1          107  274  381
All        364  398  762
Lexical results with 50 iterations: 
70.11 ±2.64 -67.59 +73.36	68.92 ±2.52 -66.59 +72.82	73.28 ±4.98 -65.35 +77.69	71.01 ±3.01 -66.85 +73.74	


In [117]:
if heldoutset:
    ### Evaluate results per category (Sectie 5.2)

    testfileB = 'datasets/goldtest_TaskB/SemEval2018-T3_gold_test_taskB_emoji.txt'
    testdataB = pd.read_csv(testfileB, sep="\t",  quoting=csv.QUOTE_NONE, header=0)
    testdataB = testdataB[['Label','Tweet text']]

    ## Getting category information
    used_testB = testdataB.loc[resulting_test_index]
    used_testB.reset_index(inplace=True, drop=True)

    ## Nonirony
    print("--- 0 Not ironic")
    predB_nonirony = sum((used_testB['Label'] == 0) & (y_pred == 0))
    testB_nonirony = sum((used_testB['Label'] == 0))
    print(str(predB_nonirony) + "/" + str(testB_nonirony))
    print("{:.2f}%".format(predB_nonirony/testB_nonirony * 100))
    print()

    ## 1
    print("--- 1 Ironic by clash")
    predB_nonirony = sum((used_testB['Label'] == 1) & (y_pred == 1))
    testB_nonirony = sum((used_testB['Label'] == 1))
    print(str(predB_nonirony) + "/" + str(testB_nonirony))
    print("{:.2f}%".format(predB_nonirony/testB_nonirony * 100))
    print()

    ## 2
    print("--- 2 Situational irony")
    predB_nonirony = sum((used_testB['Label'] == 2) & (y_pred == 1))
    testB_nonirony = sum((used_testB['Label'] == 2))
    print(str(predB_nonirony) + "/" + str(testB_nonirony))
    print("{:.2f}%".format(predB_nonirony/testB_nonirony * 100))
    print()

    ## 3
    print("--- 3 Other irony")
    predB_nonirony = sum((used_testB['Label'] == 3) & (y_pred == 1))
    testB_nonirony = sum((used_testB['Label'] == 3))
    print(str(predB_nonirony) + "/" + str(testB_nonirony))
    print("{:.2f}%".format(predB_nonirony/testB_nonirony * 100))
    print()


In [82]:
trainingdata_result = trainingdata.loc[resulting_train_index]
testdata_result = trainingdata.loc[resulting_test_index]

In [92]:
testdata_result.index
sum(testdata["Label"] == 1) / len(testdata_result)

0.40813648293963256

In [112]:
print(sum([1 for yi in y if yi == 0]))
print(sum([1 for yi in y if yi == 1]))


1529
1529
