In [109]:
# Imports
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, matthews_corrcoef
import pandas as pd
import numpy as np
from google.colab import drive

In [110]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##Data processing

In [111]:
def getDataClass(df, dataColumnName, labelColumnName):
  X = df.loc[:, dataColumnName].to_numpy()
  y = df.loc[:, labelColumnName].to_numpy()

  train_classes = ['']*4

  n = np.size(X)

  for i in range(n):
    if (y[i] == 'B'):
      train_classes[0] += str(X[i]) + '. '
    
    if (y[i] == 'D'):
      train_classes[1] += str(X[i]) + '. '
    
    if (y[i] == 'I'):
      train_classes[2] += str(X[i]) + '. '
    
    if (y[i] == 'P'):
      train_classes[3] += str(X[i]) + '. '
    
  return train_classes


In [112]:
def getDataAndLabel(df, dataColumnName, labelColumnName):

  X = df.loc[:, dataColumnName].to_numpy()
  y = df.loc[:, labelColumnName].to_numpy()

  n = np.size(X)

  X_train = ['']*n
  y_train = ['']*n

  for i in range(n):
    X_train[i] += str(X[i])
    y_train[i] += str(y[i])

  return X_train, y_train

##import data

In [113]:
# Import of the datasets
drive.mount('/content/drive')

df_train = pd.read_excel('/content/DATASET_downsampled.xlsx')
df_test = pd.read_excel('/content/Trainingskorpus_Final.xlsx')

# Import without google drive
# df_train = pd.read_excel('DATASET_downsampled.xlsx')
# df_test = pd.read_excel('Trainingskorpus_Final.xlsx')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##Label functions

In [114]:
def getTfidf(document, stopWords):

  tfidf = TfidfVectorizer(input = document, stop_words = stopWords)

  X_train_transformed = tfidf.fit_transform(document)

  terms = tfidf.get_feature_names_out()

  # Sum tfidf frequency of each term through documents
  sums = X_train_transformed.sum(axis=0)

  # Connecting term to its sums frequency
  data = []
  for col, term in enumerate(terms):
      data.append( (term, sums[0,col], col ))
  M = np.transpose(X_train_transformed.toarray())

  ranking = pd.DataFrame(data, columns=['term','rank', 'ID'])
  ID_sorted = np.transpose(ranking.sort_values('rank', ascending=False).values)[2]


  terms_sorted = []
  for i, ID in enumerate(ID_sorted):
    terms_sorted.append([terms[ID], ID])

  return tfidf, terms_sorted, M

In [115]:
def predict(X, y, terms_sorted, M):

  pred = []
  
  for review, label in zip(X, y):
    weights = [0, 0, 0, 0]
    for term, ID in terms_sorted:
      if term in review:
        weights += M[ID]
    labels = ["B", "D", "I", "P"]
    pred.append( labels[np.argmax(weights)] )

  return pred


##Evaluation on Testset

In [116]:
train_classes = getDataClass(df_train, 'review', 'kano_labels')
X_test, y_test = getDataAndLabel(df_test, 'review', 'kano_labels')

tfidf, terms_sorted, M = getTfidf(train_classes, 'english')

y_pred = predict(X_test, y_test, terms_sorted, M)

acc = accuracy_score(y_test, y_pred)
prec, rec, f1, supp = precision_recall_fscore_support(y_test, y_pred, average = 'macro')

print("accuracy: ", acc)
print("precision: ", prec)
print("recall: ", rec)
print("f1 score: ", f1)

accuracy:  0.5548705302096177
precision:  0.34482260415991095
recall:  0.3971518987341772
f1 score:  0.3235265239517646


##Evaluation on Testset divided by labels

In [117]:
train_classes = getDataClass(df_train, 'review', 'kano_labels')

accuracy = []
precision = []
recall = []
f1_score = []


for label in ["B", "P", "D", "I"]:
  X_test, y_test = getDataAndLabel(df_test[df_test.kano_labels == label], 'review', 'kano_labels')
  print("------------------------------")
  print("evaluating label", label)
  print("------------------------------")

  tfidf, terms_sorted, M = getTfidf(train_classes, 'english')

  y_pred = predict(X_test, y_test, terms_sorted, M)
  acc = accuracy_score(y_test, y_pred)
  prec, rec, f1, sup = precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division = 1)

  print("accuracy: ", acc)
  print("precision: ", prec)
  print("recall: ", rec)
  print("f1 score: ", f1)

  accuracy.append(acc)
  precision.append(prec)
  recall.append(rec)
  f1_score.append(f1)

------------------------------
evaluating label B
------------------------------
accuracy:  0.6578947368421053
precision:  0.25
recall:  0.9144736842105263
f1 score:  0.19841269841269843
------------------------------
evaluating label P
------------------------------
accuracy:  0.28860759493670884
precision:  0.25
recall:  0.8221518987341772
f1 score:  0.11198428290766207
------------------------------
evaluating label D
------------------------------
accuracy:  0.6421052631578947
precision:  0.25
recall:  0.9105263157894736
f1 score:  0.1955128205128205
------------------------------
evaluating label I
------------------------------
accuracy:  0.0
precision:  0.25
recall:  0.75
f1 score:  0.0


##Consistent vs. inconsistent labels

In [118]:
# Import of 'Trainingskorpus_Final', which gives the final labeling
drive.mount('/content/drive')
testset1 = pd.read_excel('/content/Trainingskorpus_Final.xlsx')

# Import without google drive
# testset1 = pd.read_excel('Trainingskorpus_Final.xlsx')

# Import of 'Trainingskorpus_InitialLabels', which gives the initial labeling of two sometimes differing labels
testset2 = pd.read_excel('/content/Trainingskorpus_InitialLabels.xlsx')

# Import without google drive
# testset2 = pd.read_excel('Trainingskorpus_InitialLabels.xlsx')

testset2 = testset2.drop(columns = ['Unnamed: 0'])

# Merge the two sets so we have the final labels as well as the initial labels in the testset
testset = testset1.merge(testset2, left_on='review', right_on='review')

# Add a column 'equal' which is True if the two initial labels and false otherwise
equal = []

for label1, label2 in zip(testset['label 1'], testset['label 2']):
  if(label1 == label2):
    equal.append(True)
  else:
    equal.append(False)
  
testset['equal'] = equal

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [119]:
from sklearn.metrics._plot.confusion_matrix import confusion_matrix
from pandas.io.formats.style_render import DataFrame
train_classes = getDataClass(df_train, 'review', 'kano_labels')

tfidf, terms_sorted, M = getTfidf(train_classes, 'english')

# for e in [True, False]:
#   X_test = testset[testset.equal == e].review
#   y_test = testset[testset.equal == e].kano_labels

#   y_pred = predict(X_test, y_test, terms_sorted, M)

#   if(e):
#     print("Consistent labels:")
#   else:
#     print("---------\nInconsistent labels:")

#   print("Accuracy: ", accuracy_score(y_test, y_pred))
#   print("Precision, Recall, f1 Score, Support: ", precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=1))
  #print("Phi Coefficient: ", matthews_corrcoef(y_test, y_pred))

X_test = testset.review
y_test = testset.kano_labels

y_pred = predict(X_test, y_test, terms_sorted, M)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision, Recall, f1 Score, Support: ", precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=1))

pred_corr = []
for pred, label in zip(y_pred, y_test):
  pred_corr.append(pred == label)
print("Phi Coefficient: ", matthews_corrcoef(pred_corr, testset.equal))
print(confusion_matrix(pred_corr, testset.equal))

Accuracy:  0.5548705302096177
Precision, Recall, f1 Score, Support:  (0.34482260415991095, 0.3971518987341772, 0.3235265239517646, None)
Phi Coefficient:  0.219140575663304
[[169 553]
 [ 70 830]]


##10-fold cross-validation

In [120]:
# Import of the dataset
train_data = pd.read_excel('/content/DATASET_downsampled.xlsx')

# Import without google drive
# train_data = pd.read_excel('DATASET_downsampled.xlsx')

# 10 fold cross validation
n = 10
kf = KFold(n_splits=n, random_state = 42, shuffle = True)

# Lists to store the values for accuracy, precision, recall and f1-score for each label
resultsAcc = [[],[],[],[],[]]
resultsPrecision = [[],[],[],[],[]]
resultsRecall = [[],[],[],[],[]]
resultsf1 = [[],[],[],[],[]]

for train_index, val_index in kf.split(train_data):
  train_df = train_data.iloc[train_index]
  val_df = train_data.iloc[val_index]

  train_classes =  getDataClass(train_df, 'review', 'kano_labels')
  tfidf, terms_sorted, M = getTfidf(train_classes, 'english')

  # Evaluating on the val_df divided by labels
  for i, label in zip(range(4), ["B", "P", "D", "I"]):
    X_test, y_test = getDataAndLabel(val_df[val_df.kano_labels == label], 'review', 'kano_labels')

    y_pred = predict(X_test, y_test, terms_sorted, M)

    acc = accuracy_score(y_test, y_pred)
    prec, rec, f1, sup = precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=1)

    resultsAcc[i].append(acc)
    resultsPrecision[i].append(prec)
    resultsRecall[i].append(rec)
    resultsf1[i].append(f1)
  
  # Evaluating on the whole val_df
  X_test, y_test = getDataAndLabel(val_df, 'review', 'kano_labels')
  
  y_pred = predict(X_test, y_test, terms_sorted, M)

  acc = accuracy_score(y_test, y_pred)
  prec, rec, f1, sup = precision_recall_fscore_support(y_test, y_pred, average='macro', zero_division=1)

  resultsAcc[4].append(acc)
  resultsPrecision[4].append(prec)
  resultsRecall[4].append(rec)
  resultsf1[4].append(f1)
  

In [121]:
print("10-fold cross-validation")
for i, label in zip(range(5), ["basic", "performance", "delighter", "irrelevant", "overall"]):
  print("------------\n", label, "\n------------")
  print("Average accuracy: ", np.round(np.average(resultsAcc[i]), 3))
  print("Average precision: ", np.round(np.average(resultsPrecision[i]), 3))
  print("Average recall: ", np.round(np.average(resultsRecall[i]), 3))
  print("Average f1 score: ", np.round(np.average(resultsf1[i]), 3))
     

10-fold cross-validation
------------
 basic 
------------
Average accuracy:  0.787
Average precision:  0.283
Average recall:  0.94
Average f1 score:  0.25
------------
 performance 
------------
Average accuracy:  0.528
Average precision:  0.275
Average recall:  0.87
Average f1 score:  0.19
------------
 delighter 
------------
Average accuracy:  0.53
Average precision:  0.258
Average recall:  0.879
Average f1 score:  0.179
------------
 irrelevant 
------------
Average accuracy:  0.235
Average precision:  0.25
Average recall:  0.809
Average f1 score:  0.095
------------
 overall 
------------
Average accuracy:  0.52
Average precision:  0.568
Average recall:  0.52
Average f1 score:  0.5
