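# Distinguishing comparison in sentences

This notebook classifies sentences as `BETTER`, `WORSE`, or `NONE`, comparing bag-of-words and InferSent features (each fed to XGBoost) on the full sentence versus only its middle part.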

## Data loading

In [1]:
import pandas as pd
from data_extraction import ExtractMiddlePart, ExtractRawSentence
from infersent.infersent_feature import initialize_infersent, InfersentFeature
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

In [2]:
# `train` is what every pipeline below is fitted on; `test` is the held-out
# file that predictions and classification reports are computed against.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("data/data.csv", "data/held-out-data.csv")
)

## BOW + XGBoost

### Full sentences

In [3]:
# Bag-of-words baseline on the full sentence:
# raw-sentence extraction -> token counts -> gradient-boosted trees.
pl = make_pipeline(
    ExtractRawSentence(),
    CountVectorizer(),
    XGBClassifier(n_estimators=1000, n_jobs=-1),
)

# The whole DataFrame goes in; the target is the `most_frequent_label` column.
fitted = pl.fit(train, train['most_frequent_label'].values)

# Predictions on the held-out frame, consumed by the report cell below.
predicted = fitted.predict(test)

In [4]:
# Per-class precision / recall / F1 for the current `predicted` array,
# with the classes listed in a fixed order and three decimal places.
print(
    classification_report(
        y_true=test['most_frequent_label'].values,
        y_pred=predicted,
        digits=3,
        labels=['BETTER', 'WORSE', 'NONE'],
    )
)

              precision    recall  f1-score   support

      BETTER      0.643     0.553     0.594       273
       WORSE      0.491     0.235     0.318       119
        NONE      0.839     0.919     0.877      1048

    accuracy                          0.793      1440
   macro avg      0.658     0.569     0.597      1440
weighted avg      0.773     0.793     0.777      1440



### Middle part of the sentence

In [5]:
# Same bag-of-words + XGBoost setup as the full-sentence run, but the first
# step is ExtractMiddlePart instead of ExtractRawSentence.
pl = make_pipeline(
    ExtractMiddlePart(),
    CountVectorizer(),
    XGBClassifier(n_estimators=1000, n_jobs=-1),
)

# Fit on the training frame against the `most_frequent_label` column.
fitted = pl.fit(train, train['most_frequent_label'].values)

# Overwrites `predicted` with this model's held-out predictions.
predicted = fitted.predict(test)

In [6]:
# Score whatever `predicted` currently holds against the held-out labels;
# class order and precision match the other report cells.
print(
    classification_report(
        y_true=test['most_frequent_label'].values,
        y_pred=predicted,
        digits=3,
        labels=['BETTER', 'WORSE', 'NONE'],
    )
)

              precision    recall  f1-score   support

      BETTER      0.765     0.751     0.758       273
       WORSE      0.542     0.328     0.408       119
        NONE      0.903     0.948     0.925      1048

    accuracy                          0.859      1440
   macro avg      0.736     0.675     0.697      1440
weighted avg      0.847     0.859     0.850      1440



## InferSent + XGBoost

### Full sentences

In [7]:
# Run the raw-sentence extractor over the training frame only; the result is
# passed to initialize_infersent in the next cell.
full_sentences = ExtractRawSentence().transform(train)

In [8]:
# Initialise InferSent from the full training sentences. The cell output
# reports how many words were found with GloVe vectors and the vocab size.
# NOTE(review): initialize_infersent is project code not visible here —
# presumably it loads the pretrained model and builds its vocabulary; confirm.
infersent = initialize_infersent(full_sentences)



Loaded
Found 14943(/16494) words with glove vectors
Vocab size : 14943


In [9]:
# InferSent features on the full sentence: the CountVectorizer step of the
# bag-of-words runs is replaced by InfersentFeature wrapping the model
# initialised in the previous cell.
pl = make_pipeline(
    ExtractRawSentence(),
    InfersentFeature(infersent),
    XGBClassifier(n_estimators=1000, n_jobs=-1),
)

# Fit on the training frame against the `most_frequent_label` column.
fitted = pl.fit(train, train['most_frequent_label'].values)

# Held-out predictions for the report cell below.
predicted = fitted.predict(test)

  sentences[stidx:stidx + bsize]), volatile=True)


In [10]:
# Classification report for the current `predicted` array against the
# held-out labels, classes in fixed order, three decimal places.
print(
    classification_report(
        y_true=test['most_frequent_label'].values,
        y_pred=predicted,
        digits=3,
        labels=['BETTER', 'WORSE', 'NONE'],
    )
)

              precision    recall  f1-score   support

      BETTER      0.708     0.586     0.641       273
       WORSE      0.556     0.210     0.305       119
        NONE      0.849     0.947     0.895      1048

    accuracy                          0.817      1440
   macro avg      0.704     0.581     0.614      1440
weighted avg      0.798     0.817     0.798      1440



### Middle part of the sentence

In [11]:
# Run the middle-part extractor over the training frame only; the result is
# passed to initialize_infersent in the next cell.
middle_part = ExtractMiddlePart().transform(train)

In [12]:
# Re-initialise InferSent, this time from the middle-part texts of the
# training data.
# NOTE(review): this rebinds `infersent`, replacing the model that was built
# from the full sentences. Re-running the full-sentence InferSent pipeline
# cell after this one would silently use this model instead — consider a
# distinct name for each variant.
infersent = initialize_infersent(middle_part)

Loaded
Found 6021(/6555) words with glove vectors
Vocab size : 6021


In [13]:
# InferSent features on the middle part only, using the `infersent` model
# initialised from the middle-part texts in the previous cell.
pl = make_pipeline(
    ExtractMiddlePart(),
    InfersentFeature(infersent),
    XGBClassifier(n_estimators=1000, n_jobs=-1),
)

# Fit on the training frame against the `most_frequent_label` column.
fitted = pl.fit(train, train['most_frequent_label'].values)

# Held-out predictions for the final report cell.
predicted = fitted.predict(test)

In [14]:
# Final report: current `predicted` array versus the held-out labels,
# same class order and precision as the earlier report cells.
print(
    classification_report(
        y_true=test['most_frequent_label'].values,
        y_pred=predicted,
        digits=3,
        labels=['BETTER', 'WORSE', 'NONE'],
    )
)

              precision    recall  f1-score   support

      BETTER      0.768     0.751     0.759       273
       WORSE      0.553     0.353     0.431       119
        NONE      0.901     0.943     0.921      1048

    accuracy                          0.858      1440
   macro avg      0.740     0.682     0.704      1440
weighted avg      0.847     0.858     0.850      1440

