In [1]:
from base import *
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier
import time

In [2]:
# Load the dataset from its local directory.
# NOTE(review): `Dataset` and `text_preprocess` are not defined in this
# notebook; presumably they come from the `from base import *` line -- confirm.
LIAR = Dataset('./liar_dataset')

# Training split: preprocess every statement, then fit the TF-IDF vectorizer
# on the training text only and transform it in the same step.
text_train = LIAR.train['statement'].apply(text_preprocess)
tfidf = TfidfVectorizer()
data_train = tfidf.fit_transform(text_train)

# Test split: same preprocessing, but only *transform* with the vectorizer
# already fitted on the training text, so both matrices share one feature space.
text_test = LIAR.test['statement'].apply(text_preprocess)
data_test = tfidf.transform(text_test)

In [3]:
class XGB():
    """Wrapper around ``XGBClassifier`` for six-way, string-labelled classification.

    String labels are converted to integer class ids through ``self.map``
    before being handed to the underlying model. ``fit`` and ``predict``
    print accuracy and elapsed time instead of returning them; the most
    recent predictions (as integer class ids) are kept in ``self.prediction``.
    """

    def __init__(self):
        """Create the underlying classifier and the label-to-id mapping."""
        # Fixed hyper-parameters for the gradient-boosted model.
        self.clf = XGBClassifier(
                    n_estimators = 100,
                    objective = 'multi:softmax',
                    learning_rate = .1,
                    max_leaves = 12,
                    max_depth = 6)

        # Label string -> integer class id (0..5).
        self.map = {
            'pants-fire'  : 0, 
            'false'       : 1, 
            'barely-true' : 2, 
            'half-true'   : 3, 
            'mostly-true' : 4, 
            'true'        : 5
        }
        # Output of the most recent predict() call; None until then.
        self.prediction = None

    def _encode_labels(self, y):
        """Translate string labels into integer class ids via ``self.map``.

        Args:
            y: pandas Series of label strings.

        Returns:
            Series with the same index holding the mapped class ids.

        Raises:
            ValueError: if any entry of ``y`` is missing or is not a key of
                ``self.map``. ``Series.map`` would otherwise turn such entries
                into NaN silently, and the failure would only surface later
                inside the model or the metric with a much less helpful message.
        """
        encoded = y.map(self.map)
        unmapped = encoded.isna()
        if unmapped.any():
            # repr() keeps mixed values (strings, NaN, None) sortable and readable.
            offenders = sorted({repr(value) for value in y[unmapped]})
            raise ValueError(
                f'unknown label(s): {", ".join(offenders)}; '
                f'expected one of {list(self.map)}'
            )
        return encoded

    def fit(self, X, y):
        """Train the classifier, then print training accuracy and fit time.

        Args:
            X: feature matrix passed straight to ``XGBClassifier.fit``.
            y: pandas Series of label strings (keys of ``self.map``).

        Raises:
            ValueError: if ``y`` contains a label that is not in ``self.map``.
        """
        # Encode (and validate) before starting the timer: a bad label fails
        # immediately, and the reported time covers model fitting only.
        y_ = self._encode_labels(y)

        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        self.clf.fit(X, y_)
        execution_time = time.perf_counter() - start

        print(f'accuracy: {accuracy_score(y_, self.clf.predict(X))*100:.2f} %')
        print(f'training time: {execution_time:.2f}s')

    def predict(self, X, y=None):
        """Predict class ids for ``X`` and store them in ``self.prediction``.

        Prints the prediction time, and -- when ground-truth labels are
        supplied -- the accuracy against them. Nothing is returned; read the
        predictions from ``self.prediction``.

        Args:
            X: feature matrix passed straight to ``XGBClassifier.predict``.
            y: optional pandas Series of label strings. When omitted, the
                accuracy line is skipped so unlabelled data can be predicted.

        Raises:
            ValueError: if ``y`` contains a label that is not in ``self.map``.
        """
        y_ = None if y is None else self._encode_labels(y)

        start = time.perf_counter()
        self.prediction = self.clf.predict(X)
        execution_time = time.perf_counter() - start

        if y_ is not None:
            print(f'accuracy: {accuracy_score(y_, self.prediction)*100:.2f} %')
        print(f'prediction time: {execution_time:.2f}s')

In [4]:
# Build the classifier wrapper and train it on the TF-IDF training matrix.
# fit() prints the accuracy on the training data and the time spent fitting.
clf = XGB()
print('Training phase:')
clf.fit(data_train, LIAR.y_train)

# Evaluate on the test matrix. predict() prints the accuracy against the
# supplied labels and the time spent predicting.
print('\nTesting phase:')
clf.predict(data_test, LIAR.y_test)

Training phase:
accuracy: 46.45 %
training time: 19.08s

Testing phase:
accuracy: 24.32 %
prediction time: 0.01s
