In [2]:
import pandas as pd

In [3]:
# Read the two CSV inputs from the working directory: the feature table
# (presumably TF-IDF vectors, going by the file name — TODO confirm) and the
# table that carries the `ttype` label column used further down.
X_tfidf, split_data = (
    pd.read_csv(csv_path)
    for csv_path in ('X_tfidf.csv', 'split_data_with_sentiments.csv')
)

In [4]:
from sklearn.decomposition import PCA
# Project the feature table onto its first 5 principal components and take
# the `ttype` column as the classification target.
#
# random_state is pinned (same seed as the rest of the notebook) because
# PCA's default solver selection can fall back to a randomized SVD on large
# inputs, which would otherwise let the components change between runs.
#
# NOTE(review): the projection is fitted on every row, before the train/test
# split performed in the next cell, so held-out rows influence the
# components. Consider fitting on the training rows only.
# NOTE(review): assumes rows of X_tfidf and split_data are in the same
# order — TODO confirm against the code that wrote the CSVs.
pca = PCA(n_components=5, random_state=42)
X = pca.fit_transform(X_tfidf)
y = split_data["ttype"]

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the seed keeps the partition
# stable across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Show the (rows, columns) shape of each feature partition, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(3993, 5)
(999, 5)


In [10]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report

# Hard-voting ensemble over three heterogeneous base models: the predicted
# class is the majority vote of the individual predictions.
#
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# check the pinned version before removing it, since doing so can change the
# fitted model.
clf1 = LogisticRegression(multi_class='multinomial', random_state=42)
# Seeded like the other estimators so the tree (and therefore the ensemble
# scores) are reproducible from run to run.
clf2 = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_split=2,
    random_state=42,
)
# probability=True is not needed for hard voting, but is kept because the
# `estimators` list is reused by the stacking cell further down, where
# probability outputs can serve as meta-features.
clf3 = SVC(random_state=42, probability=True)
estimators = [('lr', clf1), ('dt', clf2), ('svc', clf3)]
vc = VotingClassifier(estimators=estimators, voting='hard')
vc.fit(X_train, y_train)
vc_pred = vc.predict(X_test)
# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, vc_pred))

              precision    recall  f1-score   support

          -1       0.56      0.33      0.42       484
           1       0.55      0.76      0.64       515

    accuracy                           0.55       999
   macro avg       0.56      0.54      0.53       999
weighted avg       0.56      0.55      0.53       999



In [8]:
from sklearn.ensemble import BaggingClassifier
# Bagging ensemble of 10 default-configured SVCs, evaluated on the held-out
# split.
#
# The base model is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, so either keyword fails on some versions. The first
# positional slot is the base model under both names.
bag = BaggingClassifier(SVC(), n_estimators=10, random_state=42).fit(X_train, y_train)
bag_pred = bag.predict(X_test)
print(classification_report(y_test, bag_pred))


              precision    recall  f1-score   support

          -1       0.57      0.32      0.41       484
           1       0.55      0.77      0.64       515

    accuracy                           0.55       999
   macro avg       0.56      0.55      0.52       999
weighted avg       0.56      0.55      0.53       999



In [9]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with 100 boosting rounds. No base learner is given, so the
# library default applies (decision trees, per the original author's note).
ada_boost = AdaBoostClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)
y_pred = ada_boost.predict(X_test)
# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

          -1       0.54      0.50      0.52       484
           1       0.56      0.59      0.57       515

    accuracy                           0.55       999
   macro avg       0.55      0.55      0.55       999
weighted avg       0.55      0.55      0.55       999



In [11]:
from sklearn.ensemble import StackingClassifier
# Stacked ensemble: the (name, model) pairs in `estimators` — defined in the
# voting-classifier cell — act as base learners, and a default
# LogisticRegression combines their outputs into the final prediction.
stacking = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
).fit(X_train, y_train)
y_pred = stacking.predict(X_test)
# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

          -1       0.55      0.44      0.49       484
           1       0.56      0.67      0.61       515

    accuracy                           0.56       999
   macro avg       0.56      0.55      0.55       999
weighted avg       0.56      0.56      0.55       999

