### Implementation of Random Forest, SVM and Logistic Regression

#### Importing required libraries

In [1]:
import os, sys
import pickle
import json
# from sklearn.naive_bayes import MultinomialNB as nb
from sklearn.linear_model import SGDClassifier as svm
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression

#### Reading train and test dataset

In [2]:
cwd = os.getcwd() #Current working directory

#Read training data
f = open(os.path.join(cwd, r'train.pkl'), 'rb')
(X_data, X_label) = pickle.load(f)
f.close()

#Read test data
f = open(os.path.join(cwd, r'test.pkl'), 'rb')
(Y_data, Y_label) = pickle.load(f)
f.close()

train = []
trainLabel = []

label2no = {u'supporting':0, u'querying':1, u'denying':2, u'comment':3}

#Convert list of lists to nd array (Required for SVM Training)
for key in X_label.keys():
    train.append(X_data[key])
    trainLabel.append(label2no[X_label[key]])

train = np.array(train)
trainLabel = np.array(trainLabel)

test = []
testLabel = []

for key in Y_label.keys():
    test.append(Y_data[key])
    testLabel.append(label2no[Y_label[key]])

test = np.array(test)
testLabel = np.array(testLabel)


### sklearn RandomForest Implementation and results

In [3]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()

rf_cls = rf.fit(train, trainLabel.transpose())
y_pred = rf_cls.predict(test)

print("\nRandom forest Result\n")
print("Classification accuracy: ", accuracy_score(testLabel, y_pred))

print("Confusion matrix: ", confusion_matrix(testLabel, y_pred))
target_names = ['supporting', 'querying', 'denying', 'comment']
print(classification_report(testLabel, y_pred, target_names=target_names))
#Accuracy
print(np.mean(y_pred == testLabel)*100)




Random forest Result

Classification accuracy:  0.6352313167259787
Confusion matrix:  [[ 89   0   7 129]
 [ 11   7   1  80]
 [ 14   0   2  71]
 [ 74  10  13 616]]
              precision    recall  f1-score   support

  supporting       0.47      0.40      0.43       225
    querying       0.41      0.07      0.12        99
     denying       0.09      0.02      0.04        87
     comment       0.69      0.86      0.77       713

    accuracy                           0.64      1124
   macro avg       0.41      0.34      0.34      1124
weighted avg       0.57      0.64      0.59      1124

63.52313167259786


### sklearn SVM implementation and result

In [35]:
#SVM Training
svm_clf = svm(loss='squared_hinge', max_iter=1000).fit(train, trainLabel.transpose())

#SVM Testing
print ("\nSVM Result\n")
predicted = svm_clf.predict(test)
print("Classification accuracy: ", accuracy_score(testLabel, predicted))
print("Confusion matrix: ", confusion_matrix(testLabel, predicted))
target_names = ['supporting', 'querying', 'denying', 'comment']
print(classification_report(testLabel, predicted, target_names=target_names))

#Accuracy
print(np.mean(predicted == testLabel)*100)


SVM Result

Classification accuracy:  0.6209964412811388
Confusion matrix:  [[ 90   3   6 126]
 [  2  21  12  64]
 [  5   6   5  71]
 [ 57  24  50 582]]
              precision    recall  f1-score   support

  supporting       0.58      0.40      0.47       225
    querying       0.39      0.21      0.27        99
     denying       0.07      0.06      0.06        87
     comment       0.69      0.82      0.75       713

    accuracy                           0.62      1124
   macro avg       0.43      0.37      0.39      1124
weighted avg       0.59      0.62      0.60      1124

62.09964412811389


#### sklearn Logistic Regression and results

In [36]:

clf = LogisticRegression(random_state=0, solver='lbfgs',
                          multi_class='multinomial').fit(train, trainLabel.transpose())

#LR Testing
print ("\n\n\nLR Result")
predicted = clf.predict(test)
print("Classification accuracy: ", accuracy_score(testLabel, predicted))
print("Confusion matrix: ", confusion_matrix(testLabel, predicted))
target_names = ['supporting', 'querying', 'denying', 'comment']
print(classification_report(testLabel, predicted, target_names=target_names))

#Accuracy
print(np.mean(predicted == testLabel)*100)




LR Result
Classification accuracy:  0.699288256227758
Confusion matrix:  [[ 98   1   1 125]
 [  3  30   1  65]
 [  8   7   0  72]
 [ 24  25   6 658]]
              precision    recall  f1-score   support

  supporting       0.74      0.44      0.55       225
    querying       0.48      0.30      0.37        99
     denying       0.00      0.00      0.00        87
     comment       0.72      0.92      0.81       713

    accuracy                           0.70      1124
   macro avg       0.48      0.42      0.43      1124
weighted avg       0.64      0.70      0.65      1124

69.9288256227758


