In [1]:
import pandas as pd
from imblearn.combine import SMOTETomek
from sklearn.model_selection import train_test_split

df = pd.read_csv("dataset/preprocessed.csv")
df.shape

(762106, 44)

In [2]:
df = df.drop(df[df.target == -1].index)
df.shape

(762094, 44)

In [3]:
# Separate input features and target
y = df.target

In [4]:
X = df.drop('target', axis = 1)

In [5]:
# setting up testing and training sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)

In [6]:
# Re-sample the training set

sm = SMOTETomek(random_state = 27, n_jobs = -1)

X_train, y_train = sm.fit_sample(X_train, y_train)

In [7]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
import xgboost as xgb

RF = RandomForestClassifier(n_estimators = 100, n_jobs = 1)
DT = DecisionTreeClassifier(criterion = "entropy", random_state = 23)
xgboost = xgb.XGBClassifier(learning_rate = 0.1, n_estimators = 100, max_depth = 20, nthread = 1, random_state = 1, verbosity = 0, gamma = 0.5)

vote = VotingClassifier(estimators = [("RF", RF),("DT", DT),("XGB", xgboost)], voting = "soft", n_jobs = 1).fit(X_train, y_train)

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

predictions = vote.predict(X_test)

print("Accuracy score: ", accuracy_score(y_test, predictions))
print("f1 score: ", f1_score(y_test, predictions, average = 'weighted'))
print("Precision score: ", precision_score(y_test, predictions, average = 'weighted'))
print("Recall score: ", recall_score(y_test, predictions, average = 'weighted'))

Accuracy score:  0.5589059646028847
f1 score:  0.55949530447443
Precision score:  0.5606771889486065
Recall score:  0.5589059646028847


In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix

classes = ["1", "2", "3", "4", "5"]
title = "Majority Voting Normalized confusion matrix"
disp = plot_confusion_matrix(vote, X_test, y_test, display_labels = classes, cmap = "RdYlGn", normalize = "true")
disp.ax_.set_title(title)
plt.savefig("maj_vot.png")