# Imports

In [1]:
import sys
import nltk
import numpy as np
import pandas as pd
import pickle
import os

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from matplotlib import pyplot as plt

# Make the notebook's own directory and its parent importable.
sys.path.extend([".", ".."])

# Set Params

In [2]:
# Classifier to train. Supported algorithm codes: "DT", "NB", "SVM".
classifier = "SVM"

# Target column to predict. Supported columns: "impact", "urgency".
column_to_predict = "urgency"

# Load Dataset

In [3]:
# The input file name is derived from the chosen classifier and target column.
weights_csv_path = "./generated data/weight_" + classifier + "_" + column_to_predict + ".csv"

# Every column is read as a string (dtype=str); no numeric parsing happens here.
dfTickets = pd.read_csv(weights_csv_path, dtype=str)

# Split Data

In [4]:
# Build a frame holding only the two feature columns and the target,
# with the target deliberately placed last so it can be sliced off by position.
dic = {'summary': dfTickets["summary"].values,
       'description': dfTickets["description"].values,
       column_to_predict : dfTickets[column_to_predict].values}

df = pd.DataFrame(dic)

# Features are every column except the last; the label is the last column.
# NOTE(review): the columns were loaded with dtype=str, so the estimators are
# left to coerce the feature values -- presumably numeric weights; confirm.
variables = df.iloc[:,:-1]
results = df.iloc[:,-1]

# Hold out 20% of the rows for evaluation. The split is seeded so that the
# metrics printed below and the pickled model are reproducible across
# "Restart Kernel -> Run All"; without random_state every run shuffles differently.
train_data, test_data, train_labels, test_labels = train_test_split(
    variables, results, test_size=0.2, random_state=42
)

# Train Classifier

In [5]:
# Instantiate the untuned estimator that matches the configured algorithm code.
if classifier == "NB":
    clf = MultinomialNB()
elif classifier == "SVM":
    clf = SVC(kernel='linear')
elif classifier == "DT":
    # Seeded so repeated runs grow the same tree.
    clf = DecisionTreeClassifier(random_state=42)
else:
    # Fail loudly instead of silently training a decision tree: the model is
    # later pickled under the `classifier` name, so a typo or an unsupported
    # code would otherwise produce a mislabelled model file.
    raise ValueError(
        "Unsupported classifier %r; expected one of 'DT', 'NB', 'SVM'" % (classifier,)
    )

# fit() returns the estimator itself, so clfI refers to the same object as clf.
clfI = clf.fit(train_data, train_labels)

# Evaluate the Model

In [6]:
# Baseline evaluation on the held-out split, using the model as trained above
# (no hyperparameter tuning has been applied).
predicted = clf.predict(test_data)

# Element-wise match between predictions and true labels; its mean is the accuracy.
is_correct = predicted == test_labels
prediction_acc = np.mean(is_correct)

print("Confusion:")
print(metrics.confusion_matrix(test_labels, predicted))
print("Mean: " + str(prediction_acc))

Confusion:
[[ 33   2   2]
 [  5  56   9]
 [  3   9 197]]
Mean: 0.9050632911392406


# Print the Classification Report

In [7]:
%matplotlib inline
from sklearn.metrics import classification_report
print(classification_report(test_labels, predicted,
                            target_names=np.unique(test_labels)))

              precision    recall  f1-score   support

           1       0.80      0.89      0.85        37
           2       0.84      0.80      0.82        70
           3       0.95      0.94      0.94       209

    accuracy                           0.91       316
   macro avg       0.86      0.88      0.87       316
weighted avg       0.91      0.91      0.91       316



# Save Pickle

In [8]:
# Persist the fitted classifier; the file name encodes the algorithm code and
# the target column.
model_path = './model/'+classifier+'_'+column_to_predict+'_weight_model.pickle'

# Create the output directory on a fresh checkout instead of failing on open().
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# The context manager guarantees the handle is flushed and closed even if
# pickling raises, which the bare open() call did not.
with open(model_path, "wb") as model_file:
    pickle.dump(clf, model_file)