In [77]:
import pandas as pd

In [78]:
data = pd.read_csv('FRs.csv')

In [79]:
data.head()

Unnamed: 0,FRs,Classes
0,Admin clients can login through their pre-assi...,Event-Driven
1,Extra admin clients can either login using the...,Event-Driven
2,User will be kept logged in if they opt to sta...,State-Driven
3,Clients will be provided associated access to ...,Event-Driven
4,An error will be generated prompting client to...,State-Driven


In [80]:
len(data)

68

In [81]:
data.Classes.unique()

array(['Event-Driven', 'State-Driven', 'Ubiquitous', 'Unwanted Behaviour'],
      dtype=object)

In [82]:
#Checking for the null values.
data.isnull().sum()

FRs        0
Classes    0
dtype: int64

In [158]:
#Data Preprocessing
import re
import nltk
nltk.download('stopwords')
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords

corpus = []
for i in range(0,68):
    expression = re.sub('[^a-zA-Z]',' ',data['FRs'][i])
    expression = re.sub(r'\s+[a-zA-Z]\s+', ' ',data['FRs'][i])
    expression = re.sub(r'\^[a-zA-Z]\s+', ' ',data['FRs'][i])
    expression = expression.lower()
    expression = expression.split()

    ps = PorterStemmer()
    expression = [ps.stem(word) for word in expression if not word in set(stopwords.words('english'))]
    expression = ' '.join(expression)
    corpus.append(expression)

[nltk_data] Downloading package stopwords to C:\Users\Syed Ali
[nltk_data]     Asad\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [159]:
print(corpus)

['admin client login pre-assign usernam password.', 'extra admin client either login use manual regist credenti social account (gmail).', 'user kept log opt stay sing time login in.', 'client provid associ access account log in.', 'error gener prompt client correct credenti case invalid input log in.', 'extra admin client get regist email addresses.', 'extra admin client provid follow inform order register: email, password, full name, profil pictur (optional, (optional), address (optional).', 'client? account creat success submiss information.', 'error gener prompt client correct inform case invalid input sign up.', 'client? email verifi sign up.', 'client abl chang password profile.', 'client ask old password verification.', 'client ask provid new password confirm old password.', 'client? password updat success submiss new password.', 'case invalid new password, error gener user.', 'notif sent user via email sm chang password.', 'client request link reset password forgotten password.'

In [160]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(corpus,data['Classes'],test_size = 0.2,random_state = 0)

In [161]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 100)
X_train_counts  = cv.fit_transform(X_train)
# y = data.iloc[:,1].values
# print(X)

# Here we are applying different classifiers to get the one with best accuracy.

# KNN Classifier

In [162]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier()
clf = classifier.fit(X_train_counts,y_train)

In [163]:
y_pred = clf.predict(cv.transform(X_test))
y_pred

array(['Ubiquitous', 'Ubiquitous', 'Event-Driven', 'Ubiquitous',
       'Unwanted Behaviour', 'Event-Driven', 'Event-Driven',
       'Event-Driven', 'Ubiquitous', 'Event-Driven', 'Event-Driven',
       'Ubiquitous', 'Ubiquitous', 'Event-Driven'], dtype=object)

In [164]:
from sklearn import metrics
print(metrics.classification_report(y_test,y_pred))

                    precision    recall  f1-score   support

      Event-Driven       1.00      0.88      0.93         8
      State-Driven       0.00      0.00      0.00         1
        Ubiquitous       0.67      1.00      0.80         4
Unwanted Behaviour       1.00      1.00      1.00         1

          accuracy                           0.86        14
         macro avg       0.67      0.72      0.68        14
      weighted avg       0.83      0.86      0.83        14



  _warn_prf(average, modifier, msg_start, len(result))


In [165]:
print("Model Accuracy : ", metrics.accuracy_score(y_test,y_pred)*100)

Model Accuracy :  85.71428571428571


In [166]:
y_test

48            Ubiquitous
27          Event-Driven
32          Event-Driven
22            Ubiquitous
31    Unwanted Behaviour
45          Event-Driven
30          Event-Driven
60          Event-Driven
55            Ubiquitous
62          Event-Driven
28          Event-Driven
41            Ubiquitous
67          State-Driven
58          Event-Driven
Name: Classes, dtype: object

In [167]:
s = 'Admin clients can login through their pre-assigned username and password'
s1 = 'User will be kept logged'

In [168]:

new_test = cv.transform([s1])
new_test

<1x100 sparse matrix of type '<class 'numpy.int64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [169]:
result = clf.predict(new_test)

In [170]:
print(result)

['Ubiquitous']


# Naive_Bayes Classifier

In [171]:
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB()
clf = classifier.fit(X_train_counts,y_train)

In [172]:
y_pred = clf.predict(cv.transform(X_test))

In [173]:
print(metrics.classification_report(y_test,y_pred))

                    precision    recall  f1-score   support

      Event-Driven       0.78      0.88      0.82         8
      State-Driven       0.00      0.00      0.00         1
        Ubiquitous       0.75      0.75      0.75         4
Unwanted Behaviour       1.00      1.00      1.00         1

          accuracy                           0.79        14
         macro avg       0.63      0.66      0.64        14
      weighted avg       0.73      0.79      0.76        14



  _warn_prf(average, modifier, msg_start, len(result))


In [174]:
print("Model Accuracy : ", metrics.accuracy_score(y_test,y_pred)*100)

Model Accuracy :  78.57142857142857


# Decision Tree Classifier

In [175]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train_counts,y_train)

In [176]:
y_pred = clf.predict(cv.transform(X_test))

In [177]:
print(metrics.classification_report(y_test,y_pred))

                    precision    recall  f1-score   support

      Event-Driven       0.80      0.50      0.62         8
      State-Driven       1.00      1.00      1.00         1
        Ubiquitous       0.43      0.75      0.55         4
Unwanted Behaviour       1.00      1.00      1.00         1

          accuracy                           0.64        14
         macro avg       0.81      0.81      0.79        14
      weighted avg       0.72      0.64      0.65        14



In [178]:
print("Model Accuracy : ", metrics.accuracy_score(y_test,y_pred)*100)

Model Accuracy :  64.28571428571429


# Support Vector Machine Classifier

In [179]:
from sklearn.svm import SVC
classifier = SVC()
clf = classifier.fit(X_train_counts,y_train)

In [180]:
y_pred = clf.predict(cv.transform(X_test))

In [181]:
print(metrics.classification_report(y_test,y_pred))

                    precision    recall  f1-score   support

      Event-Driven       0.80      1.00      0.89         8
      State-Driven       0.00      0.00      0.00         1
        Ubiquitous       0.75      0.75      0.75         4
Unwanted Behaviour       0.00      0.00      0.00         1

          accuracy                           0.79        14
         macro avg       0.39      0.44      0.41        14
      weighted avg       0.67      0.79      0.72        14



  _warn_prf(average, modifier, msg_start, len(result))


In [182]:
print("Model Accuracy : ", metrics.accuracy_score(y_test,y_pred)*100)

Model Accuracy :  78.57142857142857


# Linear Model Classifier

In [183]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
clf = classifier.fit(X_train_counts,y_train)

In [184]:
y_pred = clf.predict(cv.transform(X_test))

In [185]:
print(metrics.classification_report(y_test,y_pred))

                    precision    recall  f1-score   support

      Event-Driven       0.88      0.88      0.88         8
      State-Driven       0.00      0.00      0.00         1
        Ubiquitous       0.67      1.00      0.80         4
Unwanted Behaviour       0.00      0.00      0.00         1

          accuracy                           0.79        14
         macro avg       0.39      0.47      0.42        14
      weighted avg       0.69      0.79      0.73        14



  _warn_prf(average, modifier, msg_start, len(result))


In [186]:
print("Model Accuracy : ", metrics.accuracy_score(y_test,y_pred)*100)

Model Accuracy :  78.57142857142857


# Summary of Accuracy Rates

In [None]:
Naive_bayes = 78.57%
K-NearestNeighbor = 85.71%
SVM =  78.57%
Decision Tree = 64.28%
linear_model = 78.57%