# Bike Update Command Classification Training

# 1) Data Preprocessing

In [2]:
####################################################################
############ Bike-update vs. non-bike-update command classification ############
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [1]:
def predict(test_str, model):
    """Classify a command as a bike update command or not.

    Parameters
    ----------
    test_str : str or sequence of str
        The command text. A bare string is wrapped into a one-element list,
        because ``model.predict`` is called with a collection of documents.
        If several documents are passed, only the first prediction is used.
    model : object
        Any object exposing ``predict(documents)`` that returns one label per
        document, where label ``1`` means "bike update command".

    Returns
    -------
    str
        A human-readable sentence describing the predicted class.
    """
    documents = [test_str] if isinstance(test_str, str) else test_str
    # ``predict`` yields one label per document; index the first one explicitly
    # instead of relying on the truthiness of a one-element array comparison.
    label = model.predict(documents)[0]
    if label == 1:
        return "The input is a bike update command."
    return "The input is not a bike update command."

In [3]:
# Load the labelled command dataset; the path is relative, so the CSV must sit
# in the notebook's working directory.
df2 = pd.read_csv('2.bike_update_command_vs_non_bike_update_command.csv')

In [4]:
# Preview the first rows to check that the columns loaded as expected.
df2.head()

Unnamed: 0,Command,Response,Label,Cat_Label
0,Perform bike firmware update,Initiating bike firmware update. Please wait w...,1,2
1,Initiate software update for bike,Starting software update process. This may tak...,1,2
2,Update bike's system software,Updating bike's system software. Please do not...,1,2
3,Execute bike firmware upgrade,Executing bike firmware upgrade. Sit tight whi...,1,2
4,Start bike software update process,Commencing bike software update. Sit back and ...,1,2


In [5]:
# Count missing values per column before training.
df2.isna().sum()

Command      0
Response     0
Label        0
Cat_Label    0
dtype: int64

In [6]:
# Preview the last rows of the dataset.
df2.tail()

Unnamed: 0,Command,Response,Label,Cat_Label
1018,Perform mobile upgrade,Perform mobile phone update,0,0
1019,Stop mobile update,Cancel mobile phone update,0,0
1020,Cancel mobile phone update,Cancel mobile phone update,0,0
1021,Check whether my bike software is updated or not,Return updated bike software or not,0,0
1022,Start the motor bike now,Start the bike,0,0


In [7]:
# Summary statistics of the numeric label columns.
df2.describe()

Unnamed: 0,Label,Cat_Label
count,1023.0,1023.0
mean,0.325513,0.651026
std,0.468795,0.937591
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,1.0,2.0
max,1.0,2.0


In [8]:
# Class proportions: fraction of rows carrying each Label value.
df2['Label'].value_counts()/ (len(df2))

Label
0    0.674487
1    0.325513
Name: count, dtype: float64

In [9]:
# Absolute number of rows per Label value.
df2['Label'].value_counts()

Label
0    690
1    333
Name: count, dtype: int64

In [10]:
# Partition the rows by class: Label 1 = bike update command, Label 0 = anything else.
updateBikeCommand = df2.loc[df2['Label'].eq(1)]
updateNonBikeCommand = df2.loc[df2['Label'].eq(0)]

In [11]:
# Row/column counts of each class subset.
updateBikeCommand.shape, updateNonBikeCommand.shape

((333, 4), (690, 4))

In [12]:
# Optional undersampling of the non-update class down to the size of the update
# class. It is left disabled, so the full (imbalanced) dataset is used below.
#updateNonBikeCommand= updateNonBikeCommand.sample(updateBikeCommand.shape[0])

In [13]:
# Recombine both class subsets into one frame (update commands first);
# ignore_index=True renumbers the rows from 0.
data = pd.concat([updateBikeCommand, updateNonBikeCommand], ignore_index=True)

In [14]:
# Sanity check: the combined frame should hold every row of both subsets.
data.shape

(1023, 4)

In [15]:
# Confirm the class counts are unchanged after recombining.
data['Label'].value_counts()

Label
0    690
1    333
Name: count, dtype: int64

In [16]:
# Preview the combined frame.
data.head()

Unnamed: 0,Command,Response,Label,Cat_Label
0,Perform bike firmware update,Initiating bike firmware update. Please wait w...,1,2
1,Initiate software update for bike,Starting software update process. This may tak...,1,2
2,Update bike's system software,Updating bike's system software. Please do not...,1,2
3,Execute bike firmware upgrade,Executing bike firmware upgrade. Sit tight whi...,1,2
4,Start bike software update process,Commencing bike software update. Sit back and ...,1,2


In [17]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the commands for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified; consider stratify=data['Label']
# given the 690/333 class imbalance.
X_train, X_test, y_train, y_test = train_test_split(
    data['Command'],
    data['Label'],
    test_size=0.3,
    random_state=0,
    shuffle=True,
)

In [18]:
# Number of training samples.
X_train.shape

(716,)

In [19]:
# Number of held-out test samples.
X_test.shape

(307,)

# 2) Building the Model (Random Forest)

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

In [21]:
# TF-IDF features feeding a 100-tree random forest. The forest is seeded so that
# Restart & Run All reproduces the same fitted model and the same metrics.
classifier = Pipeline([
    ("tfidf", TfidfVectorizer()),
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=0)),
])

In [22]:
# Fit the whole pipeline (vectorizer and forest) on the training split only.
classifier.fit(X_train, y_train)

In [23]:
import joblib
# Persist the fitted pipeline (vectorizer + classifier) so it can be reloaded later.
joblib.dump(classifier, 'RF_update_bike_command_vs_update_non_bike_command.pkl')

['RF_update_bike_command_vs_update_non_bike_command.pkl']

In [24]:
# Predict labels for the held-out test commands with the Random Forest pipeline.
y_pred = classifier.predict(X_test)

In [25]:
# Show the true labels next to the predictions for a quick visual comparison.
y_test, y_pred

(805    0
 27     1
 77     1
 608    0
 320    1
       ..
 761    0
 982    0
 715    0
 167    1
 928    0
 Name: Label, Length: 307, dtype: int64,
 array([0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
        0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1,
        0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 0, 0, 

In [26]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [27]:
# Fraction of test commands the Random Forest classified correctly.
accuracy_score(y_test, y_pred)

1.0

In [28]:
# Confusion matrix of true vs. predicted labels on the test split (Random Forest).
confusion_matrix(y_test, y_pred)

array([[213,   0],
       [  0,  94]], dtype=int64)

In [29]:
# Per-class precision, recall and F1 on the test split (Random Forest).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       213
           1       1.00      1.00      1.00        94

    accuracy                           1.00       307
   macro avg       1.00      1.00      1.00       307
weighted avg       1.00      1.00      1.00       307



In [30]:
# Reload the persisted Random Forest pipeline and try it on a few hand-written commands.
model = joblib.load('RF_update_bike_command_vs_update_non_bike_command.pkl')
sample_commands = (
    'Start the motor bike now',
    'On the right indicator',
    'Congratulations, You won a lottery ticket worth $1 Million ! To claim call on 446677',
    'Perform bike software update',
    'Can you show me previous week bike update history',
    'Update my mobile IOS',
    'Please start my mobile update',
)
for command in sample_commands:
    # The pipeline expects a collection of documents, so wrap each command in a list.
    test_str = [command]
    print(predict(test_str, model))

The input is not a bike update command.
The input is not a bike update command.
The input is not a bike update command.
The input is a bike update command.
The input is a bike update command.
The input is not a bike update command.
The input is not a bike update command.


# 4) Building the Model (SVM)

In [31]:
from sklearn.svm import SVC

In [32]:
# TF-IDF features feeding a support vector classifier (C=100, gamma='auto').
svm = Pipeline(
    [
        ("tfidf", TfidfVectorizer()),
        ("classifier", SVC(C=100, gamma='auto')),
    ]
)

In [33]:
# Fit the whole SVM pipeline (vectorizer and classifier) on the training split only.
svm.fit(X_train, y_train)

In [34]:
# joblib is also imported in the cell that saves the Random Forest; the repeated
# import is harmless and keeps this cell runnable on its own.
import joblib
# Persist the fitted SVM pipeline under its own file name.
joblib.dump(svm, 'SVM_update_bike_command_vs_update_non_bike_command.pkl')

['SVM_update_bike_command_vs_update_non_bike_command.pkl']

# 5) Predicting the results (SVM)

In [35]:
# Predict with the SVM pipeline. This overwrites the Random Forest predictions
# previously stored in y_pred.
y_pred = svm.predict(X_test)

In [36]:
# Fraction of test commands the SVM classified correctly.
accuracy_score(y_test, y_pred)

1.0

In [37]:
# Confusion matrix of true vs. predicted labels on the test split (SVM).
confusion_matrix(y_test, y_pred)

array([[213,   0],
       [  0,  94]], dtype=int64)

In [38]:
# Per-class precision, recall and F1 on the test split (SVM).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       213
           1       1.00      1.00      1.00        94

    accuracy                           1.00       307
   macro avg       1.00      1.00      1.00       307
weighted avg       1.00      1.00      1.00       307



# 6) Testing of SVM Model

In [39]:
# Reload the persisted SVM pipeline and try it on a few hand-written commands.
# This cell previously loaded the Random Forest pickle, so the SVM model was
# never actually exercised here.
model = joblib.load('SVM_update_bike_command_vs_update_non_bike_command.pkl')
sample_commands = (
    'Start the motor bike now',
    'On the right indicator',
    'Congratulations, You won a lottery ticket worth $1 Million ! To claim call on 446677',
    'Perform bike software update',
    'Can you show me previous week bike update history',
    'Update my mobile IOS',
    'Please start my mobile update',
)
for command in sample_commands:
    # The pipeline expects a collection of documents, so wrap each command in a list.
    test_str = [command]
    print(predict(test_str, model))

The input is not a bike update command.
The input is not a bike update command.
The input is not a bike update command.
The input is a bike update command.
The input is a bike update command.
The input is not a bike update command.
The input is not a bike update command.
