### Import required modules

In [1]:
import pickle
import random
from sklearn.svm import SVC 
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

### Read training data

In [2]:
open_file = open("pickled_data/train_data.pickle", "rb")
train_data = pickle.load(open_file)
open_file.close()

### Shuffling training data

In [3]:
random.shuffle(train_data)

In [4]:
X = [data[0] for data in train_data]
Y = [data[1] for data in train_data]

### Convert traing data into feature vectors

In [5]:
cv = CountVectorizer(max_features = 100) 
X = cv.fit_transform(X).toarray()

### Split training data into training and validation set in 80:20 ratio

In [6]:
train_x, val_x, train_y, val_y = train_test_split(X, Y, test_size = 0.20, random_state = 0)

### Train different classifiers

In [7]:
classifier = SVC() 
classifier.fit(train_x, train_y)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [8]:
classifierMNB = MultinomialNB() 
classifierMNB.fit(train_x, train_y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [9]:
SVC_pred_val_y = classifier.predict(val_x)
MNB_pred_val_y = classifierMNB.predict(val_x)

### Print accuracy score of different classifiers

In [10]:
accuracySVC = accuracy_score(val_y, SVC_pred_val_y)
accuracyMNB = accuracy_score(val_y, MNB_pred_val_y)
print("SVC Classifier accuracy is " + str(accuracySVC*100) + "%")
print("MNB Classifier accuracy is " + str(accuracyMNB*100) + "%")

SVC Classifier accuracy is 97.21723518850987%
MNB Classifier accuracy is 97.30700179533214%


In [11]:
save_svc_classifier = open("pickled_classifiers/svc_classifier.pickle","wb")
pickle.dump(classifier, save_svc_classifier)
save_svc_classifier.close()

In [12]:
save_mnb_classifier = open("pickled_classifiers/mnb_classifier.pickle","wb")
pickle.dump(classifierMNB, save_mnb_classifier)
save_mnb_classifier.close()