### Import Libraries

In [1]:
import pandas as pd
import time
from sklearn import svm
from sklearn.metrics import classification_report

### Loading the Training and Test Datasets

In [2]:
# The training and test sets are published as separate CSV files, so each one
# is downloaded into its own DataFrame.
train_csv_url = "https://raw.githubusercontent.com/Vasistareddy/sentiment_analysis/master/data/train.csv"
test_csv_url = "https://raw.githubusercontent.com/Vasistareddy/sentiment_analysis/master/data/test.csv"

trainData = pd.read_csv(train_csv_url)  # rows used to fit the vectorizer and classifier
testData = pd.read_csv(test_csv_url)    # rows used only for evaluation

In [3]:
# Shuffle the training frame and show the first five rows as a quick sanity check.
# random_state pins the shuffle so the preview is the same on every
# Restart & Run All; without it a different set of rows is shown each run.
trainData.sample(frac=1, random_state=42).head(5)

Unnamed: 0,Content,Label
1053,""" america's sweethearts "" has an intriguing p...",neg
244,roberto benigni is a clown in the tradition of...,pos
1414,there should be a requirement that a potential...,neg
1528,"for his directoral debut , gary oldman chose a...",neg
1573,have you ever been in an automobile accident w...,neg


### Vectorizing the data

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF settings, gathered in one place so they are easy to tune:
#   min_df=5          -> drop terms that occur in fewer than 5 documents
#   max_df=0.8        -> drop terms that occur in more than 80% of documents
#   sublinear_tf=True -> scale term frequency as 1 + log(tf)
#   use_idf=True      -> apply inverse-document-frequency reweighting
tfidf_options = dict(
    min_df=5,
    max_df=0.8,
    sublinear_tf=True,
    use_idf=True,
)
vectorizer = TfidfVectorizer(**tfidf_options)

# Learn the vocabulary and IDF weights from the training text only, then reuse
# that fitted vocabulary to encode the test text.
train_vectors = vectorizer.fit_transform(trainData["Content"])
test_vectors = vectorizer.transform(testData["Content"])

### Classification using SVM

In [5]:
# Train a linear-kernel SVM on the TF-IDF features and time both phases.
# time.perf_counter() is used rather than time.time() because it is a monotonic,
# high-resolution clock intended for measuring short durations; time.time()
# follows the system wall clock and can jump if that clock is adjusted mid-run.
classifier_linear = svm.SVC(kernel='linear')
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, trainData['Label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1 - t0    # seconds spent fitting the classifier
time_linear_predict = t2 - t1  # seconds spent predicting the test set

In [6]:
# Report how long fitting and prediction took, in seconds.
timing_message = "Training time: %fs; Prediction time: %fs" % (
    time_linear_train,
    time_linear_predict,
)
print(timing_message)

Training time: 9.433649s; Prediction time: 0.897057s


In [7]:
# Request the report as a dict so the metrics for each label can be looked up
# by label name instead of being parsed out of the text report.
report = classification_report(
    testData['Label'],
    prediction_linear,
    output_dict=True,
)
for caption, label in (('positive: ', 'pos'), ('negative: ', 'neg')):
    print(caption, report[label])

positive:  {'precision': 0.9191919191919192, 'recall': 0.91, 'f1-score': 0.9145728643216081, 'support': 100}
negative:  {'precision': 0.9108910891089109, 'recall': 0.92, 'f1-score': 0.9154228855721394, 'support': 100}


### Testing the SVM classifier on the reviews

In [8]:
# Classify one hand-written review with the already-fitted vectorizer and SVM.
review = "SUPERB, I AM IN LOVE IN THIS PHONE"
# transform() takes an iterable of documents, so the single review is wrapped in a list
review_vector = vectorizer.transform([review])
predicted_label = classifier_linear.predict(review_vector)
print(predicted_label)

['pos']


In [9]:
# Second hand-written example: a complaint about a product.
review2 = "Do not purchase this product. My cell phone blast when I switched the charger"
# Encode with the fitted TF-IDF vocabulary (the review is passed as a one-item list)
review_vector = vectorizer.transform([review2])
predicted_label = classifier_linear.predict(review_vector)
print(predicted_label)

['neg']


In [10]:
# Third hand-written example: a report of a defective item.
review3 = "I received defective piece display is not working properly"
# Encode with the fitted TF-IDF vocabulary (the review is passed as a one-item list)
review_vector = vectorizer.transform([review3])
predicted_label = classifier_linear.predict(review_vector)
print(predicted_label)

['neg']
