In [1]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
import pandas as pd
from datetime import datetime

In [5]:
# Load the labelled review dataset and pull out the text and label columns.
data_file = "../Data/AllData_Sentiment.csv"
data = pd.read_csv(data_file)
reviews, polarity = data['Reviews'], data['Polarity']

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    polarity,
    test_size=0.25,
    random_state=0,
)

# Bag-of-words features. The vocabulary is learned from the training split
# only and then reused, unchanged, to encode the held-out split.
countVectorizer = CountVectorizer(min_df=4, max_df=0.85)
X_train = countVectorizer.fit_transform(X_train)
X_test = countVectorizer.transform(X_test)

# Single-point grid: GridSearchCV cross-validates this one configuration and
# then refits it on the full training split.
# NOTE(review): per the scikit-learn SVC docs, 'degree' only affects the
# 'poly' kernel, so it appears to have no effect with 'rbf' -- confirm.
parameters = {
    'C': [3],
    'gamma': [0.1],
    'kernel': ['rbf'],
    'degree': [1],
}

svmClf = GridSearchCV(estimator=SVC(), param_grid=parameters)
svmClf.fit(X_train, y_train)

# Score the fitted model on the held-out split.
svmClf_ypred = svmClf.predict(X_test)
accuracy_svmClf = accuracy_score(y_test, svmClf_ypred)
f1_svmClf = f1_score(y_test, svmClf_ypred, average='weighted')
    


In [6]:
# Report the held-out accuracy computed in the training cell.
print(accuracy_svmClf)

0.938377973403259


In [None]:
import pickle

# Persist the fitted vectorizer and classifier so they can be reloaded
# without retraining. Each file is opened in a `with` block so the handle is
# flushed and closed deterministically, even if pickling raises.
with open("../sentiment_vectorizer.pickle", "wb") as vectorizer_file:
    pickle.dump(countVectorizer, vectorizer_file)

with open("../sentiment_classifier.pickle", "wb") as classifier_file:
    pickle.dump(svmClf, classifier_file)

In [None]:
### get all the predicted polarity

# Vectorize and classify every review in a single batch instead of calling
# transform()/predict() once per row. This avoids the per-call overhead and
# does not rely on `reviews` having a default 0..n-1 index for `reviews[i]`.
# Note: `reviews` is the full dataset, so it includes the rows the classifier
# was fitted on; these predictions are labels, not an unbiased evaluation.
all_review_features = countVectorizer.transform(reviews)

new_reviews = reviews.tolist()
new_polarity = polarity.tolist()
new_predicted_polarity = svmClf.predict(all_review_features).tolist()

# Write the original text, the true label and the predicted label side by
# side; the DataFrame's default integer index is written as the first column.
new_data = {
    'Reviews': new_reviews,
    'Polarity': new_polarity,
    'Predicted Polarity': new_predicted_polarity,
}
new_df = pd.DataFrame.from_dict(new_data)
new_df.to_csv('../Data/AllData_Sentiment_Labelled.csv')
