In [1]:
import pandas as pd
# Load the tweet dataset and pass every entry of the 'text' column through str(),
# so the column holds only Python strings for the text processing further down.
data = pd.read_csv('Tweets Indonesia.csv').assign(
    text=lambda frame: frame['text'].map(str)
)

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
trainData, testData = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF settings, kept together so they are easy to tune:
#   min_df=5          -> ignore terms that occur in fewer than 5 documents
#   max_df=0.8        -> ignore terms that occur in more than 80% of documents
#   sublinear_tf=True -> use 1 + log(tf) instead of the raw term frequency
#   use_idf=True      -> apply inverse-document-frequency reweighting
tfidf_options = {
    'min_df': 5,
    'max_df': 0.8,
    'sublinear_tf': True,
    'use_idf': True,
}
vectorizer = TfidfVectorizer(**tfidf_options)

# The vocabulary and IDF weights are learned from the training text only;
# the test text is then projected onto that same fitted vocabulary.
train_vectors = vectorizer.fit_transform(trainData['text'])
test_vectors = vectorizer.transform(testData['text'])

In [4]:
import time
from sklearn import svm
from sklearn.metrics import classification_report
# Train a linear-kernel SVM on the TF-IDF features and time both phases.
classifier_linear = svm.SVC(kernel='linear')

# time.perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; unlike time.time() it cannot be skewed by system clock
# adjustments that happen while training is running.
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, trainData['label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1 - t0
time_linear_predict = t2 - t1

# Report the timings, then the per-class metrics on the held-out split.
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
report = classification_report(testData['label'], prediction_linear, output_dict=True)

# With output_dict=True the per-class entries are keyed by the label rendered as a
# string; the sentiment names below are the ones this notebook assigns to each label.
for sentiment, label_key in (('positive', '1'), ('negative', '-1'), ('neutral', '0')):
    print('%s: ' % sentiment, report[label_key])

Training time: 23.339051s; Prediction time: 3.456476s
positive:  {'precision': 0.7631241997439181, 'recall': 0.7932564330079858, 'f1-score': 0.7778986295410051, 'support': 2254.0}
negative:  {'precision': 0.7101024890190337, 'recall': 0.6254029658284977, 'f1-score': 0.665066849502914, 'support': 1551.0}
neutral:  {'precision': 0.6628506010303378, 'recall': 0.7013930950938825, 'f1-score': 0.6815773984696881, 'support': 1651.0}


In [8]:
# Sanity-check the fitted pipeline on a single hand-written sentence.
review = "Tatapannya sangat tajam"
# transform() takes a collection of documents, hence the one-element list.
review_vector = vectorizer.transform([review])
print(classifier_linear.predict(review_vector))

[0]


In [6]:
import pickle
# Persist the fitted vectorizer and classifier so they can be reloaded later
# without retraining. The context managers guarantee that each file is flushed
# and closed, even if pickling raises part-way through.
# NOTE: loading a pickle can execute arbitrary code, so only unpickle these
# files when they come from a trusted location.
with open('vectorizer.sav', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)
with open('classifier.sav', 'wb') as classifier_file:
    pickle.dump(classifier_linear, classifier_file)