In [1]:
import pandas as pd
# Load the pre-cleaned tweets.
data = pd.read_csv('Clean_Tweets_Indonesia.csv')
# read_csv parses empty fields as NaN; str(NaN) would inject the literal token
# 'nan' into the corpus, so missing texts are mapped to an empty string instead.
# Every other value is coerced to str so the vectorizer only ever sees strings.
data['text'] = data['text'].map(lambda text: '' if pd.isna(text) else str(text))

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
trainData, testData = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Turn the tweet text into TF-IDF feature vectors.
# min_df=5 (an int) ignores terms found in fewer than 5 documents, max_df=0.8
# (a float) ignores terms found in more than 80% of them, and sublinear_tf
# replaces the raw term frequency with 1 + log(tf).
vectorizer = TfidfVectorizer(use_idf=True, sublinear_tf=True, min_df=5, max_df=0.8)

# The vocabulary and IDF weights are learned from the training split only ...
train_vectors = vectorizer.fit_transform(trainData['text'])
# ... and the already-fitted vectorizer is then applied to the held-out split.
test_vectors = vectorizer.transform(testData['text'])

In [4]:
import time
from sklearn import svm
from sklearn.metrics import classification_report
# Perform classification with SVM, kernel=rbf
# NOTE: the "*_linear" names are historical -- the kernel used here is RBF.
# They are kept unchanged because the persistence cell references classifier_linear.
classifier_linear = svm.SVC(kernel='rbf')

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the model is training.
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, trainData['label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1 - t0
time_linear_predict = t2 - t1

# results
print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))
report = classification_report(testData['label'], prediction_linear, output_dict=True)
# With output_dict=True the per-class metrics are keyed by the label rendered
# as a string: '1' is positive, '-1' is negative and '0' is neutral.
for sentiment, label_key in (('positive', '1'), ('negative', '-1'), ('neutral', '0')):
    print(sentiment + ': ', report[label_key])

Training time: 78.232771s; Prediction time: 7.413048s
positive:  {'precision': 0.8842975206611571, 'recall': 0.8544809228039042, 'f1-score': 0.8691335740072202, 'support': 2254.0}
negative:  {'precision': 0.8505415162454874, 'recall': 0.7595099935525468, 'f1-score': 0.8024523160762943, 'support': 1551.0}
neutral:  {'precision': 0.7321711568938193, 'recall': 0.8394912174439734, 'f1-score': 0.7821670428893905, 'support': 1651.0}


In [5]:
import pickle
import os

# Ensure Web App folder exists
web_app_folder = os.path.join('..', 'Web App')
os.makedirs(web_app_folder, exist_ok=True)

# Persist the fitted vectorizer and the trained classifier side by side.
# The files are opened in `with` blocks so each handle is flushed and closed
# as soon as its dump finishes, instead of being left open.
# pickling the vectorizer
with open(os.path.join(web_app_folder, 'vectorizer.sav'), 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)
# pickling the model
with open(os.path.join(web_app_folder, 'classifier.sav'), 'wb') as classifier_file:
    pickle.dump(classifier_linear, classifier_file)

print(f"Saved vectorizer.sav and classifier.sav to {web_app_folder}")

Saved vectorizer.sav and classifier.sav to ..\Web App
