In [1]:
pip install scikit-learn pandas numpy streamlit




In [2]:
import pandas as pd
import numpy as np

data = pd.read_csv('/content/drive/MyDrive/Skripsi/data_label.csv')
data.head()

Unnamed: 0,stemming,label
0,makan siang gratis gratis beef teriyakinya pak...,POSITIF
1,dut makan siang gratis dut laper,POSITIF
2,kemensetnegri paham universitas hidup makan si...,NEGATIF
3,detikcom t t kapai operasional tri lembaga neg...,NEGATIF
4,unmagnetism makan siang gratis un,NEGATIF


In [3]:
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer

# Preprocessing teks
def preprocess_text(text):
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # Hilangkan URL
    text = re.sub(r'\@\w+|\#', '', text)  # Hilangkan mention dan hashtag
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Hilangkan karakter non-alfabet
    text = text.lower()  # Ubah ke huruf kecil
    return text

# Terapkan preprocessing pada teks
data['text_cleaned'] = data['stemming'].apply(preprocess_text)

# Encode label sentimen
label_encoder = LabelEncoder()
data['label_encoded'] = label_encoder.fit_transform(data['label'])  # 0=negatif, 1=netral, 2=positif


In [4]:
# TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['text_cleaned']).toarray()
y = data['label_encoded']


In [5]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split data menjadi train dan test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Latih model SVM
model = SVC(kernel='linear', random_state=42)
model.fit(X_train, y_train)

# Evaluasi model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


              precision    recall  f1-score   support

     NEGATIF       0.73      0.62      0.67       164
      NETRAL       0.75      0.07      0.13        43
     POSITIF       0.71      0.89      0.79       262

    accuracy                           0.72       469
   macro avg       0.73      0.52      0.53       469
weighted avg       0.72      0.72      0.69       469



In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['text_cleaned']).toarray()
y = data['label_encoded']


In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# Terapkan SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split data menjadi train dan test
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Latih model SVM
model_with_smote = SVC(kernel='linear', random_state=42)
model.fit(X_train, y_train)

# Evaluasi model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


              precision    recall  f1-score   support

     NEGATIF       0.81      0.85      0.83       255
      NETRAL       0.87      0.97      0.92       252
     POSITIF       0.87      0.75      0.80       267

    accuracy                           0.85       774
   macro avg       0.85      0.85      0.85       774
weighted avg       0.85      0.85      0.85       774



In [9]:
import pickle

# Simpan model SVM
with open('svm_model_smote.pkl', 'wb') as model_file:
    pickle.dump(model_with_smote, model_file)

# Simpan TF-IDF Vectorizer
with open('tfidf_vectorizer_smote.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Simpan Label Encoder (jika digunakan)
with open('label_encoder.pkl', 'wb') as label_encoder_file:
    pickle.dump(label_encoder, label_encoder_file)
