In [31]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import time

In [32]:
# 1. Membaca Data
data = pd.read_csv("./Hasil_Labeling_imdb.csv")

In [33]:
# 2. Pemisahan Fitur dan Label
X = data['reviews']
y = data['sentiment']

In [34]:
# 3. Pembagian Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
# 4. Ekstraksi Fitur (Contoh: menggunakan CountVectorizer untuk n-gram)
vectorizer = CountVectorizer(ngram_range=(1, 2))  # Ubah ngram_range sesuai kebutuhan
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [37]:
start_time = time.time()

mi_scores = mutual_info_classif(X_train_vectorized, y_train)
k_best_features = sorted(range(len(mi_scores)), key=lambda i: mi_scores[i], reverse=True)[:10] # Ambil 1000 fitur terbaik
X_train_mi = X_train_vectorized[:, k_best_features]
X_test_mi = X_test_vectorized[:, k_best_features]

end_time = time.time()
print("Waktu eksekusi untuk seleksi fitur menggunakan Mutual Information: {:.2f} detik".format(end_time - start_time))

Waktu eksekusi untuk seleksi fitur menggunakan Mutual Information: 3446.30 detik


In [38]:
# 6. Pelatihan Model SVM dengan Fitur Mutual Information
svm_model_mi = SVC(kernel='linear')
svm_model_mi.fit(X_train_mi, y_train)
y_pred_mi = svm_model_mi.predict(X_test_mi)
accuracy_mi = accuracy_score(y_test, y_pred_mi)
print("Akurasi menggunakan Mutual Information setelah seleksi fitur: {:.2f}".format(accuracy_mi))

Akurasi menggunakan Mutual Information setelah seleksi fitur: 0.84


In [39]:
# 6. Seleksi Fitur menggunakan Chi-square
chi2_selector = SelectKBest(score_func=chi2, k=1000)  # Ubah nilai k sesuai kebutuhan
X_train_chi2 = chi2_selector.fit_transform(X_train_vectorized, y_train)
X_test_chi2 = chi2_selector.transform(X_test_vectorized)

In [40]:
# 7. Pelatihan Model SVM dengan Fitur Mutual Information
svm_model_mi = SVC(kernel='linear')
svm_model_mi.fit(X_train_mi, y_train)
y_pred_mi = svm_model_mi.predict(X_test_mi)
accuracy_mi = accuracy_score(y_test, y_pred_mi)
print("Accuracy using mutual information:", accuracy_mi)

Accuracy using mutual information: 0.8367551326990486


In [41]:
# 8. Pelatihan Model SVM dengan Fitur Chi-square
svm_model_chi2 = SVC(kernel='linear')
svm_model_chi2.fit(X_train_chi2, y_train)
y_pred_chi2 = svm_model_chi2.predict(X_test_chi2)
accuracy_chi2 = accuracy_score(y_test, y_pred_chi2)
print("Accuracy using chi-square:", accuracy_chi2)

Accuracy using chi-square: 0.8868302453680521
