## Import Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from collections import Counter
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import psutil, os, time, gc

## Import Dataset

In [2]:
# Load the preprocessed fake-reviews CSV from the Kaggle input mount.
# NOTE(review): the preview below shows an `Unnamed: 0` column, i.e. a saved
# row index is being read back as ordinary data; it is not used later on.
DATASET_CSV = '/kaggle/input/preprocessed-fake-reviews-dataset/preprocessed-fake-reviews-dataset.csv'
df = pd.read_csv(DATASET_CSV)

In [3]:
# Preview the first rows to sanity-check the loaded columns and labels.
df.head()

Unnamed: 0,rating,label,text_,cleaned_text,category_Books_5,category_Clothing_Shoes_and_Jewelry_5,category_Electronics_5,category_Home_and_Kitchen_5,category_Kindle_Store_5,category_Movies_and_TV_5,category_Pet_Supplies_5,category_Sports_and_Outdoors_5,category_Tools_and_Home_Improvement_5,category_Toys_and_Games_5
0,5.0,1,"Love this! Well made, sturdy, and very comfor...",Love this Well made sturdy and very comfortab...,False,False,False,True,False,False,False,False,False,False
1,5.0,1,"love it, a great upgrade from the original. I...",love it a great upgrade from the original Ive...,False,False,False,True,False,False,False,False,False,False
2,5.0,1,This pillow saved my back. I love the look and...,This pillow saved my back I love the look and ...,False,False,False,True,False,False,False,False,False,False
3,1.0,1,"Missing information on how to use it, but it i...",Missing information on how to use it but it is...,False,False,False,True,False,False,False,False,False,False
4,5.0,1,Very nice set. Good quality. We have had the s...,Very nice set Good quality We have had the set...,False,False,False,True,False,False,False,False,False,False


In [4]:
# Replace missing `cleaned_text` entries with empty strings so no NaN reaches
# the TF-IDF vectorizer; only this column is filled, all others are untouched.
df = df.fillna({'cleaned_text': ""})

## Data Splitting

In [5]:
# Stratified 80/20 hold-out split with a fixed seed for reproducibility.
# The whole DataFrame is passed as the feature side, so X_train / X_test keep
# every column (including 'label'); only 'cleaned_text' is vectorized later.
labels = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    df,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

## TF-IDF

In [6]:
# TF-IDF features over unigrams and bigrams of the cleaned review text.
# The vocabulary and IDF weights are learned from the training split only;
# the test split is transformed with that fitted vocabulary.
# NOTE(review): the SVM evaluated below is loaded from a pre-trained pickle,
# so this re-fitted vectorizer must reproduce the training-time vocabulary
# exactly (same data, split and parameters) -- confirm, or persist and load
# the fitted vectorizer alongside the model.
tfidf_params = dict(
    ngram_range=(1, 2),       # unigrams and bigrams
    min_df=30,                # drop terms seen in fewer than 30 documents
    max_df=0.90,              # drop terms seen in more than 90% of documents
    strip_accents='unicode',
    sublinear_tf=True,        # use 1 + log(tf) instead of raw term frequency
)
train_text = X_train['cleaned_text']
test_text = X_test['cleaned_text']

vectorizer = TfidfVectorizer(**tfidf_params)
X_train_tfidf = vectorizer.fit_transform(train_text)
X_test_tfidf = vectorizer.transform(test_text)

In [7]:
# Report the size of the fitted TF-IDF vocabulary (number of extracted features).
n_features = len(vectorizer.vocabulary_)
print("Jumlah fitur yang diekstrak:", n_features)

Jumlah fitur yang diekstrak: 10256


## SVM

In [8]:
# psutil handle for the current kernel process.
# NOTE(review): not used in the cells visible here; presumably kept for
# resource measurements elsewhere -- confirm or remove.
current_pid = os.getpid()
process = psutil.Process(current_pid)

In [9]:
# Load the pre-trained SVM from the Kaggle model input.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load model artifacts from a trusted source.
SVM_MODEL_PATH = '/kaggle/input/fake-reviews-detection-models/scikitlearn/default/1/svm.pkl'
svm_model2 = joblib.load(SVM_MODEL_PATH)

In [10]:
# Inference timing: repeat prediction over the full test matrix and report the
# mean and standard deviation of the per-run wall time.
# time.perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock intended for measuring intervals; time.time() can jump
# when the system clock is adjusted, which would corrupt a long benchmark.
# np.std uses its default ddof=0 (population standard deviation).
N_TIMING_RUNS = 50  # number of repeated predict() calls to average over

times = []
for _ in range(N_TIMING_RUNS):
    start = time.perf_counter()
    svm_model2.predict(X_test_tfidf)  # result discarded; only the duration matters
    end = time.perf_counter()
    times.append(end - start)

print(f"Average inference time: {np.mean(times):.6f} sec")
print(f"Standard deviation of inference time: {np.std(times):.6f} sec")

Average inference time: 65.735264 sec
Standard deviation of inference time: 0.290083 sec


In [11]:
# Predicted labels for the held-out test matrix; these are scored against
# y_test in the metrics cell that follows.
svm_y_pred2 = svm_model2.predict(X_test_tfidf)

In [12]:
# Hold-out evaluation of the SVM predictions against the true test labels.
# precision/recall/F1 rely on scikit-learn's binary defaults, i.e. they are
# computed for the positive class (label 1).
# NOTE(review): the "_cg" / "Fake Review" naming assumes label 1 is the fake
# (computer-generated) class -- confirm against the preprocessing step.
accuracy = accuracy_score(y_test, svm_y_pred2)
precision_cg = precision_score(y_test, svm_y_pred2)
recall_cg = recall_score(y_test, svm_y_pred2)
f1_cg = f1_score(y_test, svm_y_pred2)

# Report all four scores, rounded to four decimal places.
print(f'Akurasi: {accuracy:.4f}')
print(f'Presisi (Fake Review): {precision_cg:.4f}')
print(f'Recall (Fake Review): {recall_cg:.4f}')
print(f'F1-Score (Fake Review): {f1_cg:.4f}')

Akurasi: 0.9531
Presisi (Fake Review): 0.9612
Recall (Fake Review): 0.9443
F1-Score (Fake Review): 0.9527
