In [1]:
!pip install datasets transformers[torch] accelerate -q
!pip install spacy
!pip install https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.7.0/xx_ent_wiki_sm-3.7.0.tar.gz

import torch
import pandas as pd
import numpy as np
import re
import spacy
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline
)
from sklearn.metrics import accuracy_score, f1_score

# Ensure GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

Collecting https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.7.0/xx_ent_wiki_sm-3.7.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.7.0/xx_ent_wiki_sm-3.7.0.tar.gz (11.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m85.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Using device: cuda
Tesla T4


In [2]:
print("\n--- Loading SentiTaglish dataset ---")
# Load the dataset from Hugging Face
dataset = load_dataset("ccosme/SentiTaglishProductsAndServices", split="train")
df = dataset.to_pandas()
df.rename(columns={'review': 'text'}, inplace=True)

# --- Heuristic Labeling for "Fake vs. Genuine" ---
# We define a rule: "Fake" reviews are often short and have extreme
# sentiment (1-star or 4-star), while "Genuine" reviews are more moderate or detailed.
# Target: 0 = Genuine, 1 = Fake. Initialize to -1 (Abstain)
df['label'] = -1

# HEURISTIC 1: Short (<50 chars) & Extreme Sentiment (1 or 4) -> FAKE (1)
df.loc[(df['sentiment'].isin([1, 4])) & (df['text'].str.len() < 50), 'label'] = 1

# HEURISTIC 2: Detailed Complaint (Long >100 chars & Negative) -> GENUINE (0)
df.loc[(df['sentiment'] == 1) & (df['text'].str.len() > 100), 'label'] = 0

# HEURISTIC 3: Moderate Sentiment (2 or 3) -> GENUINE (0)
df.loc[df['sentiment'].isin([2, 3]), 'label'] = 0

# Create the final, filtered DataFrame
final_df = df[df['label'] != -1].copy()
final_df['label'] = final_df['label'].astype(int)

print(f"\nSuccessfully created 'final_df' with {len(final_df)} labeled reviews.")
print("New label distribution (0=Genuine, 1=Fake):")
print(final_df['label'].value_counts())


--- Loading SentiTaglish dataset ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

SentiTaglish_ProductsAndServices.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/10510 [00:00<?, ? examples/s]


Successfully created 'final_df' with 6482 labeled reviews.
New label distribution (0=Genuine, 1=Fake):
label
0    6126
1     356
Name: count, dtype: int64


In [3]:
print("\n--- Executing NER & Lexical Feature Extraction ---")
# Load the stable, pre-installed multilingual spaCy model
try:
    nlp = spacy.load("xx_ent_wiki_sm")
    print("Successfully loaded multilingual spaCy model (xx_ent_wiki_sm).")
except Exception as e:
    print(f"FATAL ERROR loading spaCy model: {e}")

# Define the feature extraction function (with NaN-safe fix)
def extract_review_features(text):
    if not isinstance(text, str):
        return 0, 0, 0

    doc = nlp(text)

    # 1. NER Proxy: Specificity (Counts numbers and dates)
    cardinal_count = sum(1 for ent in doc.ents if ent.label_ in ['CARDINAL', 'DATE'])

    # 2. Lexical Cues: Authenticity (First-Person Pronouns)
    pronoun_pattern = re.compile(r'\b(ako|ko|akin|I|my|mine)\b', re.IGNORECASE)
    pronoun_count = len(pronoun_pattern.findall(text))

    # 3. Lexical Cues: Hyperbole (Common in fake reviews)
    hype_pattern = re.compile(r'\b(super|sobrang|grabe|best ever|highly recommended|perfect)\b', re.IGNORECASE)
    hype_count = len(hype_pattern.findall(text))

    return cardinal_count, pronoun_count, hype_count

# Apply the function to create new columns in our DataFrame
final_df[['cardinal_count', 'pronoun_count', 'hype_count']] = final_df.apply(
    lambda row: extract_review_features(row['text']),
    axis=1,
    result_type='expand'
)
print("NER/Lexical features successfully added to 'final_df'.")


--- Executing NER & Lexical Feature Extraction ---
Successfully loaded multilingual spaCy model (xx_ent_wiki_sm).
NER/Lexical features successfully added to 'final_df'.


In [11]:
print("\n--- Setting up Text Classification ---")
# 1. Convert Pandas DataFrame back to Hugging Face Dataset
labeled_dataset = Dataset.from_pandas(final_df)

# 2. Create Train/Test Splits
labeled_datasets = labeled_dataset.train_test_split(test_size=0.2, seed=42)

# 3. Initialize Tokenizer and Model (Correctly!)
MODEL_NAME = "jcblaise/bert-tagalog-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# 4. Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length")

tokenized_train = labeled_datasets["train"].map(tokenize_function, batched=True)
tokenized_eval = labeled_datasets["test"].map(tokenize_function, batched=True)

# 5. Rename 'label' to 'labels' (required by Trainer) and set format
tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_eval = tokenized_eval.rename_column("label", "labels")
tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_eval.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# 6. Load the Model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2  # 2 labels: 0 (Genuine) and 1 (Fake)
).to(device)

print(f"Model Loaded: {MODEL_NAME} for 2-label (Fake/Genuine) classification.")

# 7. Define Metrics (Fixed)
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}

# 8. Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    learning_rate=2e-5,
    logging_dir='./logs',
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    fp16=torch.cuda.is_available(),
    report_to="none", # Disable wandb
)

# 9. Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)


--- Setting up Text Classification ---


Map:   0%|          | 0/5185 [00:00<?, ? examples/s]

Map:   0%|          | 0/1297 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at jcblaise/bert-tagalog-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Loaded: jcblaise/bert-tagalog-base-uncased for 2-label (Fake/Genuine) classification.


  trainer = Trainer(


In [12]:
print("\n--- Starting Fine-Tuning ---")
trainer.train()

print("\n--- Final Evaluation Results ---")
eval_results = trainer.evaluate()
print(eval_results)

# Save the best model
trainer.save_model("./fake_review_model_best")
print("Best model saved to './fake_review_model_best'")


print("\n--- Running Inference on New Data ---")

sentiment_analyzer = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

new_data = [
   # Positive
    "super satisfied ako, ang ganda ng quality, legit!",
    "worth it yung bayad, exceeded expectations ko.",
    "ayos na ayos, dumating agad at maayos pagkakapack.",
    "panalo to, hindi ako nagsisisi, bibili ulit ako.",
    "smooth transaction, mabait si seller, 5 stars!",
    "sobrang ganda grabe, mas maganda pa sa picture.",
    "perfect, walang damage, highly recommended.",
    "ang tibay ng item, hindi cheap quality.",
    "best purchase ko this month, solid!",
    "legit store, mabilis delivery, safe packaging.",
    "nagustuhan ko sobra, sakto sa kailangan ko.",
    "premium feel, ang sarap hawakan, good job seller.",
    "hindi ko in-expect na ganito kaganda, nice one.",
    "smooth gamitin, walang lag, good performance.",
    "worth every peso, value for money talaga.",

    # Neutral
    "ok lang, sakto lang, nothing special.",
    "pwede na, pero hindi ako super impressed.",
    "normal lang quality, hindi masama, hindi rin sobrang ganda.",
    "average lang, decent for the price.",
    "ayos lang, pero may konting diprensya.",
    "hindi pangit pero hindi rin outstanding.",
    "fair lang yung item, pwede na sa daily use.",
    "saktong size, pero medyo manipis.",
    "meh, parang typical product lang.",
    "okay lang gamitin, pero may improvements pa.",
    "mabilis dumating pero so-so ang quality.",
    "hindi ko masabing good or bad, neutral lang talaga.",
    "functioning naman, pero basic features lang.",
    "tamang-tama lang, hindi worth it or not worth it.",
    "pwede na siya, pero hindi ko irerecommend right away.",

    # Negative
    "sobrang pangit ng quality, sayang pera ko.",
    "madami damage, hindi worth it bilhin.",
    "mabagal dumating, tapos sira pa yung item.",
    "huwag bumili dito, super disappointing.",
    "hindi gumagana, defective agad pag-open ko.",
    "nagka-problem sa seller, hindi responsive.",
    "ang cheap ng quality, masyadong manipis.",
    "hindi tugma sa description, scam vibes.",
    "super bad experience, never again.",
    "pangit yung material, madaling masira.",
    "ang dumi nung dumating, poor handling.",
    "overpriced, hindi worth ang quality.",
    "malas, wrong item pa talaga dumating.",
    "sobrang lag, hindi usable.",
    "weak performance, ang daming issues."
]

# 3. Run Inference
results = sentiment_analyzer(new_data)

# The model will output LABEL_0 (Genuine) and LABEL_1 (Fake)
for text, result in zip(new_data, results):
    # Relabel the output for clarity
    label = "FAKE" if result['label'] == 'LABEL_1' else "GENUINE"

    print(f"Text: \"{text}\"")
    print(f"Prediction: {label} (Score: {result['score']:.4f})\n")



--- Starting Fine-Tuning ---


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.072,0.084788,0.973015,0.97197
2,0.0512,0.093733,0.972244,0.972871



--- Final Evaluation Results ---


{'eval_loss': 0.09373293071985245, 'eval_accuracy': 0.9722436391673092, 'eval_f1': 0.9728707654028398, 'eval_runtime': 9.0286, 'eval_samples_per_second': 143.654, 'eval_steps_per_second': 9.082, 'epoch': 2.0}


Device set to use cuda:0


Best model saved to './fake_review_model_best'

--- Running Inference on New Data ---
Text: "super satisfied ako, ang ganda ng quality, legit!"
Prediction: GENUINE (Score: 0.9998)

Text: "worth it yung bayad, exceeded expectations ko."
Prediction: GENUINE (Score: 0.5922)

Text: "ayos na ayos, dumating agad at maayos pagkakapack."
Prediction: FAKE (Score: 0.6794)

Text: "panalo to, hindi ako nagsisisi, bibili ulit ako."
Prediction: GENUINE (Score: 0.8712)

Text: "smooth transaction, mabait si seller, 5 stars!"
Prediction: GENUINE (Score: 0.9995)

Text: "sobrang ganda grabe, mas maganda pa sa picture."
Prediction: GENUINE (Score: 0.9996)

Text: "perfect, walang damage, highly recommended."
Prediction: FAKE (Score: 0.9737)

Text: "ang tibay ng item, hindi cheap quality."
Prediction: FAKE (Score: 0.9521)

Text: "best purchase ko this month, solid!"
Prediction: GENUINE (Score: 0.8535)

Text: "legit store, mabilis delivery, safe packaging."
Prediction: GENUINE (Score: 0.8581)

Text: "nagustu

In [13]:
try:
    results_list
except NameError:
    results_list = []

accuracy = eval_results.get("eval_accuracy", None)
precision = eval_results.get("eval_precision", None)
recall = eval_results.get("eval_recall", None)
f1 = eval_results.get("eval_f1", None)

results_list.append({
    "accuracy": accuracy,
    "precision": precision,
    "recall": recall,
    "f1": f1
})

print("\n--- Metrics Saved! ---")


import numpy as np

accuracies = np.array([r["accuracy"] for r in results_list if r["accuracy"] is not None])
precisions = np.array([r["precision"] for r in results_list if r["precision"] is not None])
recalls = np.array([r["recall"] for r in results_list if r["recall"] is not None])
f1s = np.array([r["f1"] for r in results_list if r["f1"] is not None])

avg_accuracy = accuracies.mean() if len(accuracies) > 0 else None
avg_precision = precisions.mean() if len(precisions) > 0 else None
avg_recall = recalls.mean() if len(recalls) > 0 else None
avg_f1 = f1s.mean() if len(f1s) > 0 else None

metric_values = [m for m in [avg_accuracy, avg_precision, avg_recall, avg_f1] if m is not None]
overall_average = np.mean(metric_values) if len(metric_values) > 0 else None

print("\n===== AVERAGE METRICS ACROSS RUNS =====")
print(f"Average Accuracy:  {avg_accuracy}")
print(f"Average Precision: {avg_precision}")
print(f"Average Recall:    {avg_recall}")
print(f"Average F1-score:  {avg_f1}")
print("---------------------------------------")
print(f"Overall Average of All Metrics: {overall_average}")


--- Metrics Saved! ---

===== AVERAGE METRICS ACROSS RUNS =====
Average Accuracy:  0.9722436391673092
Average Precision: None
Average Recall:    None
Average F1-score:  0.9728707654028398
---------------------------------------
Overall Average of All Metrics: 0.9725572022850745


In [14]:
!pip install optuna

import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris

def objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 50, 300)
    max_depth = trial.suggest_int("max_depth", 2, 20)
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    iris = load_iris()
    score = cross_val_score(clf, iris.data, iris.target, cv=3).mean()
    return score

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)
print(study.best_params)

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


[I 2025-11-21 14:38:04,926] A new study created in memory with name: no-name-1ad467c3-e64d-4218-9080-b8631328c736
[I 2025-11-21 14:38:05,328] Trial 0 finished with value: 0.96 and parameters: {'n_estimators': 90, 'max_depth': 5}. Best is trial 0 with value: 0.96.
[I 2025-11-21 14:38:06,349] Trial 1 finished with value: 0.9666666666666667 and parameters: {'n_estimators': 255, 'max_depth': 19}. Best is trial 1 with value: 0.9666666666666667.
[I 2025-11-21 14:38:06,948] Trial 2 finished with value: 0.96 and parameters: {'n_estimators': 151, 'max_depth': 4}. Best is trial 1 with value: 0.9666666666666667.
[I 2025-11-21 14:38:08,026] Trial 3 finished with value: 0.9666666666666667 and parameters: {'n_estimators': 259, 'max_depth': 7}. Best is trial 1 with value: 0.9666666666666667.
[I 2025-11-21 14:38:08,618] Trial 4 finished with value: 0.9666666666666667 and parameters: {'n_estimators': 146, 'max_depth': 8}. Best is trial 1 with value: 0.9666666666666667.
[I 2025-11-21 14:38:09,460] Trial

{'n_estimators': 255, 'max_depth': 19}
