## Notebook used to upload and configure the Hugging Face Space

https://huggingface.co/spaces/DaanBooy/games_and_accessories_reviews

# Connecting to Hugging Face

In [1]:
!pip -q install huggingface_hub

from huggingface_hub import login
HF_REPO_ID = "DaanBooy/games_and_accessories_reviews"  # your Space

print("Target Space:", HF_REPO_ID)
login()  # paste your hf_... token with WRITE permission when prompted

Target Space: DaanBooy/games_and_accessories_reviews


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Using Google Drive to upload files

In [None]:
from google.colab import drive
from pathlib import Path

# Step 0: make Google Drive visible inside the Colab filesystem.
drive.mount("/content/drive")

# Step 1: point at the earlier run folder and the articles JSONL inside it.
RUN_DIR = Path("/content/drive/MyDrive/Project_NLP/runs/clustering_full_20250828_1924")
SRC_JSONL = RUN_DIR.joinpath("articles_gpt4omini.jsonl")

# Report (without raising) whether both paths are present.
run_dir_found = RUN_DIR.exists()
jsonl_found = SRC_JSONL.exists()
print("RUN_DIR exists:", run_dir_found)
print("JSONL exists:", jsonl_found, "->", SRC_JSONL)


Mounted at /content/drive
RUN_DIR exists: True
JSONL exists: True -> /content/drive/MyDrive/Project_NLP/runs/clustering_full_20250828_1924/articles_gpt4omini.jsonl


# Staging the data and making the requirements file

In [None]:
from pathlib import Path
import shutil

# Local mirror of the Hugging Face Space folder layout.
ROOT = Path("/content/games_and_accessories_reviews_space")
DATA = ROOT.joinpath("data")
for folder in (ROOT, DATA):
    folder.mkdir(parents=True, exist_ok=True)

# Stage the GPT-4o-mini articles inside the Space's data folder.
DEST = DATA.joinpath("articles_gpt4omini.jsonl")
shutil.copy2(SRC_JSONL, DEST)
print("Staged:", DEST)

# Minimal requirements.txt: one specifier per line, newline-terminated.
requirements_path = ROOT / "requirements.txt"
minimal_requirements = [
    "gradio>=4.38.0",
    "pandas>=2.0.0",
    "markdown2>=2.4.0",
]
requirements_path.write_text(
    "".join(f"{spec}\n" for spec in minimal_requirements),
    encoding="utf-8",
)

print("Wrote:", requirements_path)
print("Space staging folder ready at:", ROOT)


Staged: /content/games_and_accessories_reviews_space/data/articles_gpt4omini.jsonl
Wrote: /content/games_and_accessories_reviews_space/requirements.txt
Space staging folder ready at: /content/games_and_accessories_reviews_space


In [6]:
from pathlib import Path

ROOT = Path("/content/games_and_accessories_reviews_space")

# Full dependency list for the Space; writing it replaces any requirements.txt
# already present in the staging folder.
requirement_specs = (
    "gradio>=4.38.0",
    "pandas>=2.0.0",
    "markdown2>=2.4.0",
    "transformers>=4.42.0",
    "torch>=2.1.0",           # CPU is fine on Spaces
    "numpy>=1.24.0",
)
req = "".join(f"{spec}\n" for spec in requirement_specs)

requirements_file = ROOT / "requirements.txt"
requirements_file.write_text(req, encoding="utf-8")

# Echo the file back so the cell output shows exactly what was written.
print(requirements_file.read_text())


gradio>=4.38.0
pandas>=2.0.0
markdown2>=2.4.0
transformers>=4.42.0
torch>=2.1.0
numpy>=1.24.0



## Making the app.py file
This configures what is shown and used on the Hugging Face Space.

In [20]:
from pathlib import Path

APP = Path("/content/games_and_accessories_reviews_space/app.py")

updated = r'''
import json
from pathlib import Path
from typing import Dict, List, Any
import pandas as pd
import gradio as gr
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ---------- Data ----------
DATA_DIR = Path(__file__).parent / "data"

def load_jsonl(p: Path) -> List[Any]:
    """Read a JSON Lines file into a list of parsed records.

    Args:
        p: Path of the file to read; it is opened as UTF-8 text.

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    rows: List[Any] = []
    with p.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Blank lines (e.g. a trailing newline at end of file) carry no record
            # and would make json.loads raise, so skip them.
            if not line:
                continue
            rows.append(json.loads(line))
    return rows

# Discover every article dump (e.g., articles_gpt4omini.jsonl), keyed by the
# file stem with the "articles_" prefix removed.
MODEL_ROWS: Dict[str, List[Dict[str, Any]]] = {
    dump.stem.replace("articles_", ""): load_jsonl(dump)
    for dump in DATA_DIR.glob("articles_*.jsonl")
}

if len(MODEL_ROWS) == 0:
    raise RuntimeError("No JSONL files found in ./data (e.g., data/articles_gpt4omini.jsonl).")

# INDEX maps model -> cluster display name -> record. A record without a
# cluster_name is labelled "Cluster <id>"; when two records share a name, the
# later one wins.
INDEX: Dict[str, Dict[str, Dict[str, Any]]] = {
    model_name: {
        (rec.get("cluster_name") or f"Cluster {rec.get('cluster')}"): rec
        for rec in records
    }
    for model_name, records in MODEL_ROWS.items()
}

# Every distinct cluster name across all models, in sorted order.
CLUSTERS: List[str] = sorted(
    {name for per_model in INDEX.values() for name in per_model}
)
MODEL_CHOICES = sorted(MODEL_ROWS.keys(), key=str.lower)

# ---------- Tab 1: Summaries ----------
def get_article(model_key: str, cluster_name: str) -> str:
    """Return the stored article text for one model/cluster pair.

    Args:
        model_key: Key into INDEX identifying the model.
        cluster_name: Cluster display name to look up under that model.

    Returns:
        The record's "article" text with surrounding whitespace removed, or a
        Markdown "No article" notice when no record is found.
    """
    rec = INDEX.get(model_key, {}).get(cluster_name)
    if not rec:
        return f"### No article for **{cluster_name}** under model `{model_key}`."
    # "article" may be absent or explicitly null in the JSONL; both become empty
    # text instead of raising on None.strip().
    return str(rec.get("article") or "").strip()

# ---------- Tab 2: Classification demo ----------
_CLS_NAME = "cardiffnlp/twitter-roberta-base-sentiment"

# Load the tokenizer and the model once, at import time; the model is switched
# to evaluation mode right away (eval() returns the module itself).
_cls_tokenizer = AutoTokenizer.from_pretrained(_CLS_NAME)
_cls_model = AutoModelForSequenceClassification.from_pretrained(_CLS_NAME).eval()

# CLS_LABELS is indexed by the arg-max class position; EMOJIS is keyed by the
# capitalized label.
CLS_LABELS = ["negative", "neutral", "positive"]
EMOJIS = dict(Negative="🙁", Neutral="😐", Positive="🙂")

@torch.inference_mode()
def classify_pretty(text: str) -> str:
    """Classify one review and phrase the result as a Markdown sentence.

    Args:
        text: The review to classify.

    Returns:
        A prompt asking for input when ``text`` is empty or whitespace-only;
        otherwise a sentence naming the highest-probability label, its
        probability rounded to a whole percent, and a matching emoji.
    """
    if not text or not text.strip():
        return "Enter a review above and press **Classify**."
    # NOTE(review): this checkpoint's tokenizer appears not to advertise a
    # maximum length, in which case truncation=True alone does not shorten the
    # input and long reviews overflow the model — confirm. The explicit cap is
    # harmless either way.
    inputs = _cls_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    logits = _cls_model(**inputs).logits
    # Single input, so take row 0 of the softmax output.
    probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    top = int(np.argmax(probs))
    label = CLS_LABELS[top].capitalize()
    pct = int(round(float(probs[top]) * 100))
    emoji = EMOJIS.get(label, "")
    return f"That review sounds **{label}** to me. I’m **{pct}%** sure. {emoji}"

# ---------- Tab 3: Clustering overview ----------
def cluster_overview() -> pd.DataFrame:
    """Build a table of cluster ids and display names from one model's records.

    Returns:
        A DataFrame with columns "cluster" and "cluster_name", sorted by
        "cluster" with a fresh 0..n-1 index. The columns are present even when
        the chosen model has no records.
    """
    # MODEL_ROWS is filled in glob order, which is not guaranteed to be stable;
    # choose the example model by case-insensitive name instead.
    example_model = min(MODEL_ROWS, key=str.lower)
    rows = [
        {
            "cluster": r.get("cluster"),
            "cluster_name": r.get("cluster_name") or f"Cluster {r.get('cluster')}",
        }
        for r in MODEL_ROWS[example_model]
    ]
    # Naming the columns keeps sort_values from raising KeyError on an empty list.
    df = pd.DataFrame(rows, columns=["cluster", "cluster_name"])
    return df.sort_values(by="cluster").reset_index(drop=True)

# ---------- UI ----------
with gr.Blocks(title="🎮 Games & Accessories — NLP Review Explorer") as demo:
    gr.Markdown("# 🎮 Games & Accessories — NLP Review Explorer")

    # MODEL_CHOICES is non-empty (startup raises otherwise), but CLUSTERS is
    # empty when the JSONL files hold no rows; fall back to no selection then.
    default_model = MODEL_CHOICES[0]
    default_cluster = CLUSTERS[0] if CLUSTERS else None

    with gr.Tabs():
        with gr.TabItem("📄 Summaries"):
            gr.Markdown("Browse cluster-level recommendation articles.")
            with gr.Row():
                dd_model = gr.Dropdown(MODEL_CHOICES, value=default_model, label="Model")
                dd_cluster = gr.Dropdown(CLUSTERS, value=default_cluster, label="Cluster")
            # Seed the first article through the constructor rather than by
            # assigning .value after the component has been created.
            md_article = gr.Markdown(value=get_article(default_model, default_cluster))

            # Changing either dropdown reloads the article for the current pair.
            dd_model.change(get_article, [dd_model, dd_cluster], md_article)
            dd_cluster.change(get_article, [dd_model, dd_cluster], md_article)

        with gr.TabItem("😀 Classification"):
            gr.Markdown("Type a review to see **Negative / Neutral / Positive**.")
            tb_text = gr.Textbox(label="Enter a single review", lines=5, placeholder="e.g., The controller feels solid but the battery dies quickly.")
            btn = gr.Button("Classify")
            out_md = gr.Markdown()

            # Both the button and submitting the textbox run the classifier.
            btn.click(classify_pretty, tb_text, out_md)
            tb_text.submit(classify_pretty, tb_text, out_md)

        with gr.TabItem("📊 Clustering"):
            gr.Markdown("Overview of discovered clusters.")
            df_clusters = gr.Dataframe(value=cluster_overview(), interactive=False, wrap=True, label="Clusters")

if __name__ == "__main__":
    demo.launch()
'''

# Write the app source held in `updated` to APP as UTF-8, replacing any existing file.
APP.write_text(updated, encoding="utf-8")
print("Updated app.py with emojis in tab names only")


Updated app.py with emojis in tab names only


# Pushing changes to the Hugging Face Space

In [None]:
from huggingface_hub import HfApi
from pathlib import Path

HF_REPO_ID = "DaanBooy/games_and_accessories_reviews"
LOCAL_FOLDER = Path("/content/games_and_accessories_reviews_space")

# Collect the upload arguments first so the call itself stays a one-liner.
upload_kwargs = {
    "repo_id": HF_REPO_ID,
    "repo_type": "space",
    "folder_path": str(LOCAL_FOLDER),
    "commit_message": "updated tab names",
}

api = HfApi()
api.upload_folder(**upload_kwargs)

print("Uploaded. Check your Space and refresh.")


Uploaded. Check your Space and refresh.
