In [1]:
!pip install fsspec==2025.3.2
!pip install datasets transformers sentence-transformers

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda

In [2]:
from datasets import load_dataset

ds          = load_dataset("tau/commonsense_qa", split="train[:5]")
questions   = ds["question"]
choices_all = ds["choices"]
answer_keys = ds["answerKey"]   # list of "A", "B", ...

texts = []
for q, choice_dict in zip(questions, choices_all):
    prompt = "Question: " + q + "\n" + "\n".join(
        f"{lbl}. {txt}"
        for lbl, txt in zip(choice_dict["label"], choice_dict["text"])
    )
    texts.append(prompt)

labels = []
for choice_dict, key in zip(choices_all, answer_keys):
    # find the index of the correct label letter in the 'label' list
    idx = choice_dict["label"].index(key)
    # grab that text
    labels.append(choice_dict["text"][idx])

print(texts[0])
print("→ label:", labels[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.39k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/160k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9741 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1221 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1140 [00:00<?, ? examples/s]

Question: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?
A. ignore
B. enforce
C. authoritarian
D. yell at
E. avoid
→ label: ignore


##commonsense qa

In [3]:
# ── Install once: ────────────────────────────────────────────────────────────
# pip install transformers datasets sentence-transformers

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

class Se2ICLInference:
    def __init__(
        self,
        model_name: str = "gpt2-medium",
        embed_model: str = "all-MiniLM-L6-v2",
        shot: int = 3,
        retrieve_k: int = 20,      # ← only top-20 now
        beam_size: int = 3,
        device: str = None,
    ):
        self.device      = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # SBERT for fast retrieval
        self.embedder    = SentenceTransformer(embed_model, device=self.device)
        # GPT2 for CE + generation
        self.tok         = AutoTokenizer.from_pretrained(model_name)
        if self.tok.pad_token is None:
            self.tok.pad_token = self.tok.eos_token
        self.lm          = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        self.lm.eval()

        self.shot        = shot
        self.retrieve_k  = retrieve_k
        self.beam_size   = beam_size

    def _batch_ce(self, texts):
        """Compute CE for a batch of texts in one forward pass."""
        enc = self.tok(
            texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(self.device)
        with torch.no_grad():
            out = self.lm(enc.input_ids, labels=enc.input_ids)
        lengths = enc.attention_mask.sum(dim=1).float()
        return (out.loss * lengths).cpu().tolist()

    def beam_search(self, prompt: str, label: str):
        # 1) retrieve top-K *indices* via precomputed self.demos_emb
        q_emb        = self.embedder.encode([prompt], convert_to_tensor=True)
        sims, idxs   = (self.demos_emb @ q_emb.T).squeeze(1).topk(self.retrieve_k)
        candidates   = [self.demos[i] for i in idxs.cpu().tolist()]

        # 2) beam-search with CE rerank
        beams = [([], 0.0)]
        for _ in range(self.shot):
            new_beams, batch_xc, batch_c, parents = [], [], [], []
            # build all the CE inputs for this beam step
            for seq, cum in beams:
                for d in candidates:
                    if d in seq:
                        continue
                    batch_xc.append(f"{d}\n{prompt}\nAnswer: {label}")
                    batch_c .append(f"{d}\nAnswer: {label}")
                    parents.append((seq, d, cum))
            # score them in two big batches
            H_xc = self._batch_ce(batch_xc)
            H_c  = self._batch_ce(batch_c)
            # collect the new beams
            for (seq, d, cum), hxc, hc in zip(parents, H_xc, H_c):
                new_beams.append((seq + [d], cum + (hxc - hc)))
            # keep top-beam_size
            beams = sorted(new_beams, key=lambda x: x[1], reverse=True)[: self.beam_size]

        return beams[0][0]

    def generate(self, prompt: str, demos: list[str]) -> str:
        full = "\n\n".join(demos) + f"\n\n{prompt}\nAnswer:"
        enc  = self.tok(full, return_tensors="pt").to(self.device)
        gen  = self.lm.generate(
            **enc,
            max_new_tokens=10,
            pad_token_id=self.tok.eos_token_id,
        )
        out  = self.tok.decode(gen[0], skip_special_tokens=True)
        # return the first token after the last "Answer:"
        return out.rsplit("Answer:", 1)[-1].strip().split()[0]

    def evaluate(self, prompts: list[str], labels: list[str], demos: list[str]):
        # Precompute SBERT embeddings for the demo pool ONCE
        self.demos      = demos
        self.demos_emb  = self.embedder.encode(demos, convert_to_tensor=True)

        correct = 0
        for p, l in zip(prompts, labels):
            sel  = self.beam_search(p, l)
            pred = self.generate(p, sel)
            if pred.lower() == l.lower():
                correct += 1

        acc = correct / len(prompts)
        print(f"Accuracy over {len(prompts)} examples: {acc:.4f}")


if __name__ == "__main__":
    # 1) load a small slice
    ds = load_dataset("tau/commonsense_qa", split="train[:50]")

    # 2) build prompts & labels
    prompts, labels = [], []
    for ex in ds:
        stem     = ex["question"]                # already a string
        labs,txt = ex["choices"]["label"], ex["choices"]["text"]
        p        = "Question: " + stem + "\n" + "\n".join(f"{L}. {T}" for L, T in zip(labs, txt))
        prompts.append(p)
        labels.append(txt[labs.index(ex["answerKey"])])

    # 3) full-demo pool
    demos = [f"{p}\nAnswer: {l}" for p, l in zip(prompts, labels)]

    # 4) run
    selector = Se2ICLInference(
        model_name   = "gpt2-medium",
        embed_model  = "all-MiniLM-L6-v2",
        shot         = 3,
        retrieve_k   = 20,    # fewer candidates → much faster
        beam_size    = 3,
    )
    selector.evaluate(prompts, labels, demos)

    # show one example
    print("\nExample #0:")
    print(prompts[0])
    print("→ label:", labels[0])

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Accuracy over 50 examples: 0.2000

Example #0:
Question: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?
A. ignore
B. enforce
C. authoritarian
D. yell at
E. avoid
→ label: ignore


## AG News

In [2]:
# ── Install once: ────────────────────────────────────────────────────────────
# pip install transformers datasets sentence-transformers

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

class Se2ICLInference:
    def __init__(
        self,
        model_name: str = "gpt2-medium",
        embed_model: str = "all-MiniLM-L6-v2",
        shot: int = 3,
        retrieve_k: int = 20,
        beam_size: int = 3,
        device: str = None,
    ):
        self.device     = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # 1) fast SBERT retriever
        self.embedder   = SentenceTransformer(embed_model, device=self.device)
        # 2) GPT-2 for CE scoring & generation
        self.tok        = AutoTokenizer.from_pretrained(model_name)
        if self.tok.pad_token is None:
            self.tok.pad_token = self.tok.eos_token
        self.lm         = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        self.lm.eval()

        self.shot       = shot
        self.retrieve_k = retrieve_k
        self.w          = beam_size

    def _batch_ce(self, texts):
        # batched cross-entropy
        enc = self.tok(
            texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)
        with torch.no_grad():
            out = self.lm(enc.input_ids, labels=enc.input_ids)
        lengths = enc.attention_mask.sum(dim=1).float()
        return (out.loss * lengths).cpu().tolist()

    def beam_search(self, prompt: str, label: str, demos: list[str]):
        # 1) retrieve top-K demos by SBERT cosine
        q_emb      = self.embedder.encode([prompt], convert_to_tensor=True)
        demos_emb  = self.embedder.encode(demos,   convert_to_tensor=True)
        sims, idxs = (demos_emb @ q_emb.T).squeeze(1).topk(self.retrieve_k)
        candidates = [demos[i] for i in idxs.cpu().tolist()]

        # 2) beam-search with cross-entropy re-rank
        beams = [([], 0.0)]
        for _ in range(self.shot):
            new_beams, batch_xc, batch_c, parents = [], [], [], []
            for seq, cum in beams:
                for d in candidates:
                    if d in seq:
                        continue
                    batch_xc.append(f"{d}\n{prompt}\nAnswer: {label}")
                    batch_c .append(f"{d}\nAnswer: {label}")
                    parents.append((seq, d, cum))
            H_xc = self._batch_ce(batch_xc)
            H_c  = self._batch_ce(batch_c)
            for (seq, d, cum), hxc, hc in zip(parents, H_xc, H_c):
                new_beams.append((seq + [d], cum + (hxc - hc)))
            beams = sorted(new_beams, key=lambda x: x[1], reverse=True)[: self.w]
        return beams[0][0]

    def generate(self, prompt: str, demos: list[str]) -> str:
        full = "\n\n".join(demos) + f"\n\n{prompt}\nAnswer:"
        enc  = self.tok(full, return_tensors="pt").to(self.device)
        gen  = self.lm.generate(
            **enc,
            max_new_tokens=10,
            pad_token_id=self.tok.eos_token_id
        )
        out = self.tok.decode(gen[0], skip_special_tokens=True)
        return out.rsplit("Answer:", 1)[-1].strip().split()[0]

    def evaluate(self, prompts: list[str], labels: list[str], demos: list[str]):
        # precompute embeddings once
        self.demos     = demos
        self.demos_emb = self.embedder.encode(demos, convert_to_tensor=True)

        correct = 0
        for p, l in zip(prompts, labels):
            sel  = self.beam_search(p, l, demos)
            pred = self.generate(p, sel)
            if pred.lower() == l.lower():
                correct += 1

        acc = correct / len(prompts)
        print(f"→ Accuracy over {len(prompts)} examples: {acc:.4f}")


if __name__ == "__main__":
    # 1) load a small slice of AG News
    ds = load_dataset("ag_news", split="train[:200]")  # e.g. 200 examples

    # 2) build prompts & labels
    label_map = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
    prompts, labels = [], []
    for ex in ds:
        # full text + multiple-choice style prompt
        prompt = (
            f"News article:\n{ex['text']}\n\n"
            "Category? (World / Sports / Business / Sci/Tech)"
        )
        prompts.append(prompt)
        labels.append(label_map[ex["label"]])

    # 3) demo pool: each entry "prompt\nAnswer: label"
    demos = [f"{p}\nAnswer: {l}" for p, l in zip(prompts, labels)]

    # 4) instantiate & evaluate
    selector = Se2ICLInference(
        model_name   = "gpt2-medium",
        embed_model  = "all-MiniLM-L6-v2",
        shot         = 3,
        retrieve_k   = 20,
        beam_size    = 3,
    )
    selector.evaluate(prompts, labels, demos)

    # print a worked example
    print("\nExample #0:")
    print(prompts[0])
    print("→ label:", labels[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


→ Accuracy over 200 examples: 0.5850

Example #0:
News article:
Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.

Category? (World / Sports / Business / Sci/Tech)
→ label: Business


## SST 5

In [3]:
# ── Install once in your environment: ────────────────────────────────────────────
# pip install transformers datasets sentence-transformers

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

class Se2ICLInference:
    def __init__(
        self,
        model_name: str = "gpt2-medium",
        embed_model: str = "all-MiniLM-L6-v2",
        shot: int = 3,
        retrieve_k: int = 20,
        beam_size: int = 3,
        device: str = None,
    ):
        self.device     = device or ("cuda" if torch.cuda.is_available() else "cpu")
        # 1) Bi-encoder for fast retrieval
        self.embedder   = SentenceTransformer(embed_model, device=self.device)
        # 2) Causal LM for CE scoring & generation
        self.tok        = AutoTokenizer.from_pretrained(model_name)
        if self.tok.pad_token is None:
            self.tok.pad_token = self.tok.eos_token
        self.lm         = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        self.lm.eval()

        self.shot       = shot
        self.retrieve_k = retrieve_k
        self.w          = beam_size

    def _batch_ce(self, texts):
        """Compute cross‐entropy for a batch of strings."""
        enc = self.tok(
            texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)
        with torch.no_grad():
            out = self.lm(enc.input_ids, labels=enc.input_ids)
        lengths = enc.attention_mask.sum(dim=1).float()
        return (out.loss * lengths).cpu().tolist()

    def beam_search(self, prompt: str, label: str):
        # 1) retrieve top-K demos by SBERT cosine
        q_emb      = self.embedder.encode([prompt], convert_to_tensor=True)
        sims, idxs = (self.demos_emb @ q_emb.T).squeeze(1).topk(self.retrieve_k)
        candidates = [self.demos[i] for i in idxs.cpu().tolist()]

        # 2) beam-search with CE re-ranking
        beams = [([], 0.0)]
        for _ in range(self.shot):
            new_beams, batch_xc, batch_c, parents = [], [], [], []
            for seq, cum in beams:
                for d in candidates:
                    if d in seq:
                        continue
                    batch_xc.append(f"{d}\nReview: {prompt}\nSentiment: {label}")
                    batch_c .append(f"{d}\nSentiment: {label}")
                    parents.append((seq, d, cum))
            H_xc = self._batch_ce(batch_xc)
            H_c  = self._batch_ce(batch_c)
            for (seq, d, cum), hxc, hc in zip(parents, H_xc, H_c):
                new_beams.append((seq + [d], cum + (hxc - hc)))
            beams = sorted(new_beams, key=lambda x: x[1], reverse=True)[: self.w]
        return beams[0][0]

    def generate(self, prompt: str, demos: list[str]) -> str:
        full = "\n\n".join(demos) + f"\n\nReview: {prompt}\nSentiment:"
        enc  = self.tok(full, return_tensors="pt").to(self.device)
        gen  = self.lm.generate(
            **enc,
            max_new_tokens=5,
            pad_token_id=self.tok.eos_token_id
        )
        out = self.tok.decode(gen[0], skip_special_tokens=True)
        return out.rsplit("Sentiment:", 1)[-1].strip().split()[0]

    def evaluate(self, prompts, labels, demos):
        # precompute SBERT embeddings once
        self.demos     = demos
        self.demos_emb = self.embedder.encode(demos, convert_to_tensor=True)

        correct = 0
        for i, (p, l) in enumerate(zip(prompts, labels)):
            sel  = self.beam_search(p, l)
            pred = self.generate(p, sel)

            ok = (pred.lower() == l.lower())
            correct += ok

            # show first 3 examples
            if i < 3:
                print(f"\n--- Example {i} ---")
                print("Review:", p)
                print("Chosen demos:")
                for d in sel:
                    print(" •", d.replace("\n", " ↵ "))
                print(f"Prediction: {pred!r}  (gold: {l!r}) →", "✓" if ok else "✗")

        acc = correct / len(prompts)
        print(f"\nOverall SST-5 Accuracy: {100*acc:.2f}%  ({correct}/{len(prompts)})")


if __name__ == "__main__":
    # 1) load SST-5 (5-class Stanford Sentiment Treebank)
    ds = load_dataset("SetFit/sst5", split="train[:200]")  # first 200 for speed

    # 2) prompt & label mapping
    label_names = [
        "Very negative",
        "Negative",
        "Neutral",
        "Positive",
        "Very positive"
    ]
    prompts, labels = [], []
    for ex in ds:
        text = ex["text"]
        prompts.append(text)
        labels.append(label_names[ex["label"]])

    # 3) build demo‐pool entries: "Review ↵ <text> ↵ Sentiment: <label>"
    demos = [f"{d}\nSentiment: {l}" for d, l in zip(prompts, labels)]

    # 4) run Se₂
    selector = Se2ICLInference(
        model_name   = "gpt2-medium",
        embed_model  = "all-MiniLM-L6-v2",
        shot         = 3,
        retrieve_k   = 20,
        beam_size    = 3,
    )
    selector.evaluate(prompts, labels, demos)

README.md:   0%|          | 0.00/421 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


train.jsonl:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

dev.jsonl:   0%|          | 0.00/171k [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/343k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8544 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1101 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2210 [00:00<?, ? examples/s]


--- Example 0 ---
Review: a stirring , funny and finally transporting re-imagining of beauty and the beast and 1930s horror films
Chosen demos:
 • i could n't recommend this film more . ↵ Sentiment: Positive
 • this is a gorgeous film - vivid with color , music and life . ↵ Sentiment: Very positive
 • a perfectly acceptable , perfectly bland , competently acted but by no means scary horror movie . ↵ Sentiment: Neutral
Prediction: 'Positive'  (gold: 'Very positive') → ✗

--- Example 1 ---
Review: apparently reassembled from the cutting-room floor of any given daytime soap .
Chosen demos:
 • the movie is so thoughtlessly assembled . ↵ Sentiment: Positive
 • a metaphor for a modern-day urban china searching for its identity . ↵ Sentiment: Positive
 • apparently reassembled from the cutting-room floor of any given daytime soap . ↵ Sentiment: Negative
Prediction: 'Negative'  (gold: 'Negative') → ✓

--- Example 2 ---
Review: they presume their audience wo n't sit still for a sociology lesso