In [None]:
import os

# Make sure HUGGINGFACE_TOKEN exists for the cells below without hard-coding a
# secret in the notebook. setdefault keeps a token that is already present in
# the environment instead of overwriting it with an empty string; the empty
# default is only used when nothing was set for this runtime session.
os.environ.setdefault("HUGGINGFACE_TOKEN", "")


In [None]:
! pip install -U "huggingface_hub[cli]"

Collecting huggingface_hub[cli]
  Downloading huggingface_hub-0.33.4-py3-none-any.whl.metadata (14 kB)
Collecting InquirerPy==0.3.4 (from huggingface_hub[cli])
  Downloading InquirerPy-0.3.4-py3-none-any.whl.metadata (8.1 kB)
Collecting pfzy<0.4.0,>=0.3.1 (from InquirerPy==0.3.4->huggingface_hub[cli])
  Downloading pfzy-0.3.4-py3-none-any.whl.metadata (4.9 kB)
Downloading InquirerPy-0.3.4-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.7/67.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading huggingface_hub-0.33.4-py3-none-any.whl (515 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.3/515.3 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pfzy-0.3.4-py3-none-any.whl (8.5 kB)
Installing collected packages: pfzy, InquirerPy, huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.33.2
    Uninstalling huggingface-hub-0.33.2:
      Successfully u

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Read the token from the environment. An unset or empty value becomes None so
# the files are requested without authentication.
token = os.environ.get("HUGGINGFACE_TOKEN") or None
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# `token=` is the supported keyword; `use_auth_token=` is deprecated.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=token)




tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Sample input text; the cells below split it into sentences and classify each one.
paragraph = """
Remote work improves employee productivity. People working remotely often report fewer distractions and better focus. EVs have zero tailpipe emissions. Most EVs are charged with electricity generated from fossil fuels. AI-generated images mimic style but lack original thought. 3D-printed organs could reduce transplant wait times.
"""

In [None]:
import re
import json
import torch

def split_into_sentences(text):
    """Split ``text`` into sentences.

    A boundary is any run of whitespace that directly follows ``.``, ``!`` or
    ``?``; the punctuation stays attached to the sentence before it. Leading
    and trailing whitespace of ``text`` is ignored.

    Args:
        text: The string to split.

    Returns:
        A list of sentence strings. Empty or whitespace-only input yields an
        empty list (rather than a list holding one empty string), so callers
        never receive a blank "sentence".
    """
    stripped = text.strip()
    if not stripped:
        return []
    # Look-behind keeps the terminator with its sentence; only the whitespace
    # after it is consumed by the split.
    return re.split(r'(?<=[.!?])\s+', stripped)

# Sentence list consumed by the classification loop further down.
sentences = split_into_sentences(paragraph)

In [None]:
def run_prompt(prompt, max_new_tokens=10):
    """Generate a short continuation of ``prompt`` and return only the new text.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text passed to the tokenizer and then to ``model.generate``.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded continuation with surrounding whitespace stripped. If
        tokenization, generation or decoding raises, the error is printed and
        the string ``"premise"`` is returned as a fallback.
    """
    try:
        # Keep the input tensors on the same device as the model weights.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_token_count = inputs["input_ids"].shape[-1]
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer.eos_token_id
            )
        # The returned sequence starts with the prompt tokens. Drop them by
        # token count instead of slicing the decoded string by len(prompt):
        # decoding does not always reproduce the prompt character for
        # character, which would clip or leak characters in the answer.
        new_tokens = outputs[0][prompt_token_count:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as e:
        print(f"⚠️ LLM failed: {e}")
        return "premise"  # Fallback

In [None]:
# STEP 6: Classification loop
def _parse_label(response):
    """Map a raw model continuation to 'claim' or 'premise'.

    The label that appears first in the response wins, so a continuation that
    answers "premise" and then goes on to mention "claim" is not mislabelled.
    If neither word appears, the result is "premise".
    """
    text = response.lower()
    claim_at = text.find("claim")
    premise_at = text.find("premise")
    if claim_at == -1:
        return "premise"
    if premise_at == -1:
        return "claim"
    return "claim" if claim_at < premise_at else "premise"


classified = []  # one {"text", "type"} dict per sentence, in input order
claims = []      # texts of the sentences labelled "claim"

for s in sentences:
    if not s.strip():
        # Nothing to classify in a blank sentence.
        continue
    prompt = f"Classify the following sentence as either a 'claim' or a 'premise':\n\"{s}\"\nAnswer:"
    response = run_prompt(prompt)
    label = _parse_label(response)
    entry = {"text": s, "type": label}
    classified.append(entry)
    if label == "claim":
        claims.append(s)

# STEP 7: Output as structured JSON
print(json.dumps(classified, indent=2))

[
  {
    "text": "Remote work improves employee productivity.",
    "type": "premise"
  },
  {
    "text": "People working remotely often report fewer distractions and better focus.",
    "type": "premise"
  },
  {
    "text": "EVs have zero tailpipe emissions.",
    "type": "claim"
  },
  {
    "text": "Most EVs are charged with electricity generated from fossil fuels.",
    "type": "claim"
  },
  {
    "text": "AI-generated images mimic style but lack original thought.",
    "type": "claim"
  },
  {
    "text": "3D-printed organs could reduce transplant wait times.",
    "type": "claim"
  }
]


In [22]:
# Group the classified sentences by label: one list of {"text": ...} records
# per label, preserving the order in which the sentences were classified.
def _texts_of(kind):
    """Return {"text": ...} records for every classified entry of type ``kind``."""
    return [{"text": item["text"]} for item in classified if item["type"] == kind]


structured_output = {
    "claims": _texts_of("claim"),
    "premises": _texts_of("premise"),
}

In [23]:
# Pretty-print the grouped claims/premises as JSON with 2-space indentation.
print(json.dumps(structured_output, indent=2))

{
  "claims": [
    {
      "text": "EVs have zero tailpipe emissions."
    },
    {
      "text": "Most EVs are charged with electricity generated from fossil fuels."
    },
    {
      "text": "AI-generated images mimic style but lack original thought."
    },
    {
      "text": "3D-printed organs could reduce transplant wait times."
    }
  ],
  "premises": [
    {
      "text": "Remote work improves employee productivity."
    },
    {
      "text": "People working remotely often report fewer distractions and better focus."
    }
  ]
}
