In [None]:
!pip install -U transformers

## Local Inference on GPU
Model page: https://huggingface.co/openai-community/gpt2

⚠️ If the generated code snippets do not work, please open an issue on the [model repo](https://huggingface.co/openai-community/gpt2) and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏

In [None]:
# High-level helper: build a ready-to-use GPT-2 text-generation pipeline
from transformers import pipeline

pipe = pipeline(task="text-generation", model="openai-community/gpt2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [None]:
# Load the tokenizer and the causal-LM weights directly, without the pipeline wrapper
from transformers import AutoModelForCausalLM, AutoTokenizer

GPT2_CHECKPOINT = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(GPT2_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(GPT2_CHECKPOINT)

In [None]:
# Prompt handed to the text-generation pipeline in the next code cell.
# It ends on an open lead-in ("The key question is:") for the model to continue.
prompt = """In the coming decade, artificial intelligence will fundamentally reshape how humans work, learn, and communicate.
Some experts argue that automation will lead to mass unemployment, while others believe it will create entirely new categories of jobs.
The key question is:"""


In [None]:
from transformers import pipeline, set_seed

pipe = pipeline("text-generation", model="openai-community/gpt2")

# Generation below samples (do_sample=True); seed the RNGs first so that
# re-running this cell on the same setup yields the same continuation.
set_seed(42)
print(pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.8)[0]['generated_text'])


Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In the coming decade, artificial intelligence will fundamentally reshape how humans work, learn, and communicate. 
Some experts argue that automation will lead to mass unemployment, while others believe it will create entirely new categories of jobs. 
The key question is: what will be the new jobs? The United States as a whole is already on the march towards automation. 
In some areas, such as agriculture, computer vision and machine learning, we are already beginning to see large numbers of jobs created in the United States. 
One of the most common questions we get from a survey of employers, businesses, and consumers is "What will happen in the coming decade?" 
The most common answer is "very near."  This is when the


In [None]:
# Second sample prompt. This rebinds `prompt`; none of the cells visible after
# this one read it.
# NOTE(review): presumably meant for another text-generation run — confirm or remove.
prompt = """It was a stormy night in Luxembourg City. The streetlights flickered as Sitraka walked past the old post office,
his mind racing with the possibilities of what AI could do next. Suddenly, his phone buzzed with a notification from:"""


In [None]:
# Requirements:
# pip install -U transformers torch sentencepiece regex
# optionally: pip install newspaper3k    # if you want to fetch articles from URLs

from transformers import pipeline
import re

# === Pipelines ===
# Zero-shot NLI classifier, used to score the "impact" labels
zshot = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

# General-purpose sentiment classifier
sentiment = pipeline(task="sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# === Utilities ===

COMPANY_ALIASES = {
    "aramco": ["saudi aramco", "aramco", "saudi arabian oil company"],
    "exxon": ["exxon", "exxonmobil", "exxon mobil"],
    # add more companies/aliases here
}

def find_companies(text, aliases_map=COMPANY_ALIASES):
    """
    Collect the keys of ``aliases_map`` that are mentioned in ``text``.

    A key counts as mentioned when at least one of its aliases occurs as a
    substring of the lower-cased text. Aliases are compared as given, so they
    must already be lower-case to match.

    Returns a set of matching keys (empty when nothing matches).
    """
    haystack = text.lower()
    return {
        company
        for company, names in aliases_map.items()
        if any(name in haystack for name in names)
    }

def normalize_text(text):
    """Trim both ends and squeeze every run of whitespace (spaces, tabs, newlines) into one space."""
    # str.split() with no argument drops leading/trailing whitespace and splits on
    # whitespace runs, so re-joining with single spaces yields the cleaned text.
    return " ".join(text.split())

# === Approach A: Zero-shot (impact labels) ===
IMPACT_LABELS = ["positive impact", "negative impact", "neutral impact"]

def classify_impact_with_nli(text, company=None, labels=IMPACT_LABELS, hypothesis_template=None):
    """
    Score ``text`` against the impact ``labels`` with the zero-shot NLI pipeline.

    company: optional name spliced into the default hypothesis; when it is
        missing or empty the generic phrase "the company" is used instead.
    labels: candidate classes handed to the pipeline.
    hypothesis_template: optional custom template with a ``{}`` slot for the
        label (as in the built-in default). When supplied it is used as given
        and ``company`` is not consulted.

    Returns the pipeline result unchanged (callers read its 'labels' and
    'scores' entries).
    """
    cleaned = normalize_text(text)
    if not hypothesis_template:
        # Only build the default hypothesis when the caller did not pass one.
        subject = company if company else "the company"
        hypothesis_template = "This news would have a {} for " + subject + "."
    # Each label is inserted into the template and scored by the pipeline.
    return zshot(cleaned, labels, hypothesis_template=hypothesis_template)

# === Approach B: Sentiment ===
def classify_sentiment(text):
    """
    Run the sentiment pipeline on ``text`` and return its single prediction.

    Returns a dict with 'label' and 'score'. The label is the raw string the
    model emits: for cardiffnlp/twitter-roberta-base-sentiment that is
    'LABEL_0' (negative), 'LABEL_1' (neutral) or 'LABEL_2' (positive), not the
    words 'negative'/'neutral'/'positive'.
    """
    text = normalize_text(text)
    # The character cap bounds the work per call, but 1000 characters can still
    # tokenize to more than the model accepts (e.g. non-Latin scripts), so also
    # truncate at token level. max_length is given explicitly rather than
    # relying on the tokenizer to supply a limit.
    result = sentiment(text[:1000], truncation=True, max_length=512)
    # The pipeline returns a list with one entry per input; we passed one input.
    return result[0]

# === Heuristics & combination ===
def numeric_flags(text):
    """
    Count impact-related keywords in ``text`` (a simple rule-based booster).

    Despite the name, this looks for keywords rather than numbers:
    - 'fine', 'lawsuit', 'decline', 'loss', 'cut', 'shutdown', 'explosion', ... -> negative
    - 'profit', 'record', 'beat', 'surge', 'increase', 'expand', 'discovery', ... -> positive

    Matching is case-insensitive and anchored to the start of a word, so
    inflected forms still count ('profits', 'fined', 'cuts') while keywords
    buried inside unrelated words do not ('fine' in 'refinery', 'cut' in
    'executive', 'deal' in 'ideal').

    Returns {"neg_count": int, "pos_count": int}, where each count is the
    number of distinct keywords present (repeats of one keyword count once).
    """
    neg_words = ["fine", "lawsuit", "decline", "drop", "loss", "bankrupt", "shutdown",
                 "cut", "closure", "explosion", "spill", "sanction", "investigation"]
    pos_words = ["profit", "record", "beat", "surge", "increase", "expand", "growth",
                 "discovery", "agreement", "deal", "partnership", "upgrade"]
    text_l = text.lower()

    def count_present(stems):
        # \b pins the stem to a word start; anything may follow, so suffixes are allowed.
        return sum(1 for stem in stems if re.search(r"\b" + re.escape(stem), text_l))

    return {"neg_count": count_present(neg_words), "pos_count": count_present(pos_words)}

# Raw class ids emitted by cardiffnlp/twitter-roberta-base-sentiment, mapped to
# readable polarities (0 = negative, 1 = neutral, 2 = positive).
_SENTIMENT_LABEL_ALIASES = {
    "label_0": "negative",
    "label_1": "neutral",
    "label_2": "positive",
}


def _sentiment_polarity(raw_label):
    """Translate a sentiment-pipeline label into 'positive'/'neutral'/'negative'."""
    key = str(raw_label).strip().lower()
    # Labels that are already words (e.g. 'Positive') pass through lower-cased.
    return _SENTIMENT_LABEL_ALIASES.get(key, key)


def combine_signals(text, company=None, threshold=0.5):
    """
    Combine zero-shot NLI + sentiment + keyword heuristics into a final decision.

    text: article text to analyse.
    company: optional company name for the NLI hypothesis ("the company" when omitted).
    threshold: minimum normalized score the winning class needs; below it the
        decision falls back to 'neutral'.

    Returns a dict with the individual components and the decision:
    'companies_found', 'heuristics', 'nli_labels', 'sentiment' (raw pipeline
    output), 'fused_scores' (keys 'pos'/'neg'/'neu') and 'final'
    ('positive', 'negative' or 'neutral').
    """
    comps = {}
    comps['companies_found'] = list(find_companies(text))
    comps['heuristics'] = numeric_flags(text)

    # NLI impact
    nli = classify_impact_with_nli(text, company=company or "the company")
    comps['nli_labels'] = dict(zip(nli['labels'], nli['scores']))

    # Sentiment
    sent = classify_sentiment(text)
    comps['sentiment'] = sent
    # The model reports 'LABEL_0/1/2'; comparing those to 'positive' etc. never
    # matches and would silently drop the sentiment vote, so normalize first.
    polarity = _sentiment_polarity(sent['label'])

    # Simple fusion logic: NLI probability plus a full vote from the sentiment class
    pos_score = comps['nli_labels'].get("positive impact", 0) + (1.0 if polarity == "positive" else 0)
    neg_score = comps['nli_labels'].get("negative impact", 0) + (1.0 if polarity == "negative" else 0)
    neu_score = comps['nli_labels'].get("neutral impact", 0) + (1.0 if polarity == "neutral" else 0)

    # heuristic boosts (0.3 per matched keyword; neutral gets none)
    pos_score += 0.3 * comps['heuristics']['pos_count']
    neg_score += 0.3 * comps['heuristics']['neg_count']

    # normalization (epsilon avoids division by zero when every score is 0)
    total = pos_score + neg_score + neu_score + 1e-9
    norm = {"pos": pos_score/total, "neg": neg_score/total, "neu": neu_score/total}
    comps['fused_scores'] = norm

    # final decision by argmax, with threshold for "neutral"
    best = max(norm, key=norm.get)
    if norm[best] < threshold:
        final = "neutral"
    else:
        final = {"pos": "positive", "neg": "negative", "neu": "neutral"}[best]
    comps['final'] = final
    return comps



config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cuda:0



--- ARTICLE ---
Aramco reports record quarterly profits after higher oil prices boosted revenue.
Companies found: ['aramco']
Heuristics: {'neg_count': 0, 'pos_count': 2}
NLI labels & scores: {'positive impact': 0.9899078607559204, 'neutral impact': 0.0057486253790557384, 'negative impact': 0.004343463573604822}
Sentiment: {'label': 'LABEL_2', 'score': 0.8355144262313843}
Fused scores: {'pos': 0.9936924435852693, 'neg': 0.0027146648171340643, 'neu': 0.0035928909725965203}
FINAL DECISION: positive

--- ARTICLE ---
A major pipeline explosion damages production, investigation launched into Aramco's safety procedures.
Companies found: ['aramco']
Heuristics: {'neg_count': 2, 'pos_count': 0}
NLI labels & scores: {'negative impact': 0.9893918037414551, 'positive impact': 0.006202775985002518, 'neutral impact': 0.004405457526445389}
Sentiment: {'label': 'LABEL_0', 'score': 0.8189228773117065}
Fused scores: {'pos': 0.0038767348979413456, 'neg': 0.993369853588859, 'neu': 0.002753410888199643}
FI

In [None]:
# === Example usage ===
if __name__ == "__main__":
    # (article text, company key) pairs to run through the fused classifier
    samples = [
        ("Aramco reports record quarterly profits after higher oil prices boosted revenue.", "aramco"),
        ("A major pipeline explosion damages production, investigation launched into Aramco's safety procedures.", "aramco"),
        ("New regulations on emissions could increase operating costs for petroleum companies across the region.", "aramco"),
    ]

    # Heading printed for each result field, in display order
    report_layout = (
        ("Companies found:", 'companies_found'),
        ("Heuristics:", 'heuristics'),
        ("NLI labels & scores:", 'nli_labels'),
        ("Sentiment:", 'sentiment'),
        ("Fused scores:", 'fused_scores'),
        ("FINAL DECISION:", 'final'),
    )

    for article, company_key in samples:
        print("\n--- ARTICLE ---")
        print(article)
        result = combine_signals(article, company=company_key, threshold=0.45)
        for heading, field in report_layout:
            print(heading, result[field])


--- ARTICLE ---
Aramco reports record quarterly profits after higher oil prices boosted revenue.
Companies found: ['aramco']
Heuristics: {'neg_count': 0, 'pos_count': 2}
NLI labels & scores: {'positive impact': 0.9899078607559204, 'neutral impact': 0.0057486253790557384, 'negative impact': 0.004343463573604822}
Sentiment: {'label': 'LABEL_2', 'score': 0.8355144262313843}
Fused scores: {'pos': 0.9936924435852693, 'neg': 0.0027146648171340643, 'neu': 0.0035928909725965203}
FINAL DECISION: positive

--- ARTICLE ---
A major pipeline explosion damages production, investigation launched into Aramco's safety procedures.
Companies found: ['aramco']
Heuristics: {'neg_count': 2, 'pos_count': 0}
NLI labels & scores: {'negative impact': 0.9893918037414551, 'positive impact': 0.006202775985002518, 'neutral impact': 0.004405457526445389}
Sentiment: {'label': 'LABEL_0', 'score': 0.8189228773117065}
Fused scores: {'pos': 0.0038767348979413456, 'neg': 0.993369853588859, 'neu': 0.002753410888199643}
FI