In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import numpy as np

In [None]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# Load model in 8-bit quantization (for RTX 4080 Super)
quant_config = BitsAndBytesConfig(load_in_8bit=True)

# Load model fully into GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [25]:
# Precompute token validity mask (only run once)
vocab_size = len(tokenizer.get_vocab())
all_tokens = torch.arange(vocab_size, device="cuda")  # Tensor of token IDs
decoded_tokens = tokenizer.batch_decode(all_tokens.unsqueeze(1))  # Vectorized decoding

# Create boolean mask **directly as a tensor** (avoids Python list overhead)
valid_mask = torch.tensor(
    [token.isalpha() and len(token) > 1 for token in decoded_tokens], dtype=torch.bool, device="cuda"
).clone()  # `clone()` avoids PyTorch memory issues

# Save valid token IDs & decoded token texts
allowed_tokens = torch.masked_select(all_tokens, valid_mask)  # Fast retrieval during inference
allowed_token_texts = tokenizer.batch_decode(allowed_tokens.tolist())  # Decode only once

# Preallocate masked logits tensor for reuse
masked_logits = torch.empty(vocab_size, dtype=torch.float16, device="cuda")

In [64]:
def fast_inference(prompt):
    inference_prompt = "Based on this financial report my investment advice is to"
    new_prompt = prompt + inference_prompt

    # Tokenize input & move to GPU
    inputs = tokenizer(new_prompt, return_tensors="pt").to("cuda")

    # Get logits
    with torch.no_grad():
        outputs = model(**inputs)

    # Extract logits for next token (FAST)
    logits = outputs.logits[:, -1, :].squeeze()  # Shape: (vocab_size,)

    # Reset and mask logits (FAST IN-PLACE)
    masked_logits.fill_(-float("inf"))  # Reset all values in-place
    masked_logits[valid_mask] = logits[valid_mask]  # Keep only valid tokens

    # Compute probabilities
    probs = torch.nn.functional.softmax(masked_logits, dim=-1)

    # Get corresponding probabilities for allowed tokens
    allowed_probs = torch.masked_select(probs, valid_mask)

    # Create dictionary without CPU transfer overhead
    token_prob_dict = dict(zip(allowed_token_texts, allowed_probs.tolist()))

    # Sort & return top 10 words
    return dict(sorted(token_prob_dict.items(), key=lambda item: item[1], reverse=True))

token_prob_dict = fast_inference("Explain quantum mechanics in simple terms.")

In [45]:
good_report = """
NexaTech Inc. - Q1 2024 Financial Report
For the period ended March 31, 2024

NexaTech Inc. is pleased to report strong financial results for the first quarter of 2024, driven by robust revenue growth, margin expansion, and continued execution of strategic initiatives.

Financial Performance
For the quarter ended March 31, 2024, total revenue increased 18.4% year-over-year to $1.74 billion, reflecting higher demand across core product segments and continued market penetration in key geographies. Gross profit expanded 22.7% to $764 million, driven by pricing optimization and supply chain efficiencies. Gross margin improved to 43.9%, compared to 41.6% in the prior year.

Operating income increased 27.2% to $498 million, representing an operating margin of 28.6%, up from 26.3% in Q1 2023. Net income attributable to shareholders was $382 million, a 30.1% increase over the prior-year period, translating to diluted earnings per share (EPS) of $2.74, compared to $2.08 in Q1 2023.

Strategic and Operational Highlights
During the quarter, NexaTech successfully launched its AI-driven enterprise cloud platform, achieving widespread adoption among Fortune 500 clients. The company also expanded its international presence, securing strategic partnerships in Europe and Asia-Pacific, further diversifying its revenue streams.

Capital expenditures for Q1 totaled $112 million, reflecting continued investment in AI and cloud infrastructure. The company maintained a strong balance sheet, with $2.1 billion in cash and short-term investments and a net debt-to-equity ratio of 0.24, ensuring ample liquidity to fund future growth.

Outlook
Given the strong performance in Q1 and continued market momentum, NexaTech is raising full-year 2024 guidance, expecting revenue growth of 15%–18%, with an EPS range of $10.50–$11.20, up from previous guidance of $9.80–$10.50.

The company remains committed to operational efficiency, technological innovation, and shareholder value creation, positioning itself for sustained growth in a dynamic market environment.
"""

In [42]:
bad_report = """
CoreSteel Industries - Q1 2024 Financial Report
For the period ended March 31, 2024

CoreSteel Industries reports a challenging first quarter, as macroeconomic headwinds, supply chain disruptions, and weaker-than-expected demand weighed on financial results. The company remains focused on cost management and operational efficiency while navigating ongoing market volatility.

Financial Performance
For the quarter ended March 31, 2024, revenue declined 8.7% year-over-year to $640 million, primarily due to reduced order volumes and pricing pressures in the steel manufacturing segment. Gross profit decreased 11.2% to $172 million, with gross margin contracting to 26.9%, down from 29.3% in Q1 2023, reflecting higher raw material costs.

Operating income declined 22.8% to $74 million, with operating margin falling to 11.6%, compared to 14.2% in the prior-year period. Net income attributable to shareholders was $41 million, representing a 29.4% year-over-year decrease, leading to diluted earnings per share (EPS) of $0.88, compared to $1.23 in Q1 2023.

Operational Challenges and Cost Management
CoreSteel experienced weaker demand in North America and Europe, where key customers delayed capital investments amid economic uncertainty. Additionally, higher energy and labor costs pressured margins. The company initiated a cost-reduction program targeting $50 million in annualized savings, including workforce optimization and supply chain restructuring.

Capital expenditures in Q1 were $52 million, primarily allocated to equipment upgrades and digitalization initiatives. CoreSteel ended the quarter with $284 million in cash and equivalents, maintaining financial flexibility, although net debt increased to $1.18 billion, raising leverage concerns.

Outlook
Given the uncertain economic environment, CoreSteel adjusts its full-year 2024 guidance, now anticipating revenue contraction of 4%–6%, with EPS expected between $3.00–$3.40, down from prior estimates of $3.80–$4.20. The company remains focused on cost containment, operational efficiency, and supply chain resilience while assessing opportunities for strategic realignment.

While near-term headwinds persist, CoreSteel continues to leverage its strong industry position and long-term customer relationships to drive stability and recovery.
"""


In [65]:
token_prob_dict = fast_inference(bad_report)
# Step 7: Sort and display top predictions
sorted_token_probs = {k: v for k, v in sorted(token_prob_dict.items(), key=lambda item: item[1], reverse=True)}

print("\n🔹 **Next Token Prediction Probabilities (Only Meaningful Words):**")
for token, prob in list(sorted_token_probs.items())[:50]:  
    print(f"{token:<10} | Probability: {prob:.4f}")


🔹 **Next Token Prediction Probabilities (Only Meaningful Words):**
sell       | Probability: 0.3723
buy        | Probability: 0.1144
avoid      | Probability: 0.0516
stay       | Probability: 0.0372
remain     | Probability: 0.0259
maintain   | Probability: 0.0109
invest     | Probability: 0.0086
proceed    | Probability: 0.0083
consider   | Probability: 0.0046
recommend  | Probability: 0.0032
caut       | Probability: 0.0031
carefully  | Probability: 0.0026
Buy        | Probability: 0.0022
purchase   | Probability: 0.0020
closely    | Probability: 0.0017
caution    | Probability: 0.0017
approach   | Probability: 0.0017
Hold       | Probability: 0.0016
adopt      | Probability: 0.0012
exercise   | Probability: 0.0011
re         | Probability: 0.0011
Invest     | Probability: 0.0009
analyze    | Probability: 0.0009
divers     | Probability: 0.0009
investors  | Probability: 0.0008
evaluate   | Probability: 0.0008
advise     | Probability: 0.0007
BU         | Probability: 0.0006
minimize