In [1]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit"
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",         
    quantization_config=bnb_config
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]


In [None]:
import torch.nn.functional as F


prompt = """USER: Write a Python function that implements a graph-based pathfinding algorithm to find the shortest path between two nodes in a weighted, undirected graph, considering both distance and cost constraints. The function should:
1. Take a graph (as an adjacency list with weights and costs), start node, end node, and a maximum cost budget as input.
2. Return the shortest path (list of nodes) that respects the cost budget, or None if no valid path exists.
3. Use Dijkstra’s algorithm with a modified priority queue to account for both distance and cost.
4. Include error handling for invalid inputs (e.g., negative weights, non-existent nodes).
5. Provide an example usage with a graph containing at least 5 nodes, demonstrating both a successful path and a case where no path fits the budget.

Example graph format:
graph = {
    'A': {'B': (2, 5), 'C': (4, 10)},  
    'B': {'A': (2, 5), 'D': (3, 8)},
    'C': {'A': (4, 10), 'D': (1, 3), 'E': (5, 12)},
    'D': {'B': (3, 8), 'C': (1, 3), 'E': (2, 6)},
    'E': {'C': (5, 12), 'D': (2, 6)}
AI: 
}"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=4096)
result = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("Generated answer:")
print(result)

with torch.no_grad():
    outputs = model(**inputs, return_dict=True)
    logits = outputs.logits  
    
    
    if torch.isnan(logits).any() or torch.isinf(logits).any():
        print("Warning: NaNs or Infs in logits. Replacing with zeros.")
        logits = torch.where(torch.isnan(logits) | torch.isinf(logits), torch.zeros_like(logits), logits)
    
    
    k = 1000  
    top_k_logits, _ = torch.topk(logits, k, dim=-1)
    
    
    log_probs = F.log_softmax(top_k_logits, dim=-1)
    probs_seq = torch.exp(log_probs)  
    probs_seq = probs_seq + 1e-8  
    probs_seq = probs_seq / probs_seq.sum(dim=-1, keepdim=True)  
    
    
    if torch.isnan(probs_seq).any() or torch.isinf(probs_seq).any() or (probs_seq.sum(dim=-1) < 0.99).any():
        print("Warning: Invalid probabilities detected. Using uniform distribution.")
        probs_seq = torch.ones_like(probs_seq) / probs_seq.size(-1)
    
    
    entropy_per_token = torch.distributions.Categorical(probs_seq).entropy()
    avg_entropy = entropy_per_token.mean().item()














print(f"Avg token entropy: {avg_entropy:.4f}")
print(f"Allocated VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
print(f"Reserved VRAM: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

Generated answer:
USER: Write a Python function that implements a graph-based pathfinding algorithm to find the shortest path between two nodes in a weighted, undirected graph, considering both distance and cost constraints. The function should:
1. Take a graph (as an adjacency list with weights and costs), start node, end node, and a maximum cost budget as input.
2. Return the shortest path (list of nodes) that respects the cost budget, or None if no valid path exists.
3. Use Dijkstra’s algorithm with a modified priority queue to account for both distance and cost.
4. Include error handling for invalid inputs (e.g., negative weights, non-existent nodes).
5. Provide an example usage with a graph containing at least 5 nodes, demonstrating both a successful path and a case where no path fits the budget.

Example graph format:
graph = {
    'A': {'B': (2, 5), 'C': (4, 10)},  # (distance, cost)
    'B': {'A': (2, 5), 'D': (3, 8)},
    'C': {'A': (4, 10), 'D': (1, 3), 'E': (5, 12)},
    'D'

ValueError: Expected parameter probs (Tensor of shape (1, 315, 1000)) of distribution Categorical(probs: torch.Size([1, 315, 1000])) to satisfy the constraint Simplex(), but found invalid values:
tensor([[[8.1848e-02, 3.8971e-02, 3.4515e-02,  ..., 2.3794e-04,
          2.3794e-04, 2.3794e-04],
         [1.6357e-01, 1.3232e-01, 9.1614e-02,  ..., 3.3736e-05,
          3.3736e-05, 3.3736e-05],
         [7.7295e-01, 7.7087e-02, 5.7312e-02,  ..., 1.3113e-06,
          1.3113e-06, 1.3113e-06],
         ...,
         [3.7573e-01, 2.1899e-01, 1.4709e-01,  ..., 5.8413e-06,
          5.8413e-06, 5.8413e-06],
         [6.0986e-01, 1.4709e-01, 7.4036e-02,  ..., 2.8014e-06,
          2.8014e-06, 2.8014e-06],
         [1.8494e-01, 1.6455e-01, 8.7402e-02,  ..., 2.5094e-05,
          2.5094e-05, 2.5094e-05]]], device='cuda:0', dtype=torch.float16)