In [1]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel  # For LoRA handling
import sys
import os

# Suppress parallelism warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Sample financial data (simplified for minimal memory usage)
financial_data = {
    "total_income": 60000,
    "debts": [{"type": "Credit Card", "balance": 5000}]
}
query = f"Best debt strategy? Data: {json.dumps(financial_data)}"
context = "You’re a financial advisor. Give a short answer."
prompt = f"{context}\n\nQuery: {query}"

# Define model path (your fine-tuned model)
model_path = "./distilgpt2-finance-final"

try:
    # Check PyTorch setup
    print(f"PyTorch version: {torch.__version__}")
    print(f"Device available: {torch.device('cpu')}")
    print(f"Memory allocated: {torch.cuda.memory_allocated() if torch.cuda.is_available() else 'N/A'}")
    print(f"Python version: {sys.version}")
    print(f"Operating system: {os.name} {sys.platform}")

    # Load tokenizer
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    tokenizer.pad_token = tokenizer.eos_token  # Match training setup
    print(f"Tokenizer loaded: {tokenizer.__class__.__name__}")

    # Load base model and apply LoRA weights
    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
    print("Applying fine-tuned LoRA weights...")
    model = PeftModel.from_pretrained(base_model, model_path)
    device = torch.device("cpu")  # Explicitly use CPU
    model.to(device)
    print(f"Model loaded on {device}, type: {model.__class__.__name__}")

    # Tokenize input
    print("Tokenizing input...")
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=64)  # Minimal input size
    inputs = {key: val.to(device) for key, val in inputs.items()}
    print(f"Input shape: {inputs['input_ids'].shape}")

    # Generate output with minimal settings
    print("Generating output...")
    outputs = model.generate(
        **inputs,
        max_new_tokens=20,  # Extremely small output to test stability
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # Consistent with training
    )
    print(f"Output shape: {outputs.shape}")

    # Decode and print
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("\nGenerated Output:")
    print(generated_text)

    # Save to file
    with open("financial_report.txt", "w") as f:
        f.write(generated_text)
    print("Report saved to 'financial_report.txt'")

except Exception as e:
    print(f"Error: {str(e)}")
    print("Troubleshooting tips:")
    print(f" - Verify '{model_path}' contains valid files (config.json, adapter_model.bin, etc.).")
    print(" - Reinstall PyTorch for ARM: `conda install pytorch -c pytorch`")
    print(" - Test with base 'distilgpt2' alone:")
    print("     model = AutoModelForCausalLM.from_pretrained('distilgpt2')")
    print(" - Check RAM usage in Activity Monitor during 'Generating output...'")
    print(" - Re-run fine-tuning if model files might be corrupted.")

finally:
    # Clean up memory
    if 'model' in locals():
        del model
    if 'inputs' in locals():
        del inputs
    if 'outputs' in locals():
        del outputs
    torch.cuda.empty_cache()  # Clear residual GPU memory (if any)
    print("Memory cleared.")

PyTorch version: 2.6.0
Device available: cpu
Memory allocated: N/A
Python version: 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]
Operating system: posix darwin
Loading tokenizer...
Tokenizer loaded: GPT2TokenizerFast
Loading base model...
Applying fine-tuned LoRA weights...
Model loaded on cpu, type: PeftModelForCausalLM
Tokenizing input...
Input shape: torch.Size([1, 49])
Generating output...


: 

In [1]:
from transformers import pipeline

# Replace with your model's identifier on Hugging Face Hub
model_name = "Sribhuvan/distilgpt2-finance"

# Create a pipeline for text generation (or another appropriate task)
generator = pipeline("text-generation", model=model_name)

# Enter your query
query = input("Enter your query: ")

# Get the model's response
results = generator(query)

# Print the results
print("Model output:", results)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


: 

In [9]:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Sribhuvan/distilgpt2-finance")

Loading adapter weights from Sribhuvan/distilgpt2-finance led to unexpected keys not found in the model:  ['transformer.h.0.attn.c_attn.lora_A.default.weight', 'transformer.h.0.attn.c_attn.lora_B.default.weight', 'transformer.h.1.attn.c_attn.lora_A.default.weight', 'transformer.h.1.attn.c_attn.lora_B.default.weight', 'transformer.h.2.attn.c_attn.lora_A.default.weight', 'transformer.h.2.attn.c_attn.lora_B.default.weight', 'transformer.h.3.attn.c_attn.lora_A.default.weight', 'transformer.h.3.attn.c_attn.lora_B.default.weight', 'transformer.h.4.attn.c_attn.lora_A.default.weight', 'transformer.h.4.attn.c_attn.lora_B.default.weight', 'transformer.h.5.attn.c_attn.lora_A.default.weight', 'transformer.h.5.attn.c_attn.lora_B.default.weight']. 


In [7]:
prompt = "What is the best debt strategy?"

# Tokenize the prompt
input_ids = tokenizer.encode(prompt, return_tensors="pt")

# Generate output using the tokenized prompt
output_ids = model.generate(input_ids, max_length=512)

# Decode the generated tokens to text
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Generated Text:", output_text)

NameError: name 'tokenizer' is not defined

In [10]:
prompt = "What is the best debt strategy?"
llm = model.generate(prompt)
report = llm(prompt)

report


TypeError: The current model class (GPT2Model) is not compatible with `.generate()`, as it doesn't have a language model head. Please use one of the following classes instead: {'GPT2LMHeadModel'}