In [1]:
!pip install -q transformers torch ipywidgets flash-attn
!pip install -q accelerate>=0.26.0
!pip install -U -q bitsandbytes


In [2]:
import accelerate
print(accelerate.__version__)


1.1.1


In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import accelerate
import bitsandbytes, flash_attn

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # or load_in_8bit=True
)

model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    torch_dtype=torch.float16, 
    device_map="auto",
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Checking memory

In [4]:
import torch

print(torch.cuda.memory_allocated() / (1024 ** 3), "GB allocated after loading")


11.290787696838379 GB allocated after loading


In [5]:
from pydantic import BaseModel, ValidationError
import json

class StudentProfile(BaseModel):
    major: str
    bio: str

def validate_json(response: str):
    try:
        # Attempt to parse and validate the response as a StudentProfile
        student_profile = StudentProfile.parse_raw(response)
        return student_profile
    except ValidationError as e:
        # Catch invalid JSON or schema errors and return None
        print(f"Invalid JSON or structure: {e}")
        return None
    except json.JSONDecodeError as e:
        # Catch invalid JSON formatting errors
        print(f"JSON Decode Error: {e}")
        return None

In [15]:
chat_template = """<|im_start|>system
You are a helpful assistant that answers in JSON. Here's the json schema you must adhere to:
<schema>
{{
    "major": "string",
    "bio": "string"
}}
</schema><|im_end|>
<|im_start|>user
Generate a student profile for {major} major.<|im_end|>
<|im_start|>assistant
<|im_end|>"""

major = "Computer Science"
formatted_prompt = chat_template.format(major=major)

In [23]:
def generate_response(model, tokenizer, prompt, temperature=1.0, num_return_sequences=1, device = "cuda"):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    
    # Generate
    generated_ids = model.generate(
        input_ids,
        temperature=temperature, 
        repetition_penalty=1.1,
        num_return_sequences=num_return_sequences,
        do_sample=True, 
        max_length=512,
        eos_token_id=tokenizer.eos_token_id
    )
    
    # Decode and return the generated text
    response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_space=True)

    
    return response
    

In [24]:
response = generate_response(model, tokenizer, formatted_prompt, temperature=0.5)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [25]:
import json

json.loads(response)


{'major': 'Computer Science',
 'bio': 'A passionate and ambitious student with an innate curiosity for technology and problem-solving, specializing in computer science. Skilled in various programming languages, experienced with algorithms and data structures. Enthusiastic about emerging technologies and their potential applications.'}

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'\n:// End of response\n:// -------------------------------------------------\nphp\n// Start of user input\nenter code here\n{\n    "major": "Mathematics",\n    "bio": "I am an undergraduate studying Mathematics at XYZ University. My primary focus is on discrete mathematics, and I hope to apply my knowledge to develop efficient algorithms."\n}\n\n?>'

In [26]:
import json
import re

def extract_json_response(response):
    """
    Extract clean JSON from model response
    
    Args:
        response (str or list): Model response
    Returns:
        str: Formatted JSON string
    """
    # Convert list to string if necessary
    if isinstance(response, list):
        response = response[0]
    
    # Find the JSON object using regex
    match = re.search(r'\{[\s\S]*\}', response)
    if match:
        try:
            # Extract the matched JSON string
            json_str = match.group(0)
            # Parse and reformat to ensure valid JSON
            parsed_json = json.loads(json_str)
            # Return formatted JSON
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
            return "Invalid JSON"
    return "Invalid JSON"

# Example usage:
model_response = generate_response(model, tokenizer, formatted_prompt, temperature=0.5) # Your model response
clean_json = extract_json_response(model_response)
print(clean_json)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{
  "major": "Computer Science",
  "bio": "A passionate and ambitious student with an innate curiosity for technology and problem-solving. Skilled in Python, Java, and JavaScript, with experience in web development and machine learning algorithms. Actively involved in various coding competitions and hackathons, always seeking opportunities to learn and grow within the dynamic field of computer science."
}


In [28]:
print(model_response[0])

Return only a valid JSON object for a college student profile for the "Computer Science" major.

The JSON must have these fields:
- "major": "Computer Science"
- "bio": Student's activities, achievements, projects, leadership positions, internships, research, and goals

Format rules:
- Use double quotes for all keys and values
- No text outside the JSON object
- No personal identifiers or names
- No single quotes or unescaped characters

Example format:
{"major": "Example Major", "bio": "Example bio text"}

Generate exactly one profile in valid JSON: 

{
  "major": "Computer Science",
  "bio": "Passionate computer science student with a strong foundation in programming, data structures, and algorithms. Experienced in multiple programming languages including Java, Python, and C++. Actively involved in hackathons and coding competitions, securing multiple top placements. Contributor to open-source projects, focusing on improving code efficiency and scalability. Seeking opportunities to a