In [1]:
!pip install -q transformers torch ipywidgets flash-attn
!pip install -q accelerate>=0.26.0
!pip install -U -q bitsandbytes


In [None]:
import accelerate
print(accelerate.__version__)


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import accelerate
import bitsandbytes, flash_attn

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # or load_in_8bit=True
)

model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    torch_dtype=torch.float16, 
    device_map="auto",
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


2024-11-24 10:38:41.442324: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
model.bnb_4bit_compute_dtype = torch.float16  # Align with `torch_dtype=torch.float16`


Checking memory

In [None]:
import torch

print(torch.cuda.memory_allocated() / (1024 ** 3), "GB allocated after loading")


In [15]:
from pydantic import BaseModel, ValidationError
import json

class StudentProfile(BaseModel):
    major: str
    bio: str

def validate_json(response: str):
    try:
        # Attempt to parse and validate the response as a StudentProfile
        student_profile = StudentProfile.parse_raw(response)
        return student_profile
    except ValidationError as e:
        # Catch invalid JSON or schema errors and return None
        print(f"Invalid JSON or structure: {e}")
        return None
    except json.JSONDecodeError as e:
        # Catch invalid JSON formatting errors
        print(f"JSON Decode Error: {e}")
        return None

In [34]:
chat_template = """<|im_start|>system
You are a helpful assistant that answers in JSON. Follow the exact JSON schema below:
<schema>
{{
    "major": "string",
    "bio": "string"
}}
</schema>
When writing the "bio", strictly adhere to these rules:
1. Only include the student's experiences, activities, achievements, projects, leadership positions, internships, research, and career goals.
2. Do NOT include any personal information such as the student's name, gender, school name, hometown, ethnicity, or other identifiable details.
3. Write the bio in 3-6 sentences, focusing on the professional and academic highlights of the student, using concise and formal language.
<|im_end|>
<|im_start|>user
Generate a college student profile for a {major} major. Write a "bio" that is detailed but does not include any personal identifiers, and only focuses on the student's academic and professional experiences, activities, and goals. <|im_end|>
<|im_start|>assistant
"""

major = "Computer Science"
formatted_prompt = chat_template.format(major=major)

In [26]:
print(formatted_prompt)

<|im_start|>system
You are a helpful assistant that answers in JSON. Here's the json schema you must adhere to:
<schema>
{
    "major": "string",
    "bio": "string"
}
</schema><|im_end|>
<|im_start|>user
Generate a college student profile for Computer Science major. For the bio, in 3-6 sentences write a detailed description of the student's activities, achievements, projects, leadership positions, internships, research, and goals.<|im_end|>
<|im_start|>assistant



In [35]:
def generate_response(model, tokenizer, prompt, temperature=1.0, num_return_sequences=1, device = "cuda")->json:
    """
    Generate a response from the model given a prompt. Runs 4 times before failing and returning None.
    """
    
    for i in range(4):

        try:
            input_ids = tokenizer(prompt, padding=True, return_tensors="pt").input_ids.to(device)

            # Generate
            generated_ids = model.generate(
                input_ids,
                temperature=temperature, 
                repetition_penalty=1.1,
                num_return_sequences=num_return_sequences,
                do_sample=True, 
                max_length=512,
                eos_token_id=tokenizer.eos_token_id
            )
            
            # Decode and return the generated text
            response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_space=True)
            json_response = json.loads(response)
            return json_response
        
        except Exception as e:
            print(f"Attempt {i+1} failed with: {e}")
            print(response)
            if i == 3:
                return None

        

In [36]:
generate_response(model, tokenizer, formatted_prompt, temperature=0.5)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


{'major': 'Computer Science',
 'bio': 'A dedicated student with a keen interest in computer science, this individual has consistently demonstrated strong analytical skills and a passion for innovation. Through various programming projects and coursework, they have honed their proficiency in languages such as Python, Java, and C++. Their involvement in hackathons and coding competitions has not only sharpened their problem-solving abilities but also exposed them to collaborative teamwork. With aspirations towards contributing meaningful solutions in the tech industry, this student seeks to further develop their expertise in areas like artificial intelligence and cybersecurity. They believe that continuous learning and real-world experience will be instrumental in achieving their goal of making a significant impact in the field.'}

In [41]:
majors = {
    "STEM": [
        "Computer Science",
        "Mechanical Engineering",
        "Biology",
        "Physics",
    ],
    "Arts": [
        "Graphic Design",
        "Music",
        "Theater",
    ],
    "Business": [
        "Business Administration",
        "Finance",
        "Marketing",
        # "Accounting",
        # "Economics"
    ],
    "Humanities": [
        "Psychology",
        # "Sociology",
        # "Philosophy",
        # "Political Science"
    ]
}

In [42]:
import pandas as pd

data = []
for field, majors in majors.items():

    for major in majors:
        formatted_prompt = chat_template.format(major=major)
        response = generate_response(model, tokenizer, formatted_prompt, temperature=0.5)
        if response:
            data.append({"field": field, "major": major, "bio": response["bio"]})

df = pd.DataFrame(data)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [43]:
df.head()

Unnamed: 0,field,major,bio
0,STEM,Computer Science,A dedicated student with a passion for technol...
1,STEM,Mechanical Engineering,A dedicated student with a passion for innovat...
2,STEM,Biology,"A dedicated and curious student, the Biology m..."
3,STEM,Physics,"A dedicated and curious learner, the Physics m..."
4,Arts,Graphic Design,A dedicated and creative individual with a pas...


In [44]:
# save the df
df.to_csv("student_profiles.csv", index=False)