In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("./Qwen2.5-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("./Qwen2.5-0.5B-Instruct", temperature = 0.1, top_p = 0.6, do_sample=True)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import yaml

# Load the YAML data
with open("./me.yaml", "r") as file:
    data = yaml.safe_load(file)

# Process the education data
education = data["education"]
education_text = "\n".join(
    [f"- Degree: {e['degree']}, Institution: {e['institution']}, Graduation Date: {e['graduation_date']}" for e in education]
)

# Process the work experience data
work = data["work_experience"]
work_text = "\n".join(
    [
        f"- Role: {w['title']}, Company: {w['company']}, Start Date: {w['start_date']}, End Date: {w.get('end_date', 'Present')}, Work Description: {w['achievements']}"
        for w in work
    ]
)

# Process the skills data
skills = data["skills"]
skills_text = (
    f"Programming: {skills['programming_languages']}, Tools: {skills['tools']}, Languages: {skills['languages']}"
)


In [10]:
system_prompt = '''
You are an AI assistant of Henry Liang.
You are designed to assist users with questions about Henry Liang, his work, and his experience.

**Tone and Scope:**
Answer all questions cheerfully, but do not provide more information than what is explicitly asked.

**Uncertainty:**
If you do not know the answer, state clearly, "I do not know." Avoid guessing or fabricating information.

**Restrictions:**
Do not answer questions that are inappropriate, harmful, racist, or illegal.
Avoid using inappropriate language under any circumstance.
Do not provide medical, legal, or financial advice.
Do not share any information that can identify or locate a person.
Follow these guidelines strictly, and always prioritize clarity, accuracy, and adherence to the scope of your role.
'''

def get_user_prompt(education_text, work_text, skills_text, q):
    # Combine the information into the final content string
    content = f"""
    The following is data about Henry Liang:

    **Education:**
    {education_text}

    **Work Experience:**
    {work_text}

    **Skills:**
    {skills_text}

    If it is a question about Henry Liang, use the above information to answer the following question about Henry Liang.

    {q}
    """


    return content


In [11]:
qa_dataset = [
    # Education-related questions
    {
        "question": "Where did Henry complete his master's degree?",
        "answer": "Henry completed his M.S. in Machine Learning and Data Science at Northwestern University, graduating in December 2023."
    },
    {
        "question": "When did he graduate from UCLA with a bachelor's degree?",
        "answer": "He graduated from the University of California, Los Angeles, with a B.S. in Applied Mathematics and Statistics in March 2022."
    },
    {
        "question": "What did Henry study for his undergraduate degree?",
        "answer": "He studied Applied Mathematics and Statistics for his undergraduate degree."
    },

    # Work experience-related questions
    {
        "question": "What is his current role?",
        "answer": "He is currently a Data Scientist II at Vail Systems, Inc., a role he began in February 2024."
    },
    {
        "question": "What major achievement did Henry accomplish at Vail Systems?",
        "answer": "Henry created a novel hybrid embedding that reduced the no-context rate for RFP completion by 83.33%, leading to a paper acceptance at the AI-ML Systems Conference in October 2024."
    },
    {
        "question": "Where did he work before joining Vail Systems?",
        "answer": "Before joining Vail Systems, he worked as an Applied Scientist Intern at Amazon and a Data Science Intern at Roblox."
    },
    {
        "question": "What tool did he develop during his Amazon internship?",
        "answer": "He developed a custom LLM app to explain knowledge graph differences and detect network routing anomalies, achieving 99% accuracy in summarizing path attributes with KGAG."
    },
    {
        "question": "Which internship involved building an ETL pipeline for analyzing game metrics?",
        "answer": "Henry built an ETL pipeline for analyzing game-level metrics at Roblox during his Data Science Internship."
    },
    {
        "question": "What was his role at Northwestern University?",
        "answer": "He was a Machine Learning Researcher at Northwestern University, where he designed and implemented a scalable AWS IoT pipeline for real-time forecasting."
    },

    # Skills and tools-related questions
    {
        "question": "What programming languages does Henry know?",
        "answer": "Henry is proficient in Python, R, PostgreSQL, Java, and JavaScript."
    },
    {
        "question": "What machine learning tools is he skilled in?",
        "answer": "He is skilled in TensorFlow, PyTorch, LangChain, and Hugging Face."
    },
    {
        "question": "What cloud and container tools does he use?",
        "answer": "He uses Docker, Kubernetes, and has experience with AWS IoT pipelines."
    },
    {
        "question": "Which languages does Henry speak?",
        "answer": "Henry speaks English, Mandarin, and French."
    },
    {
        "question": "What tools has he used for big data processing?",
        "answer": "He has used Apache Spark and Apache Hadoop for big data processing."
    },
]


In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-English")
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-English")

nlp = pipeline("ner", model=model, tokenizer=tokenizer)
example = "My name is Wolfgang and I live in Berlin"

ner_results = nlp(example)
print(ner_results)

In [2]:
import spacy

nlp = spacy.load("en_core_web_lg")
# prompt = '''He has used the following tools for big data processing:
# - TensorFlow
# - PyTorch
# - LangChain
# - Hugging Face
# - Git
# - Docker
# - Kubernetes
# - Apache Spark
# - Apache Hadoop'''
# hypothesis = "You graduated in 2023 with a master's from ABC University, and currently doing a PhD at XYZ University."

# # Prompt and hypothesis
prompt = "I graduated with a master's in December 2023 from XYZ University."
hypothesis = "You graduated in 2023 with a master's from ABC University, and currently doing a PhD at XYZ University."

# Extract entities
def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

def extract_dates(entities):
    new_entities = []
    for ent, label in entities:
        if label == "DATE":
            splits = ent.split(" ")
            for split in splits:
                new_entities.append((split, label))
        else:
            new_entities.append((ent, label))
    return new_entities

prompt_entities = extract_dates(extract_entities(prompt))
hypothesis_entities = extract_dates(extract_entities(hypothesis))

# Compare entities
hallucinated = [ent for ent in hypothesis_entities if ent not in prompt_entities]
non_hallucinated = [ent for ent in hypothesis_entities if ent in prompt_entities]


print("Prompt Entities:", prompt_entities)
print("Hypothesis Entities:", hypothesis_entities)
print("Hallucinated Entities:", hallucinated)
print("Non-Hallucinated Entities:", non_hallucinated)

Prompt Entities: [('December', 'DATE'), ('2023', 'DATE'), ('XYZ University', 'ORG')]
Hypothesis Entities: [('2023', 'DATE'), ('ABC University', 'ORG'), ('PhD', 'WORK_OF_ART'), ('XYZ University', 'ORG')]
Hallucinated Entities: [('ABC University', 'ORG'), ('PhD', 'WORK_OF_ART')]
Non-Hallucinated Entities: [('2023', 'DATE'), ('XYZ University', 'ORG')]


In [12]:
import torch
from sklearn.metrics import accuracy_score
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import spacy

nlp = spacy.load("en_core_web_sm")
def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]



# Initialize metrics
results = []

for item in qa_dataset:
    q = item["question"]
    expected_answer = item["answer"]

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_user_prompt(education_text, work_text, skills_text, q)}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    # Log generated response
    print(f"Question: {q}")
    print(f"Generated Answer: {response}")
    print(f"Expected Answer: {expected_answer}")
    print()

    prompt_entities = extract_entities(expected_answer)
    hypothesis_entities = extract_entities(response)

    # Simple similarity metric (BLEU score for QA)
    hallucinated = [ent for ent in hypothesis_entities if ent not in prompt_entities]
    non_hallucinated = [ent for ent in hypothesis_entities if ent in prompt_entities]   

    # Append to results
    results.append({
        "question": q,
        "expected_answer": expected_answer,
        "generated_answer": response,
        "hallucinated_entities_in_response": len(hallucinated) / len(hypothesis_entities)
    })

# Calculate average BLEU score
average_hallucination_rate = sum([result["hallucinated_entities_in_response"] for result in results]) / len(results)

# Log evaluation metrics
print(f"Average Hallucination Rate: {average_hallucination_rate:.4f}")

# Optional: Save results to a file for further analysis
import json
with open("evaluation_results.json", "w") as f:
    json.dump(results, f, indent=4)


Question: Where did Henry complete his master's degree?
Generated Answer: Henry completed his Master's degree from Northwestern University in December 2023.
Expected Answer: Henry completed his M.S. in Machine Learning and Data Science at Northwestern University, graduating in December 2023.

Question: When did he graduate from UCLA with a bachelor's degree?
Generated Answer: Henry Liang graduated from University of California, Los Angeles with a Bachelor of Arts degree in Applied Mathematics and Statistics on March 2022.
Expected Answer: He graduated from the University of California, Los Angeles, with a B.S. in Applied Mathematics and Statistics in March 2022.

Question: What did Henry study for his undergraduate degree?
Generated Answer: Henry studied for his undergraduate degree at Northwestern University. Specifically, he graduated with a Master's degree in Machine Learning and Data Science in December 2023.
Expected Answer: He studied Applied Mathematics and Statistics for his un

ZeroDivisionError: division by zero

In [15]:
print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((