In [4]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Checkpoint identifier handed to `from_pretrained`; replace with the exact path for the 8B model if needed
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Free cached CUDA memory before anything is loaded (skipped when no GPU is present)
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Tokenizer matching the checkpoint above; reused later by the generation pipeline
tokenizer = AutoTokenizer.from_pretrained(model_name)


ModuleNotFoundError: No module named 'torch'

In [4]:

# Check if CUDA is available
device = 0 if torch.cuda.is_available() else -1  # 0 -> GPU available, -1 -> fall back to CPU

# Derive dtype and placement once so the model and the pipeline always agree.
# Half precision is only used on GPU; the CPU fallback uses float32 because
# float16 is poorly supported for CPU inference.
use_gpu = device == 0
model_dtype = torch.float16 if use_gpu else torch.float32
model_device_map = "auto" if use_gpu else None  # "auto" lets the loader place weights on the GPU(s)

# Load the model with the appropriate device settings
# NOTE(review): trust_remote_code=True allows Python code shipped with the
# checkpoint to run locally; presumably not required for this model — confirm
# and drop it if so.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=model_dtype,
    device_map=model_device_map,
    trust_remote_code=True
)

# Initialize the text generation pipeline around the already-loaded model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=model_dtype,
    device_map=model_device_map,
)

# # Example usage of the pipeline
# output = pipe("Once upon a time,", max_length=50)
# print(output)


Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.15s/it]


In [5]:
# Sanity check: list the device on which each model parameter was placed
for param_name, tensor in model.named_parameters():
    print(f"{param_name} is on {tensor.device}")


model.embed_tokens.weight is on cuda:0
model.layers.0.self_attn.q_proj.weight is on cuda:0
model.layers.0.self_attn.k_proj.weight is on cuda:0
model.layers.0.self_attn.v_proj.weight is on cuda:0
model.layers.0.self_attn.o_proj.weight is on cuda:0
model.layers.0.mlp.gate_proj.weight is on cuda:0
model.layers.0.mlp.up_proj.weight is on cuda:0
model.layers.0.mlp.down_proj.weight is on cuda:0
model.layers.0.input_layernorm.weight is on cuda:0
model.layers.0.post_attention_layernorm.weight is on cuda:0
model.layers.1.self_attn.q_proj.weight is on cuda:0
model.layers.1.self_attn.k_proj.weight is on cuda:0
model.layers.1.self_attn.v_proj.weight is on cuda:0
model.layers.1.self_attn.o_proj.weight is on cuda:0
model.layers.1.mlp.gate_proj.weight is on cuda:0
model.layers.1.mlp.up_proj.weight is on cuda:0
model.layers.1.mlp.down_proj.weight is on cuda:0
model.layers.1.input_layernorm.weight is on cuda:0
model.layers.1.post_attention_layernorm.weight is on cuda:0
model.layers.2.self_attn.q_proj.w

In [4]:
from time import time



# System prompt passed as the "system" turn of each chat request: it instructs the
# model to answer only the question asked, to follow the requested answer format,
# to avoid implied information, and to give related phrases for "yes" answers.
system_message = """
You are an AI assistant designed to answer questions.
Please restrict your answer to the exact question and use the exact answer format asked.
Answer should not have implied information. If the answer is yes, always provide related phrases. 
"""


def colorize_text(text):
    """Highlight the section labels of a response with colored, bold markup.

    Every occurrence of "Reasoning:", "Question:", "Answer:" and "Total time:"
    is replaced by the same label preceded by two newlines and wrapped in a
    bold ``<font color=...>`` tag (blue, red, green and magenta respectively).

    Args:
        text: The response string to decorate.

    Returns:
        The decorated string; text without any of the labels is returned unchanged.
    """
    label_colors = {
        "Reasoning": "blue",
        "Question": "red",
        "Answer": "green",
        "Total time": "magenta",
    }
    for label, hue in label_colors.items():
        marker = f"{label}:"
        text = text.replace(marker, f"\n\n**<font color='{hue}'>{marker}</font>**")
    return text

def format_prompt(text, topics_df=None):
    """Build the user prompt asking which topics a paragraph mentions.

    The prompt has three parts: a numbered list of topics with their
    descriptions, a numbered answer template (one "[yes/no], related phrases"
    line per topic), and the paragraph itself in single quotes.

    Args:
        text: The paragraph to be classified.
        topics_df: Table with a ``fea`` column (short topic name) and a
            ``description`` column. Defaults to the notebook-level ``query_df``.

    Returns:
        The assembled prompt string.
    """
    if topics_df is None:
        topics_df = query_df

    # Iterate positionally: indexing with ``series[i]`` is label-based and would
    # fail or pick the wrong row when the table does not have a 0..n-1 index.
    topics = list(zip(topics_df["fea"], topics_df["description"]))

    question_content = "Does the paragraph mention any of the following topics:\n"
    question_content += "".join(
        f"  ({number}) {fea}: {description}.\n"
        for number, (fea, description) in enumerate(topics, start=1)
    )

    answer_content = "Return answer in format:\n"
    answer_content += "".join(
        f"  ({number}) {fea}: [yes/no], related phrases if any: \n"
        for number, (fea, _) in enumerate(topics, start=1)
    )

    paragraph_content = f"Paragraph: '{text}' \n"
    return question_content + answer_content + paragraph_content



def query_model_batch(
        system_message,
        user_messages,
        temperature=0,
        max_length=1024,
        generator=None
    ):
    """Generate one answer per user message with the text-generation pipeline.

    Each message is wrapped as "Question: <message> Answer:", rendered through
    the tokenizer's chat template together with ``system_message``, and all
    prompts are handed to the pipeline in a single call.

    Args:
        system_message: Content of the system turn shared by every prompt.
        user_messages: Iterable of user prompt strings.
        temperature: Sampling temperature. A positive value enables nucleus
            sampling (``top_p=0.5``). Zero (the default), a negative value or
            ``None`` selects greedy decoding, because sampling requires a
            strictly positive temperature.
        max_length: Maximum number of newly generated tokens per prompt
            (forwarded as ``max_new_tokens``).
        generator: Pipeline-like callable with a ``tokenizer`` attribute.
            Defaults to the notebook-level ``pipe``.

    Returns:
        A list with one string per input message, in input order, of the form
        "<wrapped message> <generated text> Total time: <seconds> sec.". The
        time is measured from function entry, so it covers the whole call.
    """
    start_time = time()

    # Add "Question: ... Answer:" to each user message for clarity
    batched_messages = [
        "Question: " + message + " Answer:" for message in user_messages
    ]
    if not batched_messages:
        # Nothing to generate; avoid calling the pipeline with an empty list.
        return []

    if generator is None:
        generator = pipe
    tok = generator.tokenizer

    # Construct prompts for each message in batch
    all_prompts = [
        tok.apply_chat_template(
            [
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message}
            ],
            tokenize=False,
            add_generation_prompt=True
        ) for user_message in batched_messages
    ]

    # Define the end-of-sequence terminators
    terminators = [
        tok.eos_token_id,
        tok.convert_tokens_to_ids("<|eot_id|>")
    ]

    generation_kwargs = dict(
        num_return_sequences=1,
        eos_token_id=terminators,
        max_new_tokens=max_length,
        return_full_text=False,
        pad_token_id=terminators[0],
    )
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, top_p=0.5, temperature=temperature)
    else:
        # Sampling with a non-positive temperature is invalid; decode greedily.
        generation_kwargs["do_sample"] = False

    # Run inference for all prompts in one pipeline call
    sequences = generator(all_prompts, **generation_kwargs)

    # The timing suffix is the same for every answer of this call
    total_time = f"Total time: {round(time() - start_time, 2)} sec."

    # Each pipeline result is a list of candidates; keep the first (and only) one
    return [
        wrapped + " " + sequence[0]['generated_text'] + " " + total_time
        for wrapped, sequence in zip(batched_messages, sequences)
    ]

# Paths
- Raw data is stored under the folder "./data".
- Results from GPT and Llama should be stored under "./gpt_data" and "./llama_data", respectively.


In [7]:
import pandas as pd
import numpy as np
# Posts to annotate and the topic definitions used to build each prompt
answer_df = pd.read_csv("./data/answer_df_raw.csv")
query_df = pd.read_csv("./data/fea_df.csv")
print(answer_df.shape)

# Sample post used to preview the prompt that will be sent to the model
sample_text = "i have always wondered how people with ana make it when theyre eating very few calorie s. do their organs not shut down. i suffered with an eating disorder for a few year s and i am recovered now but i never struggled with severe anorexia. does the body only begin to cannibalize the organs after all of their fat storage is gone. what is the process."

# # Optional end-to-end check against the model (disabled)
# prompts = [format_prompt(sample_text)]
# responses = query_model_batch(
#         system_message=system_message,
#         user_messages=prompts,
#         temperature=0.5,
#         max_length=512
#     )
#
# # Display and process responses in a loop
# for i, response in enumerate(responses):
#     print(colorize_text(f"{response}"))

print(format_prompt(sample_text))


(77175, 2)
Does the paragraph mention any of the following topics:
  (1) relation: Family and social relationships.
  (2) protein: High protein diet, carbohydrate-reduced(low-carb) high-protein diet.
  (3) ed: Eating disorders(ED) diagnosis or recovery, ED includes anorexia nervosa, anorexic, bulimia, bulimic, binge eating disorders, arfid, osfed, pica.
  (4) exercise: Physical exercise.
  (5) meal: Routine of meals.
  (6) crave: Craving for high calorie food or carbs.
  (7) restrict: Restrict nutrition or calorie intake.
  (8) binge: Binge eating.
  (9) loss: Body weight loss.
  (10) gain: Body weight gain.
  (11) calorie: Count calorie.
  (12) thinspo: Drive for thinness, want to be thinner or skinny.
  (13) leanbody: Drive for lean body mass, low body fat, more muscular or muscles. Must related to body shape..
  (14) bodyhate: Body dissatisfaction, feel bad about body image and appearrance. Must related to body image..
  (15) feargain: Fear of body weight gain. Must related to body 

In [12]:
# Annotate the next batch of posts that do not have a model answer yet and
# persist the updated table. Re-running the cell continues with the next batch.
for k in range(1):
    batch_size = 10

    # The raw file may not contain the result column yet; create it empty
    # (object dtype) so the NaN filter works and strings can be stored in it.
    if 'answer_string' not in answer_df.columns:
        answer_df['answer_string'] = None

    # Filter for rows where 'answer_string' is NaN and take the next batch
    unanswered_df = answer_df[answer_df['answer_string'].isna()]
    batch_df = unanswered_df.iloc[:batch_size]
    if batch_df.empty:
        print("No unanswered rows left.")
        break

    # Original index labels of the batch, used to write answers back
    indices_to_update = batch_df.index
    indices_to_update_list = list(indices_to_update)

    # Prepare prompt content for the batch
    prompts = [format_prompt(text) for text in batch_df['text_w_eos']]

    # Process all prompts of the batch in one call
    responses = query_model_batch(
        system_message=system_message,
        user_messages=prompts,
        temperature=0.1,
        max_length=512
    )

    for i, response in enumerate(responses):
        #display(Markdown(colorize_text(f"{response}")))

        # Each response starts with the wrapped prompt ("Question: ... Answer:").
        # Strip exactly that prefix instead of splitting on "Answer:", which
        # returns the wrong segment when the post or the generated text itself
        # contains "Answer:".
        prefix = "Question: " + prompts[i] + " Answer:"
        if response.startswith(prefix):
            answer = response[len(prefix):]
        elif "Answer:" in response:
            answer = response.split("Answer:")[1]
        else:
            answer = "Answer not found"

        # Use the original index from indices_to_update_list
        answer_df.loc[indices_to_update_list[i], 'answer_string'] = answer

    print(answer_df.loc[indices_to_update_list, :])
    # NOTE(review): the notebook's path convention puts Llama results under
    # "./llama_data", but this cell writes to "./gpt_data" — confirm the
    # intended destination before changing it.
    answer_df.to_csv('./gpt_data/answer_df.csv', index=False)



Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


        sm_id                                         text_w_eos  \
3070  12wvfn8  i have really hit a new low here. i do not kno...   
3071  12wwf17  pretty sure this whole thing is a scam because...   
3072  12wwm61  i have been overweight ever since i can rememb...   
3073  12wxnn0  hi all i hope itsec okay to ask here im conduc...   
3074  12wzhzp  what uncommon indian dish did you really enjoy...   
3075  12wzoxk  so i love eating avocado and hardboiled egg sa...   
3077  12x29qn  i got egg white powder on discount and baked 3...   
3078  12x2vkx  hi. so my fridge is a chaotic mess and i just ...   
3079  12x71qs  get your dunce hats out fittit itsec time for ...   

                                          answer_string  \
3070   (1) relation: no, \n(2) protein: no, \n(3) th...   
3071   (1) relation: no, \n(2) protein: no, \n(3) th...   
3072   (1) relation: yes, related phrases if any:'my...   
3073   (1) relation: no, \n(2) protein: no, \n(3) th...   
3074   (1) relation: no,