In [2]:
import pandas as pd
import random
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

Step 1: Request Access
Fill out the form provided by Meta AI (or the entity distributing the LLaMA weights) to request access to the model weights.

Step 2: Download the Weights
Once you have access, download the weights to a directory on your local machine.

Step 3: Convert the Weights
Use the provided conversion script to convert the weights to the Hugging Face format.

Here is a detailed guide on how to perform the conversion:

Prerequisites
Make sure you have Python installed and the transformers library from Hugging Face.

pip install transformers
Conversion Script
Download the Conversion Script:
The script is part of the Hugging Face Transformers repository. You can find it in the src/transformers/models/llama directory.

git clone https://github.com/huggingface/transformers
cd transformers
Run the Conversion Script:
Replace /path/to/downloaded/llama/weights with the actual path where you downloaded the LLaMA weights and specify the model size (e.g., 7B).

python src/transformers/models/llama/convert_llama_weights_to_hf.py \
    --input_dir /path/to/downloaded/llama/weights \
    --model_size 7B \
    --output_dir /path/to/output/directory

Example
Suppose you downloaded the weights to /home/user/llama_weights and want to convert the 7B model. The command would look like this:
python src/transformers/models/llama/convert_llama_weights_to_hf.py \
    --input_dir /home/user/llama_weights \
    --model_size 7B \
    --output_dir /home/user/llama_hf

After conversion, you can load the model and tokenizer from the specified output directory using the Hugging Face transformers library

In [4]:

model_dir = "huggingface/llama-7b" #NOT THE ACTUAL DIRECTORY
try:
    tokenizer = LlamaTokenizer.from_pretrained(model_dir)
    model = LlamaForCausalLM.from_pretrained(model_dir)
    print("model and tokenizer loaded successfully.")
except Exception as e:
    print(f"An error occurred: {e}")

An error occurred: llama-7b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`


In [None]:
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# CSV path (change accordingly)
csv_path = "lungu_scale_updated.csv" 
prompts = []
fields = ['Instructions', 'Condition', 'Study', 'Type', 'Proposition', 'NAND', 'NOR', 'Positive AND']  # Columns used in CSV
row_order = 1
 
# Reading CSV to dataframe
prompts_df = pd.read_csv(csv_path, usecols=fields)
for _, row in prompts_df.iterrows():
    prompt = {}

    if (row['Condition'] == "Control"):
        text_parts = [row['Instructions'], row['Proposition']]
    
    else:
        # Check if either "NAND" or "NOR" or "AND" exists in the row
        choices = {"NAND":row["NAND"], "NOR":row["NOR"], "Contradictory":row["Positive AND"]}
        available_choices = {key: value for key, value in choices.items() if not pd.isna(value)}

        if available_choices:
            # Randomly choose one from available choices
            chosen_column, content = random.choice(list(available_choices.items()))
        else:
            chosen_column, content = '', ''  # "Control" rows without continuations "A" or "B" or "C"

        text_parts = [row['Instructions'], row['Proposition'], str(content)]
        
    # Filter out empty strings before joining
    prompt["text"] = ' '.join(filter(None, text_parts))
    prompt["condition"] = row['Condition']
    prompt["study"] = row['Study']
    prompt["type"] = row['Type']
    if chosen_column:
        prompt["choice"] = chosen_column
    else:
        prompt["choice"] = "none"
    
    prompt["order"] = row_order
    row_order += 1
    prompts.append(prompt)

random.shuffle(prompts)

print(f"Found {len(prompts)} prompts in the given CSV file.")

In [None]:
outputs = []

# Function to generate responses using the chosen model
def generate_response(prompt_text):
    system_message = "If prompted to respond with a number on a scale, only respond with the whole number. For example, only respond with '6' if you believe it is 6 on the scale. Do not give any explanations. Interpret both sentences together, not separately."
    user_message = prompt_text

    # Combine system and user messages into a single string
    combined_prompt = f"{system_message}\nUser: {user_message}\n"

    inputs = tokenizer(combined_prompt, return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], max_length=50, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # response = text_generation_pipeline(combined_prompt, max_length=50, num_return_sequences=1)
    return response # response[0]['generated_text']

# Generate responses
for prompt in prompts:
    answer = generate_response(prompt["text"])
    outputs.append(answer)

print("messages generated")


In [None]:
df = pd.DataFrame({"Prompt": [prompt["text"] for prompt in prompts], 
                   "Continuation": [prompt["choice"] for prompt in prompts], 
                   "Condition": [prompt["condition"] for prompt in prompts],
                   "Study": [prompt["study"] for prompt in prompts],
                   "Type": [prompt["type"] for prompt in prompts],
                   "Original order": [prompt["order"] for prompt in prompts],
                   "Response": outputs})

# Save the DataFrame to a CSV file
output_csv_path = "output_responses_llama.csv"
df.to_csv(output_csv_path, index=False)

print(f"Responses saved to {output_csv_path}")