In [None]:
import json
import os

import pandas as pd
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Authenticate using Hugging Face token

In [None]:
# Read the access token from the HF_TOKEN environment variable so that no
# secret is stored in the notebook. When the variable is unset (or empty),
# token=None is passed and login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

# Load tokenizer and model from Hugging Face Hub
# Ensure authentication for private models if necessary

In [None]:
# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_ID = "google/gemma-2-2b"

# token=True asks the Hub client to use the locally stored / environment
# credentials instead of a token string hard-coded in the notebook.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=True)

# Set device (GPU if available, else CPU)
# Clear any cached GPU memory
# Move model to the selected device

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Drop any cached GPU memory before placing the model.
torch.cuda.empty_cache()

# Move the model onto the selected device.
model.to(device)

# Function to generate responses using the model
Generates responses for input data in JSON format.
    
  Args:
    json_data (list): List of query records (dicts) with the keys 'Query', 'Context', 'Response Formats' and, optionally, 'Additional Instructions'.
    max_length (int): Maximum number of new tokens to generate for each response.
    batch_size (int): Number of queries to process per batch.

  Returns:
    list: One dict per query, with the keys 'query' (the original query) and 'response' (the decoded model output).

In [None]:
def _optional_text(value):
  """Return ``value`` unchanged, or '' when it is None or a float NaN."""
  # A float NaN is the only float that is not equal to itself; pandas uses it
  # to fill fields that are missing from a record, and formatting it directly
  # would put the literal text "nan" into the prompt.
  is_nan = isinstance(value, float) and value != value
  return '' if value is None or is_nan else value


def _build_prompt(item):
  """Render one query record into the prompt text sent to the model."""
  return (
    f"Answer the following in strict format as: {item['Response Formats']}.\n"
    f"Question: {item['Query']}\n"
    f"Context: {item['Context']}\n"
    f"Additional Instructions: {_optional_text(item.get('Additional Instructions'))}"
  )


def generate_responses(json_data, max_length=20, batch_size=5):
  """Generate one model response per query record.

  Every record is rendered into a prompt (response format, question, context
  and optional additional instructions). The prompts are tokenized in batches
  (padded, truncated to 512 tokens), passed to ``model.generate`` and the
  resulting sequences are decoded back to text.

  Uses the module-level ``tokenizer``, ``model`` and ``device``. As a side
  effect, the tokenizer's padding token is set to its end-of-sequence token.

  Args:
    json_data (list): Records (dicts) with the keys 'Response Formats',
      'Query' and 'Context', and optionally 'Additional Instructions'.
      A missing, None or NaN 'Additional Instructions' is rendered as empty.
    max_length (int): Maximum number of new tokens to generate per response
      (forwarded as ``max_new_tokens``).
    batch_size (int): Number of prompts processed per batch; must be >= 1.

  Returns:
    list: One dict per record, in input order, with the keys 'query'
      (the record's 'Query') and 'response' (the decoded output sequence).

  Raises:
    ValueError: If ``batch_size`` is smaller than 1.
    KeyError: If a record lacks 'Response Formats', 'Query' or 'Context'.
  """
  if batch_size < 1:
    # A negative step would make the batching loop silently produce nothing.
    raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

  responses = []  # Accumulates {'query', 'response'} dicts in input order
  tokenizer.pad_token = tokenizer.eos_token  # Pad batches with the EOS token

  # Build every prompt up front so a malformed record fails before any generation runs.
  queries_with_context = [_build_prompt(item) for item in json_data]

  # Process the prompts batch by batch
  for start in range(0, len(queries_with_context), batch_size):
    stop = start + batch_size
    batch_questions = queries_with_context[start:stop]

    # Tokenize the batch; padding makes all rows the same length
    inputs = tokenizer(
      batch_questions,
      return_tensors="pt",
      padding=True,
      truncation=True,
      max_length=512
    ).to(device)

    # Generate at most `max_length` new tokens per prompt
    outputs = model.generate(
      input_ids=inputs['input_ids'],
      attention_mask=inputs['attention_mask'],
      max_new_tokens=max_length,
      pad_token_id=tokenizer.eos_token_id,
      no_repeat_ngram_size=2
    )

    # Decode each output sequence and pair it with its source record.
    # NOTE(review): the full output sequence is decoded; for a causal LM this
    # presumably begins with the prompt text - confirm whether the prompt
    # should be stripped from 'response'.
    for output, item in zip(outputs, json_data[start:stop]):
      decoded_output = tokenizer.decode(output, skip_special_tokens=True)
      responses.append({
        "query": item['Query'],
        "response": decoded_output
      })

  return responses

# Load the input data from a JSON file
# Convert DataFrame to list of dictionaries

In [None]:
# Read the query file into a DataFrame, then turn every row into a plain dict.
json_file = "./ValueTest.json"
df = pd.read_json(path_or_buf=json_file)
json_data = df.to_dict("records")

# Generate responses for every query in the loaded data

In [None]:
# Run every loaded record through the model; max_length and batch_size are
# left at the function's defaults.
output_data = generate_responses(json_data=json_data)

# Save the responses

In [None]:
# Write the generated responses as indented JSON using the standard-library
# json module (pandas exposes no `pd.json.dump`). The encoding is set
# explicitly so the file does not depend on the platform default.
with open('generated_responses.json', 'w', encoding='utf-8') as f:
    json.dump(output_data, f, indent=2)

print("Responses have been saved to 'generated_responses.json'")