In [None]:
import subprocess
import sys

# Packages installed into the interpreter that backs this kernel.
pkgs = ["transformers", "bitsandbytes", "tqdm", "accelerate", "pandas"]

# Run pip through `sys.executable -m pip` so the install targets this kernel's
# environment; a non-zero exit status raises CalledProcessError.
subprocess.run([sys.executable, "-m", "pip", "install", *pkgs], check=True)

# flash-attn is installed in a separate pip invocation with build isolation disabled.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"]
)

print("Packages installed")

In [None]:
import os
import sys  # imported here as well so this cell runs even when the install cell is skipped

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig

# The project-local `common` module is looked up in the parent of the current
# working directory, so that directory is added to the import path
# (once only, so re-running the cell does not duplicate the entry).
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)

if parent_dir not in sys.path:
    sys.path.append(parent_dir)

import common


In [None]:
# --- Run configuration -------------------------------------------------------
DATA_PATH   = "../data/Electronics_sample.csv"
DATASET_BASE_NAME = "Electronics_sample"
TEXT_COL    = "text"  # Column containing the reviews/comments text
CONTENT_TYPE   = "electronics"
OUTPUT_DIR  = "../output"
MODEL_NAME  = "google/gemma-2-9b-it"
PROMPT_PATH = "../prompt.txt"
N_SAMPLES   = 100  # Number of leading rows of the dataset to process

# Load the dataset and keep an independent copy of the first N_SAMPLES rows.
df = pd.read_csv(DATA_PATH)
df = df.head(N_SAMPLES).copy()

# Read the prompt with an explicit encoding so the text does not depend on the
# platform's default locale encoding.
with open(PROMPT_PATH, 'r', encoding="utf-8") as f:
    instruction = f.read()

In [None]:
# Choose the half-precision format: bfloat16 when torch reports support for it,
# float16 otherwise.
if torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float16

# Tokenizer for MODEL_NAME; when it defines no padding token, reuse EOS.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, use_fast=True)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Causal LM placed on the CUDA device with the FlashAttention-2 attention backend.
# NOTE(review): trust_remote_code=True allows repository-supplied code to run —
# confirm it is actually required for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=dtype,
    device_map="cuda",
    attn_implementation="flash_attention_2",
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Switch to evaluation mode (kept as the cell's final expression).
model.eval()


In [None]:
def run_local_model_chat(system_prompt: str, user_prompt: str) -> str:
    """Generate one greedy completion from the locally loaded chat model.

    The system prompt is folded into a single user turn (no separate system
    message is sent), the conversation is rendered with the tokenizer's chat
    template, and at most 900 new tokens are generated without sampling.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        system_prompt: Instruction text placed at the start of the user turn.
        user_prompt: Content appended after the instruction.

    Returns:
        The decoded completion with "```json" fence openers and
        "<end_of_turn>" markers removed and surrounding whitespace stripped.
    """
    # NOTE(review): the lone ": " after the blank line looks like a truncated
    # label — confirm it is intentional. Kept verbatim to leave the prompt unchanged.
    merged_user_content = f"{system_prompt.strip()}\n\n: {user_prompt.strip()}"

    messages = [{"role": "user", "content": merged_user_content}]
    # Ask for the dict form explicitly so the result has the same structure
    # whatever the installed library's default for `return_dict` is, and so the
    # attention mask can be forwarded to generate().
    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    input_ids = encoded["input_ids"]
    attention_mask = encoded.get("attention_mask")

    # A token missing from the vocabulary may resolve to the unknown-token id
    # rather than None or a negative value, so treat that as "not found" too.
    end_turn_id = tokenizer.convert_tokens_to_ids("<end_of_turn>")
    unk_id = getattr(tokenizer, "unk_token_id", None)
    if end_turn_id is None or end_turn_id < 0 or end_turn_id == unk_id:
        end_turn_id = tokenizer.eos_token_id

    # Compare against None explicitly: a pad id of 0 is valid and must not be
    # replaced by the EOS id through a truthiness test.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    gen_cfg = GenerationConfig(
        max_new_tokens=900,
        min_new_tokens=5,
        do_sample=False,
        pad_token_id=pad_id,
        eos_token_id=end_turn_id,
    )

    with torch.no_grad():
        out = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=gen_cfg,
        )

    # Keep only the tokens produced after the prompt.
    gen_ids = out[0, input_ids.shape[1]:]
    # Special tokens are kept during decoding and removed by hand below.
    text = tokenizer.decode(gen_ids, skip_special_tokens=False).strip()
    text = text.replace("```json", "")
    text = text.replace("```<end_of_turn>", "")
    text = text.replace("<end_of_turn>", "")
    text = text.strip()
    return text

In [None]:
# Hand the sampled frame, the prompt text and the local generation callback to
# the shared inference helper in `common`.
# NOTE(review): the meaning of MAX_TOKENS / SAFE_TOKENS / max_attempts is
# defined inside common.run_inference — confirm there before tuning them.
response_list = common.run_inference(
    # data
    df=df,
    text_col=TEXT_COL,
    content_type=CONTENT_TYPE,
    # prompting
    instruction=instruction,
    system="You are an opinion mining assistant.",
    # model plumbing
    MODEL_NAME=MODEL_NAME,
    tokenizer=tokenizer,
    run_local_model_chat=run_local_model_chat,
    # limits
    MAX_TOKENS=7500,
    SAFE_TOKENS=5000,
    max_attempts=2,
)

In [None]:
# Build prediction tuples from the collected responses, then persist them via
# the `common` helper, passing the output directory and model name.
# NOTE(review): the on-disk file name/layout is decided inside
# common.save_pred_tuples_to_pickle — confirm there.
pred_tuples = common.build_pred_tuples(response_list)
common.save_pred_tuples_to_pickle(OUTPUT_DIR, MODEL_NAME, pred_tuples)

In [None]:
# Combine the responses with the sampled frame through common.add_tuples_to_df
# and save the result with common.save_dataset.
# NOTE(review): whether `df` is modified in place or copied, and the output
# file format, depend on the `common` helpers — verify there.
updated_df = common.add_tuples_to_df(df, response_list)
common.save_dataset(updated_df, OUTPUT_DIR, MODEL_NAME, DATASET_BASE_NAME)