In [None]:
import sys, subprocess

# Core Python dependencies for this notebook, installed in one pip invocation.
pkgs = ["transformers", "bitsandbytes", "tqdm", "accelerate", "pandas"]

# Use the kernel's own interpreter so packages land in the environment the
# notebook is actually running in.
pip_install = [sys.executable, "-m", "pip", "install"]

# check=True raises CalledProcessError if pip exits with a non-zero status.
subprocess.run(pip_install + pkgs, check=True)

# flash-attn is installed in its own step with build isolation disabled.
# NOTE(review): presumably so its build can see the packages installed above —
# confirm against the flash-attn installation notes.
subprocess.run(pip_install + ["flash-attn", "--no-build-isolation"], check=True)

print("Packages installed")

In [None]:
import os
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig

# `sys` is imported here so this cell does not depend on the package-install
# cell having been executed in the current kernel session.
import sys

# Put the directory one level above the notebook's working directory on the
# import path (presumably where the project-local `common` module lives).
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)

# Guard against duplicate entries when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

import common


In [None]:
# --- Run configuration: everything a reader might want to tune ---
DATA_PATH = "../data/Electronics_sample.csv"
DATASET_BASE_NAME = "Electronics_sample"
TEXT_COL = "text"  # Column containing the reviews/comments text
CONTENT_TYPE = "electronics"
OUTPUT_DIR = "../output"
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
PROMPT_PATH = "../prompt.txt"

df = pd.read_csv(DATA_PATH)

# Fail fast on a mis-configured column name, before the expensive model load
# and inference cells run.
if TEXT_COL not in df.columns:
    raise KeyError(
        f"Column {TEXT_COL!r} not found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# Read the prompt with an explicit encoding so the instruction text is decoded
# the same way on every platform (the default encoding is locale-dependent).
with open(PROMPT_PATH, "r", encoding="utf-8") as f:
    instruction = f.read()

In [None]:
# Choose the half-precision dtype: bfloat16 when the GPU reports support for
# it, float16 otherwise.
if torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float16

# Optional 8-bit quantization is currently disabled. To enable it, create
#   bnb_config = BitsAndBytesConfig(load_in_8bit=True)
# and pass `quantization_config=bnb_config` to `from_pretrained` below.

# NOTE(review): trust_remote_code=True permits running Python code shipped in
# the model repository — confirm it is actually required for MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
    trust_remote_code=True,
)
# generate() is given an explicit pad token id later; reuse EOS if the
# tokenizer does not define one.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    dtype=dtype,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)

# Inference only: switch the model to evaluation mode.
model.eval()

def run_local_model_chat(system_prompt: str, user_prompt: str, max_new_tokens: int = 900) -> str:
    """Run one system+user chat turn through the local model and return the reply.

    Uses the module-level ``tokenizer`` and ``model``. Decoding is greedy
    (``do_sample=False``).

    Args:
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 900, the previously hard-coded value).

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    chat_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The chat template has already rendered the special tokens into the text,
    # so the tokenizer must not add its own again (avoids e.g. a duplicated BOS
    # on tokenizers that prepend one).
    enc = tokenizer(
        chat_text, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    gen_cfg = GenerationConfig(max_new_tokens=max_new_tokens, do_sample=False)
    with torch.no_grad():
        out = model.generate(
            **enc,
            generation_config=gen_cfg,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_len = enc["input_ids"].shape[1]
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)

In [None]:
# Arguments forwarded unchanged to common.run_inference.
# NOTE(review): the precise meaning of MAX_TOKENS, SAFE_TOKENS and max_attempts
# is defined inside common.run_inference — presumably token budgets and a retry
# count; confirm there before tuning.
inference_kwargs = {
    "df": df,
    "text_col": TEXT_COL,
    "tokenizer": tokenizer,
    "run_local_model_chat": run_local_model_chat,
    "instruction": instruction,
    "content_type": CONTENT_TYPE,
    "MODEL_NAME": MODEL_NAME,
    "system": "You are an opinion mining assistant.",
    "MAX_TOKENS": 7500,
    "SAFE_TOKENS": 5000,
    "max_attempts": 2,
}

response_list = common.run_inference(**inference_kwargs)

In [None]:
# Turn the collected model responses into prediction tuples, then hand them to
# the project helper that saves them under OUTPUT_DIR for MODEL_NAME.
# NOTE(review): tuple structure and the output file name/format are decided
# inside `common` (presumably a pickle, per the helper's name) — confirm there.
pred_tuples = common.build_pred_tuples(response_list)
common.save_pred_tuples_to_pickle(OUTPUT_DIR, MODEL_NAME, pred_tuples)

In [None]:
# Combine the input frame with the model responses and save the result.
# NOTE(review): which columns are added, whether `df` is modified in place, and
# the output file name/format are determined inside `common` — confirm there.
updated_df = common.add_tuples_to_df(df, response_list)
common.save_dataset(updated_df, OUTPUT_DIR, MODEL_NAME, DATASET_BASE_NAME)