In [1]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [23]:
class ChatBot():
    """Chat-style wrapper around a `transformers` text-generation pipeline.

    The wrapper keeps a system prompt (``self.instruction``) and offers three
    entry points:

    * ``generate_response`` - one-shot answer using the stored system prompt.
    * ``get_response``      - answer given an explicit message history.
    * ``chatbot``           - interactive read/answer loop on stdin/stdout.
    """

    def __init__(self, model_path, instruction=""):
        """Load the model and remember the system prompt.

        Args:
            model_path: Model identifier or path forwarded to ``pipeline``.
            instruction: Text of the system message stored in
                ``self.instruction`` and used by ``generate_response`` (and by
                ``chatbot`` when no override is supplied).
        """
        self.model_id = model_path
        self.pipeline = pipeline(
            "text-generation",
            model=self.model_id,
            model_kwargs={
                "torch_dtype": torch.float16,
                "quantization_config": {"load_in_4bit": True},
                "low_cpu_mem_usage": True,
            },
        )
        tokenizer = self.pipeline.tokenizer
        # Generation stops on the tokenizer's EOS token or on "<|eot_id|>"
        # (presumably the end-of-turn marker of the Llama-3 chat format).
        # A lookup can yield None when the token is unknown to the tokenizer,
        # and a None entry is not a usable token id, so such entries are dropped.
        candidate_ids = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]
        self.terminators = [
            token_id for token_id in candidate_ids if token_id is not None
        ]
        self.instruction = [{"role": "system", "content": instruction}]

    def _generate(self, messages, max_tokens, temperature, top_p):
        """Render ``messages`` with the chat template and return the new text only."""
        prompt = self.pipeline.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        outputs = self.pipeline(
            prompt,
            max_new_tokens=max_tokens,
            eos_token_id=self.terminators,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        # "generated_text" is expected to start with the prompt itself, so the
        # prompt-length prefix is cut off to keep only the model's reply.
        return outputs[0]["generated_text"][len(prompt):]

    def get_response(
        self, query, message_history=None, max_tokens=4096, temperature=0.6, top_p=0.9
    ):
        """Answer ``query`` in the context of ``message_history``.

        Args:
            query: Text of the new user message.
            message_history: Earlier messages as ``{"role", "content"}`` dicts.
                ``None`` (the default) or an empty sequence means "no history".
                The passed object is never modified.
            max_tokens: Forwarded to the pipeline as ``max_new_tokens``.
            temperature: Sampling temperature forwarded to the pipeline.
            top_p: Nucleus-sampling threshold forwarded to the pipeline.

        Returns:
            A ``(response, history)`` tuple where ``history`` is a new list made
            of the given history, the user message and the assistant reply.
        """
        # Default is None rather than a shared [] to avoid the mutable-default pitfall.
        history = list(message_history) if message_history else []
        messages = history + [{"role": "user", "content": query}]
        response = self._generate(messages, max_tokens, temperature, top_p)
        return response, messages + [{"role": "assistant", "content": response}]

    def generate_response(self, query,  max_tokens=4096, temperature=0.6, top_p=0.9):
        """Answer a single ``query`` using the stored system prompt.

        No history is kept between calls; only the reply text is returned.
        """
        messages = self.instruction + [{"role": "user", "content": query}]
        return self._generate(messages, max_tokens, temperature, top_p)

    def chatbot(self, system_instructions=""):
        """Run an interactive chat loop until the user types ``exit`` or ``quit``.

        Args:
            system_instructions: System prompt for this session. When empty,
                the prompt given to the constructor is used instead.
        """
        if system_instructions:
            conversation = [{"role": "system", "content": system_instructions}]
        else:
            # Copy so the stored system prompt is never shared with a session history.
            conversation = [dict(message) for message in self.instruction]
        while True:
            user_input = input("User: ").strip()
            if user_input.lower() in ("exit", "quit"):
                print("Exiting the chatbot. Goodbye!")
                break
            if not user_input:
                # Nothing was typed; ask again instead of sending an empty turn.
                continue
            response, conversation = self.get_response(user_input, conversation)
            print(f"Assistant: {response}")
  
  

In [33]:
# Model identifier handed to ChatBot, which forwards it to the text-generation pipeline.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# System prompt for the synthetic-user generator. The wording is what the model
# actually reads, so it is kept grammatical and unambiguous about the output format.
instruction = (
    "You are an expert in social network analysis. "
    "I am training on an imbalanced dataset on Twitter. "
    "As the positive samples are limited, I need you to help me generate a synthetic user. "
    "Your output should contain only the user information in a list: "
    "the first element is the username and the second element is the user description."
)
chatbot = ChatBot(model_id, instruction)

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.23it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [34]:
# Ask for a synthetic user modelled on one seed profile; the returned string is the cell output.
chatbot.generate_response(
    "username: medicalsupply, profile: medical supply, "
    "location: USA, followers: 1000, following: 500"
)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


'Here are 10 synthetic users:\n\n```\n[\n["medtechpro", "Medical technology professional"],\n["healthcarehero", "Healthcare enthusiast"],\n["medsupplypro", "Medical supply expert"],\n["pharmpro", "Pharmaceutical professional"],\n["healthcarehelper", "Healthcare assistant"],\n["medicalmate", "Medical supply enthusiast"],\n["healthcareheroine", "Healthcare advocate"],\n["medsupplymate", "Medical supply assistant"],\n["pharmfriend", "Pharmaceutical enthusiast"],\n["healthcareheroic", "Healthcare hero"]\n]\n```'

In [29]:
# Echo the system prompt currently bound to `instruction`.
# NOTE(review): re-run this cell after editing the prompt cell, otherwise the saved output is stale.
instruction

'You are an expert in social network analysis. I am training on a imbalanced dataset on Twitter. As the positive samples are limited, I need you to help me generate synthetic users. Return the user information in a list, the first element is username and the second element is user description. '