In [35]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer


In [36]:
class ChatBot():
    """Small chat wrapper around a Hugging Face ``text-generation`` pipeline.

    The instance keeps three pieces of state:

    * ``pipeline``    -- the loaded text-generation pipeline (model + tokenizer),
    * ``terminators`` -- token ids passed as ``eos_token_id`` on every generation call,
    * ``instruction`` -- a one-element message list holding the system prompt that
      ``generate_response`` prepends to each query.
    """

    def __init__(self, model_path, instruction=""):
        """Load the model and remember the system instruction.

        Args:
            model_path: Model identifier or path handed to ``pipeline(model=...)``.
            instruction: System prompt text used by ``generate_response``.
        """
        self.model_id = model_path
        self.pipeline = pipeline(
            "text-generation",
            model=self.model_id,
            model_kwargs={
                "torch_dtype": torch.float16,
                "quantization_config": {"load_in_4bit": True},
                "low_cpu_mem_usage": True,
            },
        )
        # Generation stops at the tokenizer's EOS token or at the "<|eot_id|>" token.
        self.terminators = [
            self.pipeline.tokenizer.eos_token_id,
            self.pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]
        self.instruction = [{"role": "system", "content": instruction}]

    def _generate(self, messages, max_tokens, temperature, top_p):
        """Render ``messages`` with the chat template, sample a completion, and
        return only the text that follows the rendered prompt."""
        prompt = self.pipeline.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        outputs = self.pipeline(
            prompt,
            max_new_tokens=max_tokens,
            eos_token_id=self.terminators,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        # "generated_text" starts with the prompt itself; drop that prefix.
        return outputs[0]["generated_text"][len(prompt):]

    def get_response(
        self, query, message_history=None, max_tokens=4096, temperature=0.6, top_p=0.9
    ):
        """Answer ``query`` in the context of an existing conversation.

        Args:
            query: The new user message.
            message_history: Earlier messages as ``{"role", "content"}`` dicts.
                ``None`` (the default) means an empty history. The sequence passed
                in is never modified.
            max_tokens: Forwarded to the pipeline as ``max_new_tokens``.
            temperature: Sampling temperature forwarded to the pipeline.
            top_p: Nucleus-sampling threshold forwarded to the pipeline.

        Returns:
            ``(response, updated_history)`` where ``updated_history`` is a new list:
            the given history, then the user message, then the assistant reply.
        """
        # A ``None`` sentinel replaces the former shared mutable ``[]`` default.
        history = [] if message_history is None else list(message_history)
        user_prompt = history + [{"role": "user", "content": query}]
        response = self._generate(user_prompt, max_tokens, temperature, top_p)
        return response, user_prompt + [{"role": "assistant", "content": response}]

    def generate_response(self, query, max_tokens=4096, temperature=0.6, top_p=0.9):
        """Answer a single ``query`` using the system instruction given at construction.

        No history is kept between calls; only the response text is returned.
        """
        user_prompt = self.instruction + [{"role": "user", "content": query}]
        return self._generate(user_prompt, max_tokens, temperature, top_p)

    def chatbot(self, system_instructions=""):
        """Run an interactive console chat until the user types ``exit`` or ``quit``.

        Args:
            system_instructions: System prompt text placed at the start of the
                conversation for this session.
        """
        conversation = [{"role": "system", "content": system_instructions}]
        while True:
            try:
                user_input = input("User: ")
            except (EOFError, KeyboardInterrupt):
                # A closed stdin or an interrupted prompt ends the session cleanly
                # instead of surfacing a traceback.
                print("Exiting the chatbot. Goodbye!")
                break
            # Strip so that "exit " or " quit" are recognised as well.
            if user_input.strip().lower() in ("exit", "quit"):
                print("Exiting the chatbot. Goodbye!")
                break
            response, conversation = self.get_response(user_input, conversation)
            print(f"Assistant: {response}")
  
  

In [40]:
# Checkpoint handed to ChatBot as the model identifier.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# System prompt for synthetic-user generation. The wording was repaired
# ("a imbalanced", "help me a generate similar synthetic user",
# "Only your output should only contain") so the model receives one clear,
# non-contradictory formatting request. Adjacent literals inside the
# parentheses are concatenated; no backslash continuations are needed.
instruction = (
    "You are an expert in social network analysis. I am training on an imbalanced dataset on Twitter. "
    "As the positive samples are limited, I need you to help me generate a similar synthetic user. "
    "Your output should only contain the user information in a list: the first element is the username "
    "and the second element is the user description. "
)

chatbot = ChatBot(model_id, instruction)

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.25it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [42]:
# Seed profile to imitate; the two adjacent literals join into one query string.
chatbot.generate_response(
    "username: y2k_mula, profile: Online dispensary 🍃 premium lab-tested psychedelic products 🥇  "
    "Giveaways 🌱 must be 18+🔞 licensed &legal 🥂 order 👉👉"
)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


'Here are 5 synthetic users with similar characteristics:\n\n```\n[user1, "PsychedelicPete", "Your one-stop shop for premium lab-tested psychedelic products 🍃"]\n[user2, "trippy_tom", "Get ready to trip with our selection of legal and licensed psychedelic products 🌈"]\n[user3, "stoned_sam", "Explore the world of psychedelics with us 🌊"]\n[user4, "high_hope", "Your source for premium psychedelic products and giveaways 🎁"]\n[user5, "mindful_mike", "Discover the benefits of psychedelics with our expertly curated products 🧠"]\n```\n\nPlease note that these users are randomly generated and may not exactly match the characteristics of the original user.'

In [29]:
# Echo the system prompt string so its exact wording can be inspected.
# NOTE(review): this cell's execution count (29) is lower than that of the cell
# which assigns `instruction` (40), so the recorded output may be stale —
# re-run after a kernel restart to refresh it.
instruction

'You are an expert in social network analysis. I am training on a imbalanced dataset on Twitter. As the positive samples are limited, I need you to help me generate synthetic users. Return the user information in a list, the first element is username and the second element is user description. '