In [1]:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pre-trained GPT-2 model and tokenizer
model_name = "gpt2" 
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [3]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# Read all users from data
def load_user_data(user_id):
    with open("data/users.json", "r") as f:
        users = json.load(f)
    for user in users:
        if user["id"] == user_id:
            return user
    return None


In [8]:
load_user_data("neo03")

{'id': 'neo03',
 'name': 'Neo Noir',
 'bio': 'Film noir buff with a taste for conspiracy theories and cold brew.',
 'interests': ['Film Noir', 'History', 'Dark Comedy'],
 'writing_samples': ['Everyone has a story â€” mineâ€™s written in shadows and static.']}

In [9]:
# Create a prompt based on the user’s personality
def generate_persona_prompt(user, message):
    intro = f"You are {user['name']}, a person who loves {', '.join(user['interests'])}.\n"
    bio = f"Bio: {user['bio']}\n"
    writing_style = "\n".join(user["writing_samples"])
    prompt = (
        f"{intro}{bio}Here are some things you’ve said:\n"
        f"{writing_style}\n\n"
        f"Now respond to the following message in your style:\n"
        f"User: {message}\n{user['name']}:"
    )
    return prompt


In [10]:
# Generate AI response using the GPT-2 model
def get_ai_reply(user_id, message):
    user = load_user_data(user_id)
    if not user:
        return "User not found."

    prompt = generate_persona_prompt(user, message)

    # Tokenize the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Generate the output with a limit of 150 tokens
    output = model.generate(
        input_ids,
        max_length=150,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=2,
        temperature=0.7,
        top_p=0.9,
    )

    # Decode the output and extract the response part
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # Extract only the part after the prompt
    return response[len(prompt):].strip()


In [11]:
if __name__ == "__main__":
    reply = get_ai_reply(user_id=1, message="What's your opinion on AI and poetry?")
    print("🤖 AI Reply:", reply)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🤖 AI Reply: I think poetry is a
