In [5]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel

class AttentionMemory(nn.Module):
    def __init__(self):
        super(AttentionMemory, self).__init__()
        self.attention = nn.Linear(768, 2)

    def forward(self, input_embed):
        attention_weights = torch.softmax(self.attention(input_embed), dim=1)
        return attention_weights

class Chatbot(nn.Module):
    def __init__(self):
        super(Chatbot, self).__init__()
        self.embedder = AutoModel.from_pretrained("gpt2")
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.tokenizer.padding_side = "right"
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.attention_memory = AttentionMemory()

    def forward(self, input_text, memories):
        input_tokens = self.tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        input_embed = self.embedder(**input_tokens).last_hidden_state.mean(dim=1)

        memory_embeddings = []
        for memory in memories:
            memory_tokens = self.tokenizer(memory, return_tensors="pt", padding=True, truncation=True)
            memory_embed = self.embedder(**memory_tokens).last_hidden_state.mean(dim=1)
            memory_embeddings.append(memory_embed)

        memory_embeddings = torch.stack(memory_embeddings, dim=1).squeeze(0)
        attention_weights = self.attention_memory(input_embed)
        weighted_memory = torch.matmul(attention_weights, memory_embeddings)

        return weighted_memory, attention_weights



model = Chatbot()
optimizer = torch.optim.Adam(model.attention_memory.parameters(), lr=0.01)

input_text = "What is the capital of France?"
memories = ["The capital of France is Paris.", "The capital of Germany is Berlin."]
target_output = "The capital of France is Paris."

num_episodes = 1000
gamma = 0.99

def reward_function(output_tokens, target_output):
    return 1.0 if output_tokens.strip() == target_output.strip() else 0.0

learning_rate = 0.01
num_episodes = 1000

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for episode in range(num_episodes):
    optimizer.zero_grad()
    output, attention_weights = model(input_text, memories)
    
    output_tokens = model.tokenizer.decode(output.argmax(dim=1).tolist(), skip_special_tokens=True)
    reward = reward_function(output_tokens, target_output)

    loss = -torch.log(attention_weights[0, 0]) * reward
    loss.backward()
    optimizer.step()

    if episode % 10 == 0:
        print(f"Episode: {episode}, Loss: {loss.item()}, Reward: {reward}, Output: {output_tokens}")


Episode: 0, Loss: 0.0, Reward: 0.0, Output: age
Episode: 10, Loss: 0.0, Reward: 0.0, Output: age
Episode: 20, Loss: 0.0, Reward: 0.0, Output: age
Episode: 30, Loss: 0.0, Reward: 0.0, Output: age
Episode: 40, Loss: 0.0, Reward: 0.0, Output: age
Episode: 50, Loss: 0.0, Reward: 0.0, Output: age
Episode: 60, Loss: 0.0, Reward: 0.0, Output: age
Episode: 70, Loss: 0.0, Reward: 0.0, Output: age
Episode: 80, Loss: 0.0, Reward: 0.0, Output: age
Episode: 90, Loss: 0.0, Reward: 0.0, Output: age
Episode: 100, Loss: 0.0, Reward: 0.0, Output: age
Episode: 110, Loss: 0.0, Reward: 0.0, Output: age
Episode: 120, Loss: 0.0, Reward: 0.0, Output: age
Episode: 130, Loss: 0.0, Reward: 0.0, Output: age
Episode: 140, Loss: 0.0, Reward: 0.0, Output: age


KeyboardInterrupt: 

In [None]:
 think you derailed let's start from scratch:

the model takes as input the question and uses this class to transform it in a vector

class OpenAiEmbedder:
    def get_embedding_size(self):
        return 1536
    def embed(self, data, embed_mark = True, verbose = False):
        try:
            if embed_mark is False and type(data) is dict and "content" in data:
                print("Embedding without mark", data["content"])
                out = openai.Embedding.create(input=data["content"], engine='text-embedding-ada-002')
            else:
                if verbose is True:
                    print("Embedding without preprocessing the input", data)
                out = openai.Embedding.create(input=str(data), engine='text-embedding-ada-002')
        except:
            raise ValueError("The data  is not valid")
        return out.data[0].embedding
    def embed_list(self,data):
        #use the batched version of the API by giving a list as input
        #che that is listo of strings
        if type(data) is not list:
            raise ValueError("The data  is not valid")
        out = openai.Embedding.create(input=data, engine='text-embedding-ada-002')

then it uses the same embedding class to his memory and uses the three embedded vectors to with a self attention mechanism to choose which of the two memory to choose. If the memory chosen is the good memory reward of +1 otherwise reward of 0, very very simple

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
# Note: you need to be using OpenAI Python v0.27.0 for the code below to work
import openai
openai.api_key = "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"
class OpenAiEmbedder:
    def get_embedding_size(self):
        return 1536
    def embed(self, data, embed_mark = True, verbose = False):
        try:
            if embed_mark is False and type(data) is dict and "content" in data:
                print("Embedding without mark", data["content"])
                out = openai.Embedding.create(input=data["content"], engine='text-embedding-ada-002')
            else:
                if verbose is True:
                    print("Embedding without preprocessing the input", data)
                out = openai.Embedding.create(input=str(data), engine='text-embedding-ada-002')
        except:
            raise ValueError("The data  is not valid")
        return out.data[0].embedding
    def embed_list(self,data):
        #use the batched version of the API by giving a list as input
        #che that is listo of strings
        if type(data) is not list:
            raise ValueError("The data  is not valid")
        out = openai.Embedding.create(input=data, engine='text-embedding-ada-002')

class SimpleAttentionModel(nn.Module):
    def __init__(self, embedder):
        super(SimpleAttentionModel, self).__init__()
        self.embedder = embedder
        self.attention = nn.Linear(embedder.get_embedding_size(), 1)

    def forward(self, question, memories):
        question_embedding = torch.tensor(self.embedder.embed(question)).unsqueeze(0)
        memory_embeddings = torch.tensor([self.embedder.embed(memory) for memory in memories]).unsqueeze(0)

        attention_logits = self.attention(memory_embeddings)
        attention_weights = torch.softmax(attention_logits, dim=1)

        chosen_memory = torch.sum(attention_weights * memory_embeddings, dim=1).squeeze()
        return chosen_memory, attention_weights

# ... (Same as before: OpenAiEmbedder and SimpleAttentionModel classes) ...

def reward_function(chosen_memory, good_memory_embedding):
    cosine_similarity = torch.nn.CosineSimilarity(dim=0)
    similarity = cosine_similarity(chosen_memory, torch.tensor(good_memory_embedding))
    return similarity.item()

# Example usage
openai_embedder = OpenAiEmbedder()
model = SimpleAttentionModel(openai_embedder)
optimizer = optim.Adam(model.parameters(), lr=0.01)

question = "What is the capital of France?"
memories = ["The capital of France is Paris.", "An irrelevant memory."]
good_memory = memories[0]

num_episodes = 500

for episode in range(num_episodes):
    optimizer.zero_grad()

    chosen_memory, attention_weights = model(question, memories)
    reward = reward_function(chosen_memory, torch.tensor(openai_embedder.embed(good_memory)))

    loss = -torch.log(attention_weights[0, 0]) * reward
    loss.backward()
    optimizer.step()

    if episode % 50 == 0:
        print(f"Episode: {episode}, Loss: {loss.item()}, Reward: {reward}")

print("Final attention weights:", attention_weights)

  similarity = cosine_similarity(chosen_memory, torch.tensor(good_memory_embedding))


Episode: 0, Loss: 0.6417206525802612, Reward: 0.926105797290802
Episode: 50, Loss: 0.0059396191500127316, Reward: 0.9999890327453613
Episode: 100, Loss: 0.003237340599298477, Reward: 0.999995231628418
Episode: 150, Loss: 0.002143227495253086, Reward: 0.9999969005584717
Episode: 200, Loss: 0.001533786067739129, Reward: 0.9999974966049194
Episode: 250, Loss: 0.0011595649411901832, Reward: 0.9999997615814209
Episode: 300, Loss: 0.0009122475748881698, Reward: 0.9999997615814209
Episode: 350, Loss: 0.0007396688451990485, Reward: 0.9999997019767761
Episode: 400, Loss: 0.0006137584568932652, Reward: 0.9999998807907104
Episode: 450, Loss: 0.0005188737995922565, Reward: 1.0
Final attention weights: tensor([[[9.9955e-01],
         [4.4558e-04]]], grad_fn=<SoftmaxBackward0>)


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import openai


class SimpleAttentionModel(nn.Module):
    def __init__(self, embedder):
        super(SimpleAttentionModel, self).__init__()
        self.embedder = embedder
        self.embedding_size = embedder.get_embedding_size()
        self.conv1 = nn.Linear(self.embedding_size, self.embedding_size)

    def forward(self, question, memories):
        question_embedding = torch.tensor(self.embedder.embed(question))
        memory_embeddings = torch.tensor([self.embedder.embed(memory) for memory in memories])

        conv1_question = self.conv1(question_embedding)
        conv1_memories = self.conv1(memory_embeddings)

        dot_products = torch.matmul(conv1_memories, conv1_question)

        attention_weights = torch.softmax(dot_products, dim=-1)

        chosen_memory = torch.sum(attention_weights.unsqueeze(-1) * memory_embeddings, dim=0)
        return chosen_memory, attention_weights

def reward_function(chosen_memory, good_memory_embedding):
    cosine_similarity = torch.nn.CosineSimilarity(dim=0)
    similarity = cosine_similarity(chosen_memory, torch.tensor(good_memory_embedding))
    return similarity.item()

openai_embedder = OpenAiEmbedder()
model = SimpleAttentionModel(openai_embedder)
optimizer = optim.Adam(model.parameters(), lr=0.01)

question = "What is the capital of France?"
memories = ["The capital of France is Paris.", "An irrelevant memory.", "the capital of italy is rome"]
good_memory = memories[0]

num_episodes = 500

for episode in range(num_episodes):
    optimizer.zero_grad()

    chosen_memory, attention_weights = model(question, memories)
    reward = reward_function(chosen_memory, torch.tensor(openai_embedder.embed(good_memory)))

    loss = -torch.log(attention_weights[0]) * reward
    loss.backward()
    optimizer.step()

    if episode % 50 == 0:
        print(f"Episode: {episode}, Loss: {loss.item()}, Reward: {reward}")

print("Final attention weights:", attention_weights)


  similarity = cosine_similarity(chosen_memory, torch.tensor(good_memory_embedding))


Episode: 0, Loss: 1.0000296831130981, Reward: 0.94023197889328
Episode: 50, Loss: -0.0, Reward: 1.0000001192092896


ValueError: The data  is not valid