# Collaborative Recommendation

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
import pandas as pd
import numpy as np
import random

# Fix the global seeds so the user subset and the pair sampling below are repeatable.
# `random` feeds random.choice / random.sample inside BinaryClassificationLoader;
# the NumPy global state feeds np.random.choice in the split cell.
# NOTE(review): DataFrame.sample calls without random_state presumably also draw from the
# NumPy global state — confirm against the pandas docs.
random.seed(42)
np.random.seed(42)

## Load data

In [3]:
# Interaction rows; later cells read the "user_id", "rating" and "parent_asin" columns.
data = pd.read_csv("../data/cd_and_vinyl/dense_subset.csv")
# Item metadata stored as JSON Lines (lines=True); filtered by "parent_asin" further down.
data_items = pd.read_json("../data/cd_and_vinyl/meta_CDs_and_Vinyl.jsonl", lines=True)

## Train Split

In [4]:
# Split the interactions into train/test sets, user by user.
# Sample 10% of the users (percent_users = 0.1), without replacement.
percent_users = 0.1
users = data["user_id"].unique()
users = np.random.choice(users, int(len(users) * percent_users), replace=False)

train_data = []
test_data = []

# Rows per user in each split: 8 // 2 = 4 for train, 4 // 2 = 2 for test.
# Each value is halved again below, i.e. 2 liked + 2 disliked rows for train
# and 1 liked + 1 disliked row for test.
train_sample = 8 // 2
test_sample = 4 // 2

for user in users:
    sample_user = data.query("user_id == @user")

    # Positive (rating > 3)
    # Test rows are sampled after dropping the train rows, so the splits do not share a row.
    # NOTE(review): assumes every sampled user has enough liked rows for both draws — confirm
    # that the "dense subset" guarantees this.
    liked_items = sample_user.query("rating > 3")
    liked_items_train = liked_items.sample(train_sample // 2, random_state=42)
    liked_items_test = liked_items.drop(liked_items_train.index).sample(
        test_sample // 2, random_state=42
    )

    # Negative (rating < 3); rows rated exactly 3 are used by neither side.
    disliked_items = sample_user.query("rating < 3")
    disliked_items_train = disliked_items.sample(train_sample // 2, random_state=42)
    disliked_items_test = disliked_items.drop(disliked_items_train.index).sample(
        test_sample // 2, random_state=42
    )

    # Collect the per-user pieces; they are concatenated once after the loop.
    train_data.append(liked_items_train)
    train_data.append(disliked_items_train)
    test_data.append(liked_items_test)
    test_data.append(disliked_items_test)

# Final data: the lists of per-user frames become one DataFrame per split.
train_data = pd.concat(train_data).reset_index(drop=True)
test_data = pd.concat(test_data).reset_index(drop=True)

# Results
print(f"Train Data: {train_data.shape}")
print(f"Test Data: {test_data.shape}")

Train Data: (32, 10)
Test Data: (16, 10)


In [5]:
# Count items per user in the train split (expected: 4 each, 2 liked + 2 disliked)
train_data.groupby("user_id")["parent_asin"].count()

user_id
AERDZPURXIO2TZ5PTAUPJWJXMBMA    4
AESVLHPI65WY6Z3SDMWSTJE357KQ    4
AFFXC2NELPCGJVYJTNCJS5R3NYEA    4
AFHCS4IBOQ6FSCX2OYE6PWCJOG7Q    4
AFK45FDKEM67UNIUUKU3NRM4UAVA    4
AGVFLXOWJIHQVUYPNDSKDL4REH4A    4
AHJRJCJMK3XVV4BSPBRAHIYEODWA    4
AHWW7ANU7P7LGCBTIO2ACTADQK5A    4
Name: parent_asin, dtype: int64

In [6]:
# Count items per user in the test split (expected: 2 each, 1 liked + 1 disliked)
test_data.groupby("user_id")["parent_asin"].count()

user_id
AERDZPURXIO2TZ5PTAUPJWJXMBMA    2
AESVLHPI65WY6Z3SDMWSTJE357KQ    2
AFFXC2NELPCGJVYJTNCJS5R3NYEA    2
AFHCS4IBOQ6FSCX2OYE6PWCJOG7Q    2
AFK45FDKEM67UNIUUKU3NRM4UAVA    2
AGVFLXOWJIHQVUYPNDSKDL4REH4A    2
AHJRJCJMK3XVV4BSPBRAHIYEODWA    2
AHWW7ANU7P7LGCBTIO2ACTADQK5A    2
Name: parent_asin, dtype: int64

## Agents

In [7]:
# from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI

# ollama = ChatOllama(model="phi3:3.8b")
# Module-level chat client. UserAgent and ItemAgent below build their own ChatOpenAI
# instances and do not reference this variable.
# NOTE(review): the name `openai` would shadow the `openai` package if it is ever imported here.
openai = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [8]:
# liked_items_train = liked_items_train.merge(items_user, on="parent_asin")
# liked_items_test = liked_items_test.merge(items_user, on="parent_asin")

# disliked_items_train = disliked_items_train.merge(items_user, on="parent_asin")
# disliked_items_test = disliked_items_test.merge(items_user, on="parent_asin")

In [9]:
# from langchain_core.prompts import PromptTemplate

# Template describing one item together with the user's review of it.
# Placeholders: title_item, item_category, description, price, store, categories
# (item side) and rating, title_comment, text_comment (user side).
# Filled by encode_item_user_format.
PROMPT_USER_ITEM = """
Item: {title_item}
Item category: {item_category}
Description: {description}
Price: ${price}
Store: {store}
Categories: {categories}
User rating: {rating}
User comment title: {title_comment}
User comment: {text_comment}
"""

# Template describing one item without any user feedback.
# Placeholders: title_item, item_category, description, price, store, categories.
# Filled by encode_item_format.
PROMPT_ITEM = """
Item: {title_item}
Item category: {item_category}
Description: {description}
Price: ${price}
Store: {store}
Categories: {categories}
"""


def encode_item(item):
    """
    Extract the item-side fields used to fill the item prompt templates.

    Args:
        item: Mapping-like record (dict, DataFrame row, ...) exposing the keys
            "title_y", "main_category", "description", "price", "store" and
            "categories".

    Returns:
        dict: Keys "title_item", "item_category", "description", "price",
        "store" and "categories". "description" is always a single string:
        a sequence of parts is joined with spaces, a plain string is kept
        as-is, and a missing value becomes "".

    Raises:
        KeyError: If one of the expected keys is absent from ``item``.
    """
    title_item = item["title_y"]
    item_category = item["main_category"]

    raw_description = item["description"]
    if isinstance(raw_description, str):
        # Already flat text: joining it would insert a space between every character.
        description = raw_description
    elif raw_description is None or isinstance(raw_description, float):
        # Missing value. NOTE(review): assumes a float here is pandas' NaN placeholder
        # for an empty cell — confirm against the metadata file.
        description = ""
    else:
        description = " ".join(raw_description)

    price = item["price"]
    store = item["store"]
    categories = item["categories"]

    return {
        "title_item": title_item,
        "item_category": item_category,
        "description": description,
        "price": price,
        "store": store,
        "categories": categories,
    }


def encode_item_format(item):
    """
    Render the item-only prompt text for one item record.

    Args:
        item: Record accepted by encode_item.

    Returns:
        str: PROMPT_ITEM with its placeholders filled from encode_item(item).
    """
    # encode_item's keys are exactly PROMPT_ITEM's placeholder names.
    fields = encode_item(item)
    return PROMPT_ITEM.format(**fields)


def encode_user(user):
    """
    Extract the review-side fields of an interaction record.

    Args:
        user: Mapping-like record exposing "title_x" (review title),
            "text" (review body) and "rating".

    Returns:
        dict: Keys "title_comment", "text_comment" and "rating", in that order.
    """
    # (output key, source key) pairs, read in this order.
    field_map = (
        ("title_comment", "title_x"),
        ("text_comment", "text"),
        ("rating", "rating"),
    )
    return {target: user[source] for target, source in field_map}


def encode_item_user_format(item_user):
    """
    Render the item-plus-review prompt text for one interaction record.

    Args:
        item_user: Record carrying both the item fields read by encode_item
            and the review fields read by encode_user.

    Returns:
        str: PROMPT_USER_ITEM with its placeholders filled in.
    """
    # The two helpers return disjoint key sets that together cover every placeholder.
    item_fields = encode_item(item_user)
    review_fields = encode_user(item_user)
    return PROMPT_USER_ITEM.format(**item_fields, **review_fields)

In [10]:
# liked_items_train["encoded_item_user"] = liked_items_train.apply(
#     lambda row: encode_item_user_format(row), axis=1
# )
# disliked_items_train["encoded_item_user"] = disliked_items_train.apply(
#     lambda row: encode_item_user_format(row), axis=1
# )

# liked_items_test["encoded_item"] = liked_items_test.apply(
#     lambda row: encode_item_format(row), axis=1
# )
# disliked_items_test["encoded_item"] = disliked_items_test.apply(
#     lambda row: encode_item_format(row), axis=1
# )

In [11]:
# liked_items_train_formatted = "\n".join(
#     [
#         f"ITEM {i}:{item}"
#         for i, item in list(enumerate(liked_items_train["encoded_item_user"].values))
#     ]
# )

# disliked_items_train_formatted = "\n".join(
#     [
#         f"ITEM {i}:{item}"
#         for i, item in list(enumerate(disliked_items_train["encoded_item_user"].values))
#     ]
# )

## Collaborative Recommendation

Using [supervisor architecture](https://langchain-ai.github.io/langgraph/concepts/multi_agent/)

In [12]:
from langgraph.graph import START, END, StateGraph


from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages

from langgraph.graph import MessagesState


from typing_extensions import TypedDict, Annotated

In [13]:
from pydantic import BaseModel
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

# PROMPT SYSTEM USER
# System prompt sent with every UserAgent LLM call (representation, forward choice,
# memory update). Placeholders: {long_term_memory} and {short_term_memory}, each filled
# with the agent's memory phrases joined by newlines.
PROMPT_SYSTEM_USER = """
You represent a user who has interacted with and rated various items. Based on the provided memories, reflect on key aspects such as item scope, categories, pricing, and other relevant details. Use both long-term and short-term memories to guide your response, ensuring accuracy and relevance.

**Instructions:**
1. **Memory-Driven Reflection:** Use only the information explicitly available in the memories. Do not create or infer details that are not supported by the provided memories.
2. **Long-Term vs. Short-Term Memory:** 
   - Long-term memory contains general, lasting insights about your preferences and behavior patterns.
   - Short-term memory highlights recent interactions and temporary preferences.
3. **Relevance and Precision:** Focus on providing concise, targeted answers that reflect the user's experiences and preferences as documented in the memories.
4. **Strict Memory Adherence:** Do not introduce any assumptions or unsupported details.

### Input:
Long-term memory:
{long_term_memory}

Short-term memory:
{short_term_memory}
"""

# REPRESENTATION OF THE USER
# Human-turn prompt used by UserAgent.generate_user_representation. It has no
# placeholders and is sent verbatim; the memories arrive through the system prompt.
PROMPT_GENERATE_REPRESENTATION_USER = """
Based on your memories, generate a self-representation that highlights key preferences and behaviors to help predict future choices. Use only information directly supported by your memories to ensure accuracy.

**Instructions:**
1. **Memory-Based Representation:** Focus solely on the information stored in your memories to describe yourself.
2. **Key Preferences:** Highlight patterns, preferences, and behaviors that are likely to influence future decisions.
3. **Conciseness:** Limit the representation to a single paragraph that effectively summarizes relevant insights.
4. **Accuracy:** Avoid assumptions or details not present in the provided memories.

### Output:
A single-paragraph representation based on your memories, capturing essential traits and patterns relevant to future predictions.
"""


# TRAINING
# Structured-output schema for UserAgent.train_forward: the LLM's pick between two items.
# NOTE(review): documented with `#` comments on purpose — pydantic presumably exports a class
# docstring into the schema description that reaches the model; confirm before adding one.
class TrainTaskOutput(BaseModel):
    item_selected: int  # PROMPT_TRAIN_TASK asks for 1 or 2; the int type alone does not enforce it
    explanation: str  # short justification (the prompt asks for no more than 3 phrases)


# Human-turn prompt for the forward pass: choose between two candidate items.
# Placeholders: {item_1} and {item_2}. UserAgent.train_forward interpolates whatever it is
# given; the training and evaluation loops pass ItemAgent objects, so the text inserted
# here is each ItemAgent's __repr__ output.
PROMPT_TRAIN_TASK = """
Based on your memories, select one of the following items that you believe aligns best with your preferences, using the information provided by each item.

**Instructions:**
1. **Selection Criteria:** Use your stored memories to evaluate both items, identifying which one you prefer.
2. **Explanation:** Provide a brief explanation of your choice in **no more than 3 phrases**, focusing on key aspects that influenced your decision.
3. **Output Format:** Return a JSON containing:
   - **"item_selected"** (1 or 2): The item you chose.
   - **"explanation"**: A concise reason for your selection based on your preferences.

### Items:

Item 1:
{item_1}

Item 2:
{item_2}

### Output:
Return a JSON object containing the keys "item_selected" and "explanation."
"""

# MEMORIES UPDATE


# Structured-output schema for UserAgent.update_backwards: the rewritten memories.
# The prompt asks for at most 3 phrases per list; the types do not enforce that limit.
class MemoryUpdateBackward(BaseModel):
    long_term_memory: list[str]  # replaces UserAgent.long_term_memory wholesale
    short_term_memory: list[str]  # replaces UserAgent.short_term_memory wholesale


# Human-turn prompt for the user's backward pass (memory rewrite).
# Placeholders: {item_liked}, {item_disliked}, {y_pred}, {y_true}, {explanation}.
# NOTE(review): the training loop passes y_pred / y_true as item positions (1 or 2), but
# this prompt presents the items as liked/disliked without their positions — confirm the
# model can still tell which item it picked.
PROMPT_USER_MEMORIES_UPDATE = """
This is part of a training loop to optimize your memory for future decision-making.

**Instructions:**
1. **Long-Term Memory:** Identify and maintain the most relevant general and enduring information, such as item types or categories, that will guide your preferences over time. This should help align future decisions and explanations with the correct outcome (y_true).
2. **Short-Term Memory:** Select recent, specific information from recent interactions, such as details about particular items and explanations of your decisions, that can improve short-term predictions and decisions.
3. **Memory Management:** 
   - Each memory category (**long_term_memory** and **short_term_memory**) can have a maximum of 3 short phrases.
   - Add, update, or remove memory entries as needed to better align your predictions (y_pred) and explanations with the expected decision (y_true).
   - You may repeat important information if it needs reinforcement.

### Details:
1. The item expected to be liked is: {item_liked}
2. The item expected to be disliked is: {item_disliked}

Your decision is choose/like (y_pred): {y_pred}

The correct decision choose/like (y_true): {y_true}

Your explanation: {explanation}

### Output:
Return a JSON object with the keys **"long_term_memory"** and **"short_term_memory"**, each containing up to 3 short phrases.
"""


class UserAgent:
    """
    LLM-backed stand-in for one user.

    The agent keeps two lists of short phrases (long-term and short-term
    memory) plus a free-text self-description derived from them. Every LLM
    call is prefixed with a system message rendering the current memories.
    """

    def __init__(self, user_id: str):
        """Create an agent with placeholder memories; no LLM call is made here."""
        self.user_id = user_id
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

        # Both memories start from the same placeholder phrase.
        self.long_term_memory = ["No previous experience."]
        self.short_term_memory = ["No previous experience."]

        # Hard-coded initial self-description, replaced after the first memory update.
        self.user_representation = """
        Based on the available memories, I currently have no previous experience or interactions to draw upon, which means there are no established preferences or behaviors to inform future choices.
        As such, my self-representation is blank, indicating that I am open to new experiences and have yet to develop specific patterns or inclinations that would guide my decisions moving forward."""

    def __repr__(self):
        """Multi-line dump: id, both memories, then the self-description."""
        return f"UserAgent(user_id={self.user_id})\nLong Term Memory: {self.long_term_memory}\nShort Term Memory: {self.short_term_memory}\n{self.user_representation}"

    def _system_message(self):
        """Build the system message that renders the current memories, one phrase per line."""
        rendered = PROMPT_SYSTEM_USER.format(
            long_term_memory="\n".join(self.long_term_memory),
            short_term_memory="\n".join(self.short_term_memory),
        )
        return SystemMessage(content=rendered)

    def generate_user_representation(self):
        """
        Ask the LLM for a one-paragraph self-description based on the memories.

        Returns:
            The ``content`` of the LLM reply. The agent's stored
            representation is not modified by this method.
        """
        conversation = [
            self._system_message(),
            HumanMessage(content=PROMPT_GENERATE_REPRESENTATION_USER),
        ]
        reply = self.llm.invoke(conversation)
        return reply.content

    def update_backwards(self, item_liked, item_disliked, y_pred, explanation, y_true):
        """
        Rewrite both memories after a forward decision and refresh the self-description.

        The LLM sees the current memories (system message) together with the
        liked/disliked items, the agent's own choice and explanation, and the
        expected choice; it returns replacement memory lists.

        Parameters:
            item_liked: Text describing the item the user liked.
            item_disliked: Text describing the item the user disliked.
            y_pred: The choice the agent made in the forward pass.
            explanation: The agent's explanation for that choice.
            y_true: The expected choice.

        Returns:
            MemoryUpdateBackward: The structured LLM output whose lists have
            just replaced ``long_term_memory`` and ``short_term_memory``.
        """
        structured_llm = self.llm.with_structured_output(MemoryUpdateBackward)

        feedback = HumanMessage(
            content=PROMPT_USER_MEMORIES_UPDATE.format(
                item_liked=item_liked,
                item_disliked=item_disliked,
                y_pred=y_pred,
                explanation=explanation,
                y_true=y_true,
            )
        )
        memories_update = structured_llm.invoke([self._system_message(), feedback])

        # Replace (not merge) the memories, then derive the new self-description from them.
        self.long_term_memory = memories_update.long_term_memory
        self.short_term_memory = memories_update.short_term_memory
        self.user_representation = self.generate_user_representation()

        return memories_update

    def train_forward(self, item_1, item_2):
        """
        Forward pass: pick one of two candidate items using the current memories.

        Args:
            item_1: First candidate; interpolated into the prompt as text.
            item_2: Second candidate; interpolated into the prompt as text.

        Returns:
            TrainTaskOutput: The structured LLM output (``item_selected``, ``explanation``).
        """
        question = HumanMessage(
            content=PROMPT_TRAIN_TASK.format(item_1=item_1, item_2=item_2)
        )
        conversation = [self._system_message(), question]

        chooser = self.llm.with_structured_output(TrainTaskOutput)
        return chooser.invoke(conversation)

    def predict(self, item):
        """Placeholder for single-item prediction; not implemented, returns None."""
        return None

In [14]:
# System prompt sent with every ItemAgent LLM call.
# Placeholders: {item_json} (the item's metadata record, interpolated as text) and
# {memory} (the item's memory phrases joined by newlines).
PROMPT_SYSTEM_ITEM = """
You represent an item that has been rated and interacted with by various users. Based on the provided **item_json** and your **memory** (which includes insights from past interactions), analyze and reflect on key aspects such as user demographics, categories, pricing, preferences, and any other relevant details.

**Instructions:**
1. **Comprehensive Analysis:** Extract valuable insights from both the item data and memory, focusing on patterns related to user feedback, pricing strategies, and category relevance.
2. **Relevance:** Address information that can improve the item's appeal and usefulness to users, prioritizing data that enhances their experience or decision-making process.
3. **Concise and Focused:** Provide answers that are both informative and direct, avoiding unnecessary details while emphasizing critical information.
4. **Adaptability:** Ensure the response is adaptable for future queries by reflecting long-term, actionable insights derived from previous ratings and user interactions.

### Input:
Item Json:
{item_json}

Memory:
{memory}

### Output:
A concise, well-structured response that synthesizes all relevant information from the item data and memory, providing actionable insights based on previous ratings and user experiences.
"""


# Structured-output schema for ItemAgent.update_backwards: the rewritten item memory.
# The prompt asks for at most 5 phrases; the type does not enforce that limit.
class MemoryItem(BaseModel):

    memory: list[str]  # replaces ItemAgent.memory wholesale


# @TODO Improve that prompt, that's not giving the right context
# Human-turn prompt for the item's backward pass (memory rewrite).
# Placeholders: {y_pred}, {user_representation}, {user_explanation}, {y_true}.
# NOTE(review): nothing in this prompt says whether the item was shown as option 1 or 2,
# so positional y_pred / y_true values cannot be tied back to this item — likely part of
# the missing context the TODO refers to.
PROMPT_UPDATE_ITEM_MEMORY = """
Given the following details, update your memory with **collaborative filtering insights** that can guide future users when evaluating this item. Focus on general patterns learned from the user's correct or incorrect decisions and explanations to improve item recommendations for others.

**Instructions:**
1. **Operations:** Determine which operations to apply to the memory:  
   - **Add:** Introduce new insights if they provide valuable context for future users.  
   - **Maintain:** Retain existing insights if they remain relevant based on the current interaction.  
   - **Remove:** Discard outdated or irrelevant insights that no longer contribute meaningfully.  
   - **Change:** Update existing information to reflect new insights learned from the user's decision and explanation.
2. **Collaborative Filtering:** Adapt based on the alignment between user decision (y_pred) and the correct outcome (y_true):
   - If correct, reinforce positive or negative reasons for the item's rating.
   - If incorrect, adjust to explain why certain users might not align with the item.
3. **Generalization:** Avoid specific references to the current user and instead focus on capturing patterns that could apply to a broad range of users.
4. **Memory Limit:** Maintain a maximum of 5 phrases that provide long-term, reusable insights about the item.

### Details:
User decision (y_pred):
{y_pred}

User information:
{user_representation}

User explanation:
{user_explanation}

Real decision (y_true):
{y_true}

### Output:
Return up to 5 key phrases that summarize collaborative filtering insights for this item, reflecting general patterns of user preferences.
"""


# Human-turn prompt used by ItemAgent.generate_item_representation.
# Placeholder: {item_json} — the same metadata record that the system prompt already
# carries, repeated here as context.
PROMPT_GENERATE_REPRESENTATION_ITEM = """
Based on your existing memories and available information, generate a concise self-representation that helps users determine whether they resonate with you. Be authentic and highlight the most relevant traits, experiences, and interactions that may predict or align with user preferences.

**Instructions:**
1. **Honesty & Relevance:** Be truthful and focus on core memories that are meaningful to user interaction and decision-making.
2. **Context Awareness:** Leverage any prior memory to enrich the description. If no memory exists, generate insights based on the current context.
3. **Conciseness:** Limit your representation to a single, well-structured paragraph that reflects key traits and experiences relevant to user engagement.
4. **Adaptability:** Present yourself in a way that balances both specificity and general relevance, ensuring it can guide future interactions effectively.

### Context:
{item_json}

### Output:
A single-paragraph representation that reflects your self-description based on past experiences and the current context.
"""


class ItemAgent:
    """
    LLM-backed stand-in for one catalogue item.

    Holds the item's metadata record, a list of memory phrases learned from
    user interactions, and a free-text self-description generated by the LLM.
    """

    def __init__(self, item_id: str, item_json: dict):
        """
        Create the agent and immediately generate its first self-description.

        Args:
            item_id: Identifier of the item.
            item_json: Metadata record of the item; interpolated into prompts as text.

        Note:
            Construction performs one LLM call (generate_item_representation).
        """
        self.item_id = item_id
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

        # Memory starts from a placeholder phrase; item_json must be set before
        # the representation is generated because the prompts read both.
        self.memory = ["No previous experience."]
        self.item_json = item_json

        self.item_representation = self.generate_item_representation()

    def __repr__(self):
        """Multi-line dump: id, memory, raw metadata, then the self-description."""
        return f"ItemAgent(item_id={self.item_id})\nMemory: {self.memory}\n{self.item_json}\n{self.item_representation}"

    def _system_message(self):
        """Build the system message carrying the metadata and the memory, one phrase per line."""
        rendered = PROMPT_SYSTEM_ITEM.format(
            item_json=self.item_json, memory="\n".join(self.memory)
        )
        return SystemMessage(content=rendered)

    def generate_item_representation(self):
        """
        Ask the LLM for a one-paragraph self-description of the item.

        Returns:
            The ``content`` of the LLM reply. The stored representation is
            not modified by this method.
        """
        request = HumanMessage(
            content=PROMPT_GENERATE_REPRESENTATION_ITEM.format(
                item_json=self.item_json
            )
        )
        reply = self.llm.invoke([self._system_message(), request])
        return reply.content

    def update_backwards(self, y_pred, user_representation, explanation, y_true):
        """
        Rewrite the item memory after a user decision and refresh the self-description.

        Args:
            y_pred: The user's decision, interpolated into the prompt as text.
            user_representation: The user's self-description.
            explanation: The user's explanation for the decision.
            y_true: The expected decision, interpolated into the prompt as text.

        Returns:
            list[str]: The new memory phrases (also stored on ``self.memory``).
        """
        structured_llm = self.llm.with_structured_output(MemoryItem)

        feedback = HumanMessage(
            content=PROMPT_UPDATE_ITEM_MEMORY.format(
                y_pred=y_pred,
                user_representation=user_representation,
                user_explanation=explanation,
                y_true=y_true,
            )
        )
        outcome = structured_llm.invoke([self._system_message(), feedback])

        # Replace (not merge) the memory, then regenerate the description from it.
        self.memory = outcome.memory
        self.item_representation = self.generate_item_representation()

        return self.memory

### Start Agents

In [15]:
import time


class Timer:
    """
    Context manager that prints how long its ``with`` body took.

    Usage:
        with Timer("step") as t:
            ...
        t.elapsed  # seconds spent inside the block

    Attributes:
        name: Label printed in front of the duration.
        start, end: time.perf_counter() readings taken on entry and exit.
            They are only meaningful relative to each other (not wall-clock
            timestamps).
        elapsed: ``end - start`` in seconds, set on exit.
    """

    def __init__(self, name=""):
        self.name = name
        self.start = None
        self.end = None
        self.elapsed = None

    def __enter__(self):
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() differences can.
        self.start = time.perf_counter()
        # Return the timer so `with Timer(...) as t` gives access to the result.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.perf_counter()
        self.elapsed = self.end - self.start
        print(f"{self.name} took {self.elapsed:.2f} seconds")
        # Never suppress an exception raised inside the timed block.
        return False

### User

In [16]:
from tqdm.notebook import tqdm

# One UserAgent per sampled user, keyed by the user id rendered as a string.
# UserAgent.__init__ only builds the chat client and placeholder memories (no LLM call).
with Timer("Creation of User Agents"):
    id2user = {
        f"{user_id}": UserAgent(user_id)
        for user_id in tqdm(users, desc="Creating User Agents")
    }

Creating User Agents:   0%|          | 0/8 [00:00<?, ?it/s]

Creation of User Agents took 2.31 seconds


### Item

In [17]:
# Every item referenced by either split needs an agent.
all_train_items = train_data["parent_asin"].unique()
all_test_items = test_data["parent_asin"].unique()

# Ids from both splits; duplicates are harmless for a membership test.
item_ids = [*all_train_items.tolist(), *all_test_items.tolist()]

# One metadata row per referenced item (first occurrence wins).
items_meta = data_items[data_items["parent_asin"].isin(item_ids)].drop_duplicates(
    "parent_asin"
)

# (item_id, metadata_dict) pairs consumed by the ItemAgent construction cell.
items = [
    (record["parent_asin"], record)
    for record in items_meta.to_dict(orient="records")
]

In [18]:
# One ItemAgent per referenced item. Each construction performs an LLM call
# (ItemAgent.__init__ generates the item representation), which is why this cell
# is slow compared with the user-agent cell.
# TODO(review): consider caching the generated representations so that
# Restart & Run All does not repeat every call.
with Timer("Creation of items"):
    id2items = {
        item_id: ItemAgent(item_id, item_json)
        for item_id, item_json in tqdm(items, desc="Creating Item Agents")
    }

Creating Item Agents:   0%|          | 0/40 [00:00<?, ?it/s]

Creation of items took 143.68 seconds


## Dataloader

In [19]:
class BinaryClassificationLoader:
    """
    Builds shuffled (user, item_1, item_2, y_true) samples for pairwise choice tasks.

    Each sample pairs one item the user liked (rating > 3) with one the user
    disliked (rating < 3), placed in random order. All samples are generated
    once, at construction time, and stored in ``self.samples``.

    Args:
        data: DataFrame with "user_id", "parent_asin" and "rating" columns.
        id2user: Mapping from user id to the user's agent.
        id2items: Mapping from "parent_asin" to the item's agent.
        n_sample_per_user: Number of pairs to draw for each user.
    """

    def __init__(self, data, id2user, id2items, n_sample_per_user):
        self.data = data
        self.id2user = id2user
        self.id2items = id2items
        self.n_sample_per_user = n_sample_per_user
        self.samples = self._generate_samples()

    def __len__(self):
        """Number of generated samples."""
        return len(self.samples)

    def _generate_samples(self):
        """
        Draw ``n_sample_per_user`` liked/disliked pairs for every user in ``data``.

        For each pair, one liked row and one disliked row are sampled
        independently (so the same pair may be drawn more than once), and a
        coin flip decides which of them becomes ``item_1``. Users that have no
        liked row or no disliked row cannot form a pair and are skipped.

        Returns:
            list: Shuffled list of dictionaries with the keys:
                - "user_agent": The agent of the user.
                - "item_1": The first item agent (liked or disliked).
                - "item_2": The second item agent (the other one).
                - "y_true": 1 or 2, the position of the liked item.

        Raises:
            KeyError: If a sampled user or item has no entry in ``id2user`` /
                ``id2items``.
        """
        samples = []
        for user in self.data["user_id"].unique():

            sample_user = self.data.query("user_id == @user")

            # The candidate pools do not change between draws, so filter once per user.
            positive_pool = sample_user.query("rating > 3")
            negative_pool = sample_user.query("rating < 3")

            # Without at least one row on each side no pair can be built;
            # sampling from an empty pool would raise.
            if positive_pool.empty or negative_pool.empty:
                continue

            for _ in range(self.n_sample_per_user):
                # Same draw order as before (positive, negative, coin flip)
                # so seeded runs keep producing the same pairs.
                positive_item = positive_pool.sample(1)
                negative_item = negative_pool.sample(1)

                positive_item_agent = self.id2items[
                    positive_item["parent_asin"].values[0]
                ]
                negative_item_agent = self.id2items[
                    negative_item["parent_asin"].values[0]
                ]

                user_agent = self.id2user[user]

                # Randomly decide whether the liked item is shown first or second.
                if random.choice([True, False]):
                    item_1, item_2, y_true = positive_item_agent, negative_item_agent, 1
                else:
                    item_1, item_2, y_true = negative_item_agent, positive_item_agent, 2

                samples.append(
                    {
                        "user_agent": user_agent,
                        "item_1": item_1,
                        "item_2": item_2,
                        "y_true": y_true,
                    }
                )

        # Shuffle into a new list so consecutive samples do not share a user by construction.
        samples = random.sample(samples, len(samples))
        return samples

In [20]:
# Two pairs per user for each split.
# The test split holds exactly one liked and one disliked row per user, so both test
# pairs of a user contain the same two items; only their order may differ.
train_dataloader = BinaryClassificationLoader(train_data, id2user, id2items, 2)
test_dataloader = BinaryClassificationLoader(test_data, id2user, id2items, 2)

## Training Loop - Select One

In [21]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)


def evaluate(y_pred, y_true):
    """
    Score predicted item positions against the true positions.

    Args:
        y_pred: Predicted labels, one per sample.
        y_true: Ground-truth labels, in the same order as ``y_pred``.

    Returns:
        dict: "accuracy", "precision", "recall" and "f1_score". Precision,
        recall and F1 are computed with ``zero_division=0`` and
        scikit-learn's default averaging.
    """
    report = {"accuracy": accuracy_score(y_true, y_pred)}

    # These three share the same call signature and zero-division guard.
    guarded_metrics = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1_score", f1_score),
    )
    for metric_name, metric_fn in guarded_metrics:
        report[metric_name] = metric_fn(y_true, y_pred, zero_division=0)

    return report


def run_evaluation(test_dataloader):
    """
    Run the forward choice task on every sample and score the predictions.

    No memory update happens here: only ``train_forward`` is called on each
    sample's user agent.

    Args:
        test_dataloader: Object exposing ``samples``, a list of dicts with
            "user_agent", "item_1", "item_2" and "y_true".

    Returns:
        dict: The metrics produced by ``evaluate``.
    """
    entries = test_dataloader.samples
    predictions, targets = [], []

    for entry in tqdm(entries, total=len(entries), desc="Evaluating"):
        agent = entry["user_agent"]
        first, second = entry["item_1"], entry["item_2"]
        targets.append(entry["y_true"])

        # Only the selected position is scored; the explanation is ignored here.
        outcome = agent.train_forward(first, second)
        predictions.append(outcome.item_selected)

    return evaluate(predictions, targets)

In [22]:
run_evaluation(test_dataloader)

Evaluating:   0%|          | 0/16 [00:00<?, ?it/s]

{'accuracy': 0.4375,
 'precision': 0.4444444444444444,
 'recall': 0.5,
 'f1_score': 0.47058823529411764}

In [23]:
# NOTE(review): same call as the previous cell, and its execution count (In [23]) precedes
# the training loop (In [26]), so this scores the untrained agents a second time — the
# identical metrics below are consistent with that. TODO: run it after the training loop
# to measure the effect of training.
run_evaluation(test_dataloader)

Evaluating:   0%|          | 0/16 [00:00<?, ?it/s]

{'accuracy': 0.4375,
 'precision': 0.4444444444444444,
 'recall': 0.5,
 'f1_score': 0.47058823529411764}

### Train Loop

In [24]:
n_epochs = 1

In [26]:
# One pass per epoch over the shuffled training pairs:
#   forward  -> the user agent picks item 1 or item 2 and explains why
#   backward -> the user agent rewrites its memories, then each item agent rewrites its own
for epoch in range(n_epochs):

    for sample in tqdm(
        train_dataloader.samples,
        total=len(train_dataloader.samples),
        desc=f"Epoch {epoch + 1}/{n_epochs}",
    ):

        user_agent = sample["user_agent"]
        item_1 = sample["item_1"]
        item_2 = sample["item_2"]
        y_true = sample["y_true"]

        # forward
        train_predict = user_agent.train_forward(item_1, item_2)

        item_selected = train_predict.item_selected
        explanation = train_predict.explanation

        # backwards

        # y_true is the position (1 or 2) of the liked item
        if y_true == 1:
            item_liked, item_disliked = item_1, item_2
        else:
            item_liked, item_disliked = item_2, item_1

        y_pred = item_selected

        # update the user agent (positions are passed through unchanged)
        user_agent.update_backwards(
            item_liked.item_representation,
            item_disliked.item_representation,
            y_pred,
            explanation,
            y_true,
        )

        # update items
        # An item agent does not know whether it was shown as option 1 or 2, so the
        # positional labels are translated into statements about that item itself:
        # was it selected by the user agent, and was it actually liked by the user.
        # The user representation passed here is the one refreshed just above.
        for position, item_agent in ((1, item_1), (2, item_2)):
            item_agent.update_backwards(
                "selected" if y_pred == position else "not selected",
                user_agent.user_representation,
                explanation,
                "liked" if y_true == position else "disliked",
            )

Epoch 1/1:   0%|          | 0/16 [00:00<?, ?it/s]

### Update backward

In [124]:
user

UserAgent(user_id=AGDX6GPEK772FFKOWYJVJRLKWTRQ)
Long Term Memory: ['No previous experience.']
Short Term Memory: ['No previous experience.']
Based on the available memories, I currently have no previous experience or interactions to draw from, which means I lack established preferences or behaviors that could influence future choices. As such, my decision-making will be guided by new experiences and interactions as they occur, without any prior patterns or insights to inform those choices.

In [125]:
# NOTE(review): hidden kernel state. `positive_item` and `negative_item` are not assigned in
# any visible cell, and `user` is only bound as the user-id loop variable of the split cell
# (not a UserAgent). `y_pred`, `explanation` and `y_true` would be whatever the last
# training-loop iteration left behind. TODO: rebuild these from a train_dataloader sample
# so the cell survives Restart & Run All.
user_update = user.update_backwards(
    positive_item.item_representation,
    negative_item.item_representation,
    y_pred,
    explanation,
    y_true,
)

In [126]:
print(f"Long Term Memory: {user_update.long_term_memory}")
print(f"Short Term Memory: {user_update.short_term_memory}")

Long Term Memory: ['Passionate about grunge and alternative rock', 'Appreciate lyrical depth and emotional narratives', 'Value thoughtful critiques and diverse perspectives']
Short Term Memory: ["Liked Nirvana's 'In Utero' for its insights", "Disliked Green Day's 'Dookie' due to mixed opinions", 'Strong average ratings influence my preferences']


In [127]:
user

UserAgent(user_id=AGDX6GPEK772FFKOWYJVJRLKWTRQ)
Long Term Memory: ['Passionate about grunge and alternative rock', 'Appreciate lyrical depth and emotional narratives', 'Value thoughtful critiques and diverse perspectives']
Short Term Memory: ["Liked Nirvana's 'In Utero' for its insights", "Disliked Green Day's 'Dookie' due to mixed opinions", 'Strong average ratings influence my preferences']
I am passionate about grunge and alternative rock music, with a strong appreciation for lyrical depth and emotional narratives. My preferences are influenced by thoughtful critiques and diverse perspectives, leading me to favor albums that offer insights and resonate on a deeper level. Recently, I enjoyed Nirvana's 'In Utero' for its meaningful content, while I found Green Day's 'Dookie' less appealing due to mixed opinions. I tend to rate items based on strong average ratings, which significantly impacts my future choices.

### Update Item Backward

In [128]:
positive_item

ItemAgent(item_id=B000003TAR)
Memory: ['No previous experience.']
{'rating': 4.0, 'title_x': '4.25 stars,better than Nevermind, worse than Bleach', 'text': 'I think I\'m one of the very few diehards that believes \'In Utero\' was not Nirvanas best studio album. Now dont get me wrong, it is far from a bad album, justifying the four star rating, but there are many crudentials lacking of \'In Utero\' that aren\'t as lacking on \'Bleach\'. However, some of Nirvanas best and most beautiful songs can be found here; songs like \'Heart Shaped Box\', with its depressing lyrics about Kurts heroine addiction and his misfortune that befell him soon after he was hooked, "forever in debt to your priceless advise" lets us know that Cobain would much rather have chosen a drug-free path. \'Rape Me\' was Kurts anti-Teen Spirit. It is a song about how SLTS "raped" Nirvana of their undergroung status and turned them into alternative heroes. "Rape me my friend" describes how Kurt almost felt betrayed, beca

In [129]:
# NOTE(review): `y_dict` (per-item y_pred / y_true values) and `positive_item` are not
# assigned in any visible cell; this cell relies on leftover kernel state.
positive_updated_memory = positive_item.update_backwards(
    y_dict["y_pred_positive"],
    user_representation=user.user_representation,
    explanation=explanation,
    y_true=y_dict["y_true_positive"],
)

In [130]:
for i, phrase in enumerate(positive_updated_memory):
    print(f"{i}: {phrase}")

0: Users passionate about grunge and alternative rock often prioritize albums with lyrical depth and emotional narratives, leading to higher satisfaction with items like 'In Utero'.
1: Thoughtful critiques and diverse perspectives significantly influence user preferences, making albums that resonate on a deeper level more appealing.
2: Strong average ratings play a crucial role in user decision-making, as they reflect a shared appreciation among fans and can guide future choices.
3: Users who enjoy albums with meaningful content tend to rate them positively, reinforcing the value of lyrical insight in music selections.
4: When evaluating music, users often compare albums within the same genre, leading to preferences based on critical reception and personal resonance.


In [131]:
# NOTE(review): `y_dict` and `negative_item` are not assigned in any visible cell;
# this cell relies on leftover kernel state.
negative_update_memory = negative_item.update_backwards(
    y_dict["y_pred_negative"],
    user_representation=user.user_representation,
    explanation=explanation,
    y_true=y_dict["y_true_negative"],
)

In [132]:
for i, phrase in enumerate(negative_update_memory):
    print(f"{i}: {phrase}")

0: Users who appreciate lyrical depth and emotional narratives may find Green Day's 'Dookie' less appealing compared to grunge-focused albums.
1: Strong average ratings significantly influence user decisions, leading them to favor albums with higher perceived quality.
2: Thoughtful critiques and diverse perspectives are essential for users seeking meaningful music experiences.
3: Users passionate about alternative rock often prefer albums that resonate on a deeper emotional level rather than mainstream pop-punk.
4: Mixed opinions about an album can deter users from selecting it, especially if they prioritize critical acclaim.


: 

## Evaluation

Evaluation comparison: with sample vs. no sample

In [None]:
from typing import List

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_absolute_error, mean_squared_error

# warning level
import warnings

# Silences every warning category for the rest of the kernel session.
# NOTE(review): presumably meant to hide scikit-learn's undefined-metric warnings raised
# by compute_metrics — confirm, and consider scoping the filter to that call.
warnings.filterwarnings("ignore")


def compute_metrics(y_true: List[int], y_pred: List[int]):
    """
    Compute classification and error metrics for integer labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same order as ``y_true``.

    Returns:
        dict: "accuracy", weighted-average "precision", "recall" and "f1",
        plus "mae" and "mse" computed directly on the label values.
    """
    metrics = {"accuracy": accuracy_score(y_true, y_pred)}

    # Weighted averaging: per-class scores weighted by class support.
    weighted_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for label, scorer in weighted_scorers:
        metrics[label] = scorer(y_true, y_pred, average="weighted")

    # Error metrics treat the labels as plain numbers.
    metrics["mae"] = mean_absolute_error(y_true, y_pred)
    metrics["mse"] = mean_squared_error(y_true, y_pred)

    return metrics