In [6]:
from sqlalchemy import create_engine
import pandas as pd
import uuid

# Sample sentences used as the notebook's working data set (one string each)
data_list = [
    'LLM Powered Autonomous Agents is a concept discussed by Lilian Weng.',
    'The date of the article is June 23, 2023.',
    'The estimated reading time for the article is 31 minutes.',
    'Lilian Weng is the author of the article.',
    'Building agents with LLM as its core controller is a cool concept.',
    'Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer, and BabyAGI, serve as inspiring examples.',
    'The potentiality of LLM extends beyond generating well-written copies, stories, essays, and programs.',
    'LLM can be framed as a powerful general problem solver.',
    'In a LLM-powered autonomous agent system, LLM functions as the agent’s brain.',
    'Several key components complement LLM in the agent system.',
    'Planning is one of the key components of the agent system.',
    'Task decomposition allows the agent to break down large tasks into smaller, manageable subgoals.',
    'Self-reflection enables the agent to learn from mistakes and refine future actions.',
    'Memory is another key component of the agent system.',
    'Short-term memory is utilized for in-context learning.',
    'Long-term memory provides the agent with the capability to retain and recall information over extended periods.',
    'Tool use is a distinguishing characteristic of human beings.',
    'Equipping LLMs with external tools can significantly extend the model capabilities.',
    'MRKL is a neuro-symbolic architecture for autonomous agents.',
    'MRKL systems contain a collection of expert modules.',
    'The general-purpose LLM works as a router to route inquiries to the best suitable expert module.',
    'TALM and Toolformer fine-tune a language model to learn to use external tool APIs.',
    'ChatGPT Plugins and OpenAI API function calling are examples of LLMs augmented with tool use capability.',
    'HuggingGPT is a framework to use ChatGPT as the task planner.',
    'ChemCrow is a domain-specific example where LLM is augmented with expert-designed tools for scientific tasks.',
    'Generative Agents is an experiment where virtual characters controlled by LLM-powered agents interact in a sandbox environment.',
    'AutoGPT is a proof-of-concept demo for setting up autonomous agents with LLM as the main controller.',
    'The article discusses challenges such as finite context length and reliability of natural language interface.',
    'The article includes citations and references for further reading.',
]

# Build the table in one step: the sentence text, a freshly generated random
# UUID per row, and the same constant text_id repeated on every row.
df = pd.DataFrame({
    'sentences': data_list,
    'uuid': [str(uuid.uuid4()) for _ in data_list],
    'text_id': "78a7d0ea-0d7d-4b99-827b-83fab343a86e",
})

# SQLite engine backed by test.db in the working directory
engine = create_engine('sqlite:///test.db')

# Write the frame to the 'sentences' table, replacing any existing table of
# that name; the DataFrame index is not stored.
df.to_sql(name='sentences', con=engine, index=False, if_exists='replace')

print("Data uploaded successfully!")

Data uploaded successfully!


In [7]:
from sqlalchemy import create_engine, text

# SQLAlchemy URL of the SQLite database written by the upload cell
DATABASE_URL = "sqlite:///./test.db"

# Create an engine
engine = create_engine(DATABASE_URL)

# Fetch every (uuid, sentence) pair from the 'sentences' table with a single
# query instead of issuing one extra SELECT per uuid.
with engine.connect() as connection:
    sentence_rows = connection.execute(
        text("SELECT uuid, sentences FROM sentences")
    ).fetchall()

# UUIDs in the order the rows were returned
ids = [str(row[0]) for row in sentence_rows]

# Map uuid -> sentence text; if a uuid occurred twice the later row would win
page_contents = {str(row[0]): row[1] for row in sentence_rows}

# Separate the page contents into a list
sentences = list(page_contents.values())

from langchain_core.documents import Document
import uuid

# Wrap each sentence in a Document. Every document gets a newly generated
# UUID (not the uuid stored in the 'sentences' table), recorded both as the
# document id and under metadata["id"].
# The loop variables are deliberately not called `text` or `id`: those names
# would overwrite the sqlalchemy `text` import and the `id` builtin for the
# rest of the notebook session.
sentence_documents = []

for sentence in sentences:
    doc_id = str(uuid.uuid4())
    sentence_documents.append(
        Document(id=doc_id, page_content=sentence, metadata={'source': 'test', "id": doc_id})
    )

sentence_documents

[Document(id='15755d07-16f7-44d2-899d-4769ae1762fe', metadata={'source': 'test', 'id': '15755d07-16f7-44d2-899d-4769ae1762fe'}, page_content='LLM Powered Autonomous Agents is a concept discussed by Lilian Weng.'),
 Document(id='1715d119-7e46-44df-b0d1-a1cd72d4b6de', metadata={'source': 'test', 'id': '1715d119-7e46-44df-b0d1-a1cd72d4b6de'}, page_content='The date of the article is June 23, 2023.'),
 Document(id='f12c2c1e-2ae1-403b-9ea5-51d6dcf0b769', metadata={'source': 'test', 'id': 'f12c2c1e-2ae1-403b-9ea5-51d6dcf0b769'}, page_content='The estimated reading time for the article is 31 minutes.'),
 Document(id='22610cc3-5138-44ad-9f84-af61dcf076f3', metadata={'source': 'test', 'id': '22610cc3-5138-44ad-9f84-af61dcf076f3'}, page_content='Lilian Weng is the author of the article.'),
 Document(id='97b55eba-12b5-4ad8-91b4-cf792ecd1936', metadata={'source': 'test', 'id': '97b55eba-12b5-4ad8-91b4-cf792ecd1936'}, page_content='Building agents with LLM as its core controller is a cool concept.'

In [41]:
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

# Settings are read from the process environment after load_dotenv() has run;
# each name is None when its variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
ASTRA_DB_API_KEY = os.environ.get('ASTRA_DB_API_KEY')
ASTRA_DB_ENDPOINT = os.environ.get('ASTRA_DB_ENDPOINT')
ASTRA_DB_KEYSPACE = os.environ.get('ASTRA_DB_KEYSPACE')

# Chat model handle
model = ChatOpenAI(model='gpt-4o')

# Embedding model handle (constructed with the library defaults)
embeddings = OpenAIEmbeddings()

# Direct embedding of the sentence list is left disabled:
#text_embeddings = embeddings.embed_documents(sentences)

In [None]:
from langchain_community.vectorstores import SQLiteVec



# File path of the SQLite database backing the vector store. It is declared
# here, rather than taken from DATABASE_URL, because on a top-to-bottom run
# DATABASE_URL still holds the SQLAlchemy URL "sqlite:///./test.db", which is
# not a file path, while the cell that reads the vectors back opens "./vec.db".
VEC_DB_PATH = "./vec.db"

connection = SQLiteVec.create_connection(db_file=VEC_DB_PATH)

# Vector store that embeds documents with the `embeddings` object defined in
# the setup cell and keeps them in the "vectorstore" table.
db1 = SQLiteVec(
    table="vectorstore",
    embedding=embeddings,
    connection=connection
)

# NOTE(review): re-running this cell presumably inserts the same documents
# again; confirm, and clear the table first if exactly one copy is required.
db1.add_documents(sentence_documents)

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

In [45]:
from sqlalchemy import create_engine, text
import struct
import sqlite3
import sqlite_vec

# File path of the SQLite database that holds the vector store.
# (Despite the name this is a plain path, not a SQLAlchemy URL.)
DATABASE_URL = "./vec.db"

db = sqlite3.connect(DATABASE_URL)
# Extension loading is enabled only for as long as it takes to load sqlite-vec
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)


# Retrieve the stored vectors in rowid order. The explicit ORDER BY matters:
# later cells pair these rows with `sentences` by position, and SQLite does
# not promise any particular row order without it.
query = "SELECT text_embedding FROM vectorstore ORDER BY rowid"
result = db.execute(query).fetchall()

# Deserialize the vectors
def deserialize_float32(blob):
    """Unpack a bytes blob into a tuple of Python floats.

    The blob is read as consecutive 4-byte floats using ``struct``'s ``f``
    format in native byte order. ``struct.error`` is raised when the blob
    length is not a multiple of four.
    """
    float_count, _ = divmod(len(blob), 4)
    layout = "%df" % float_count
    return struct.unpack(layout, blob)

# Decode the first column of every fetched row into a tuple of floats
text_embeddings = list(map(deserialize_float32, (row[0] for row in result)))

len(text_embeddings)

29

In [46]:
from sklearn.cluster import KMeans

num_clusters = 4

# Sentences and embeddings are matched purely by position, so stop here if the
# two lists have different lengths; zip() below would otherwise silently drop
# the surplus items and pair the rest without any warning.
assert len(sentences) == len(text_embeddings), (
    f"{len(sentences)} sentences but {len(text_embeddings)} embeddings"
)

# Cluster the embeddings and assign a cluster to each proposition
# (random_state is fixed so repeated runs use the same seed)
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(text_embeddings)
cluster_assignments = kmeans.labels_

# Create a list of dict to store the embeddings, the text, and the cluster assignment
props_clustered = [
    {"text": prop, "embeddings": emb, "cluster": cluster}
    for prop, emb, cluster in zip(sentences, text_embeddings, cluster_assignments)
]

# Display clusters and their propositions, one blank line after each cluster
for cluster in range(num_clusters):
    print(f"Cluster {cluster}:")
    for prop in props_clustered:
        if prop["cluster"] == cluster:
            print(f"  - {prop['text']}")
    print()
len(props_clustered)

Cluster 0:
  - The date of the article is June 23, 2023.
  - The estimated reading time for the article is 31 minutes.
  - Planning is one of the key components of the agent system.
  - Self-reflection enables the agent to learn from mistakes and refine future actions.
  - Memory is another key component of the agent system.
  - Short-term memory is utilized for in-context learning.
  - Long-term memory provides the agent with the capability to retain and recall information over extended periods.

Cluster 1:
  - The potentiality of LLM extends beyond generating well-written copies, stories, essays, and programs.
  - LLM can be framed as a powerful general problem solver.
  - Several key components complement LLM in the agent system.
  - Tool use is a distinguishing characteristic of human beings.
  - Equipping LLMs with external tools can significantly extend the model capabilities.
  - The general-purpose LLM works as a router to route inquiries to the best suitable expert module.
  -

29

In [56]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def cluster_embeddings(embeddings, radius=0.01):
    """Group embeddings into connected components of a cosine-similarity graph.

    Two embeddings are direct neighbours when their cosine similarity is
    strictly greater than ``1 - radius``. A cluster is everything reachable
    from a seed through a chain of neighbours, so two members of one cluster
    are not necessarily within ``radius`` of each other.

    Args:
        embeddings: Sequence of equal-length numeric vectors (anything
            ``np.asarray`` can turn into a 2-D array).
        radius: Largest cosine distance (``1 - similarity``) at which two
            embeddings still count as direct neighbours.

    Returns:
        Integer NumPy array of length ``len(embeddings)`` with cluster labels
        ``0, 1, 2, ...`` numbered in order of each cluster's first member.
        An empty input yields an empty array.
    """
    n = len(embeddings)
    # -1 marks "not labelled yet"; integer dtype keeps labels usable as
    # indices and makes them print as 0, 1, 2 rather than 0.0, 1.0, 2.0.
    labels = np.full(n, -1, dtype=int)
    if n == 0:
        return labels

    # Normalise every vector once, so a row of cosine similarities is a single
    # matrix-vector product instead of re-normalising the matrix per point.
    vectors = np.asarray(embeddings, dtype=float)
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0.0] = 1.0  # all-zero vectors stay zero (similarity 0)
    unit_vectors = vectors / norms
    threshold = 1 - radius

    current_label = 0
    for seed in range(n):
        if labels[seed] != -1:
            continue
        # Depth-first flood fill from the seed over the neighbour relation
        to_label = [seed]
        while to_label:
            idx = to_label.pop()
            if labels[idx] != -1:
                continue
            labels[idx] = current_label
            similarities = unit_vectors @ unit_vectors[idx]
            neighbors = np.flatnonzero(similarities > threshold)
            # Only queue points that still need a label
            to_label.extend(neighbors[labels[neighbors] == -1])
        current_label += 1

    return labels

# Run the radius-based clustering on the stored embeddings
radius = 0.14  # Adjust this value based on your needs
cluster_assignments = cluster_embeddings(text_embeddings, radius)

# One record per sentence: its text, its embedding and its cluster label
props_clustered = []
for sentence, vector, label in zip(sentences, text_embeddings, cluster_assignments):
    props_clustered.append({"text": sentence, "embeddings": vector, "cluster": label})

# Print every distinct label followed by the sentences assigned to it, then
# the total number of records and a trailing blank line
for cluster in set(cluster_assignments):
    print(f"Cluster {cluster}:")
    members = [entry for entry in props_clustered if entry["cluster"] == cluster]
    for entry in members:
        print(f"  - {entry['text']}")
print(len(props_clustered))
print()

Cluster 0.0:
  - LLM Powered Autonomous Agents is a concept discussed by Lilian Weng.
  - Building agents with LLM as its core controller is a cool concept.
  - Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer, and BabyAGI, serve as inspiring examples.
  - In a LLM-powered autonomous agent system, LLM functions as the agent’s brain.
  - Several key components complement LLM in the agent system.
  - Generative Agents is an experiment where virtual characters controlled by LLM-powered agents interact in a sandbox environment.
  - AutoGPT is a proof-of-concept demo for setting up autonomous agents with LLM as the main controller.
Cluster 1.0:
  - The date of the article is June 23, 2023.
Cluster 2.0:
  - The estimated reading time for the article is 31 minutes.
Cluster 3.0:
  - Lilian Weng is the author of the article.
Cluster 4.0:
  - The potentiality of LLM extends beyond generating well-written copies, stories, essays, and programs.
  - LLM can be framed as a powerful gen