### R2R Engine Test
Review how metadata is used to identify the user (`user_id`) and/or enhance the result queries.

####  Engine docs here
https://r2r-docs.sciphi.ai/documentation/python-sdk/ingestion


In [None]:
import requests
import json

def get_users_overview(user_ids=None, timeout=30):
    """Fetch the users overview from the local R2R server.

    Sends a POST to ``/v1/users_overview`` on ``localhost:9311`` and returns
    the decoded JSON body.

    Args:
        user_ids: Optional list of user ID strings sent as ``user_ids`` in
            the request body. ``None`` is sent as an empty list
            (presumably meaning "all users" -- confirm against the R2R API).
        timeout: Seconds to wait for the server before the request is
            aborted. Defaults to 30.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'http://localhost:9311/v1/users_overview'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    payload = {
        "user_ids": [] if user_ids is None else user_ids
    }

    # requests waits forever by default; an explicit timeout keeps the
    # notebook cell from hanging when the server is down or stalled.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    return response.json()



def get_documents_for_user(user_ids=None, timeout=30):
    """Fetch the documents overview from the local R2R server.

    Sends a POST to ``/v1/documents_overview`` on ``localhost:9311`` with an
    empty ``document_ids`` list and the given ``user_ids``, and returns the
    decoded JSON body.

    Args:
        user_ids: Optional list of user ID strings sent as ``user_ids`` in
            the request body. ``None`` is sent as an empty list
            (presumably meaning "no user filter" -- confirm against the
            R2R API).
        timeout: Seconds to wait for the server before the request is
            aborted. Defaults to 30.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = 'http://localhost:9311/v1/documents_overview'
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    payload = {
        "document_ids": [],
        "user_ids": [] if user_ids is None else user_ids
    }

    # requests waits forever by default; an explicit timeout keeps the
    # notebook cell from hanging when the server is down or stalled.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    return response.json()




# get all users
# Called without arguments, so an empty `user_ids` list is sent to the server.
users_overview = get_users_overview()
print(json.dumps(users_overview, indent=4))


# get all docs for user ID
# The UUID is the same `user_id` value that the ingestion cell stores in the
# document metadata.
documents_overview = get_documents_for_user(["2bc1f0e5-89ee-4e4b-be75-cdcb22144efb"])
print(json.dumps(documents_overview, indent=4))


In [None]:
from r2r import Document, GenerationConfig, R2R

# NOTE: this cell does not ingest anything. It only asks the app's management
# service for the users overview, calling it on the `R2R` object directly
# instead of going through the HTTP helpers.

# Initialize the R2R app
app = R2R()  # You may pass a custom configuration to `R2R`


# An empty list is passed as the user ID argument.
res = app.engine.management_service.users_overview([])
print(res)

In [None]:

from r2r import Document, GenerationConfig, R2R

"""
This is creating 3 entries in `deweylearn_vecs_collection` entries in the database.
https://supabase.com/dashboard/project/wwsgmwylkhzowrfagkuf/editor/88376?schema=vecs

and 1 entry in `document_info_deweylearn_vecs_collection` entry in the database.
https://supabase.com/dashboard/project/wwsgmwylkhzowrfagkuf/editor/88384?schema=public

"""

# Initialize the R2R app
app = R2R()  # You may pass a custom configuration to `R2R`

# Metadata attached to the document ingested below. `user_id` holds the same
# UUID that the search and RAG cells later pass in `search_filters`.
meta = {
  "title": "Dirks NEW text",
  "user": "dliebich",
  "entity": "DeweyLearn",
  "user_id": "2bc1f0e5-89ee-4e4b-be75-cdcb22144efb"
}

# Ingest the test document
test_string = """
XXX DeweyLearn is an innovative AI-powered platform designed to assist teachers in analyzing classroom data. 
It processes classroom video recordings, transcribes them using speech-to-text technology, and then applies 
machine learning algorithms to extract key insights. These insights include student engagement, participation 
levels, and teacher feedback patterns. Additionally, DeweyLearn generates actionable recommendations for 
improving teaching strategies and enhancing student learning outcomes. The platform also integrates with 
existing LMS (Learning Management Systems) to seamlessly incorporate data into teachers’ workflows. Furthermore, 
DeweyLearn offers detailed reports and visual analytics, allowing educators to identify areas for improvement 
and track progress over time. With a focus on K-12 education, DeweyLearn aims to drive continuous improvement 
in both teaching practices and student performance through data-driven insights. The developer who invented and 
created DeweyLearn is Dirk Liebich. The platform work was started in July of 2023 and has since grown significantly.
"""

# Ingest the test document into the R2R system
# A single plain-text `Document` is submitted, carrying `meta` (including
# `user_id`) as its metadata; the call's result is kept in `res` for printing.
res = app.ingest_documents(
    [
        Document(
            type="txt",
            data=test_string,
            metadata=meta,
        )
    ]
)

print(res)


In [None]:
# Ingest the test document into the R2R system


longer_text = """
DeweyLearn is an innovative AI-powered platform designed to assist teachers in analyzing classroom data. 
It processes classroom video recordings, transcribes them using speech-to-text technology, and then applies 
machine learning algorithms to extract key insights. These insights include student engagement, participation 
levels, and teacher feedback patterns. Additionally, DeweyLearn generates actionable recommendations for 
improving teaching strategies and enhancing student learning outcomes. The platform also integrates with 
existing LMS (Learning Management Systems) to seamlessly incorporate data into teachers’ workflows. Furthermore, 
DeweyLearn offers detailed reports and visual analytics, allowing educators to identify areas for improvement 
and track progress over time. With a focus on K-12 education, DeweyLearn aims to drive continuous improvement 
in both teaching practices and student performance through data-driven insights. The developer who invented and 
created DeweyLearn is Dirk Liebich. The platform work was started in July of 2023 and has since grown significantly.
In addition to its core functionalities, DeweyLearn also supports real-time feedback mechanisms, enabling teachers 
to adjust their teaching methods on the fly. The platform's advanced analytics can identify trends and patterns 
that may not be immediately obvious, providing a deeper understanding of classroom dynamics. DeweyLearn's 
integration with various educational tools and resources makes it a versatile addition to any educational 
institution's technology stack. The platform's user-friendly interface ensures that teachers can easily navigate 
and utilize its features without extensive training. DeweyLearn's commitment to data privacy and security ensures 
that all user data is protected and used responsibly. As the platform continues to evolve, it plans to incorporate 
more advanced AI features, such as predictive analytics and personalized learning pathways, to further enhance 
its value to educators. DeweyLearn is not just a tool, but a comprehensive solution for modern education, 
empowering teachers and students alike to achieve their full potential.
"""

# Ingest the longer text as a single plain-text document with empty metadata
# (so, unlike the first document, it carries no `user_id`).
# The result must be assigned: otherwise `print(res)` below would show the
# stale result left over from the previous ingestion cell.
res = app.ingest_documents(
    [
        Document(
            type="txt",
            data=longer_text,
            metadata={},
        )
    ]
)

print(res)

#### SEARCH

In [None]:
from r2r import R2RClient
import json

# we are using a different port for DeweyLearn
client = R2RClient(base_url="http://localhost:9311")

# Vector search for the question, passing the `user_id` used in the ingestion
# metadata as a search filter and asking for at most 2 results.
# (Presumably the filter restricts hits to that user's documents -- confirm
# against the SDK.)
filtered_search_response = client.search(
    "What is DeweyLearn?",
    use_vector_search=True,
    search_filters={"user_id": "2bc1f0e5-89ee-4e4b-be75-cdcb22144efb"},
    search_limit=2
)

print(json.dumps(filtered_search_response, indent=4))


In [None]:
from r2r import R2RClient

# we are using a different port for DeweyLearn
client = R2RClient(base_url="http://localhost:9311")

# RAG request that selects the "hyde" search strategy with a limit of 10.
# NOTE(review): this call passes `vector_search_settings`, while other cells
# pass `use_vector_search` / `search_limit` as direct keyword arguments --
# confirm which form the installed r2r SDK version accepts.
hyde_response = client.rag(
    "What are the main themes in Shakespeare's plays?",
    vector_search_settings={
        "search_strategy": "hyde",
        "search_limit": 10
    }
)

# The response object is printed as-is (not JSON-formatted).
print('hyde_response = ', hyde_response)


In [None]:
# Hybrid search request: `do_hybrid_search=True` together with vector search,
# asking for up to 20 results. Relies on `client` and `json` from an earlier
# cell.
# NOTE(review): `hybrid_search_settings` is nested inside `search_filters`
# here, so it is sent as a filter rather than as a separate settings
# argument -- confirm against the installed SDK that these weights are
# actually applied.
hybrid_search_response = client.search(
    "Who is Dirk Liebich?",
    use_vector_search=True,
    search_limit=20,
    do_hybrid_search=True,
    search_filters={  # If you have specific filters, otherwise leave empty
        "hybrid_search_settings": {
            "full_text_weight": 1.0,
            "semantic_weight": 10.0,
            "full_text_limit": 200,
            "rrf_k": 25,
        }
    }
)

print(json.dumps(hybrid_search_response, indent=4))


In [None]:
# get all documents
# Uses the SDK client (`client` from an earlier cell) instead of the raw
# `requests` helper; no arguments are passed.
documents_overview = client.documents_overview()
print(json.dumps(documents_overview, indent=4))

# get all documents by user_id
# Same call, with `user_ids` set to the UUID used in the ingestion metadata.
# `documents_overview` is overwritten with the second result.
documents_overview = client.documents_overview(user_ids=['2bc1f0e5-89ee-4e4b-be75-cdcb22144efb'])
print(json.dumps(documents_overview, indent=4))



#### RAG

In [None]:

# RAG request passing the `user_id` from the ingestion metadata as a search
# filter, with a search limit of 2. Relies on `client` from an earlier cell.
rag_response = client.rag(
    "Who is Dirk Liebich",
    use_vector_search=True,
    search_filters={"user_id": "2bc1f0e5-89ee-4e4b-be75-cdcb22144efb"},
    search_limit=2, 
)

# First print only the text of the first completion choice, then a blank
# line, then the whole response as indented JSON.
print(rag_response['results']['completion']['choices'][0]['message']['content'])
print('')
print(json.dumps(rag_response, indent=4))


In [None]:
from r2r import R2RClient
import json

# we are using a different port for DeweyLearn
client = R2RClient(base_url="http://localhost:9311")

# RAG request with an explicit generation config: non-streaming, the
# "openai/gpt-4o-mini" model, temperature 0.7 and `max_tokens` of 150.
# No search filter is passed in this call.
response = client.rag(
    "What are the latest advancements in quantum computing?",
    rag_generation_config={
        "stream": False,
        "model": "openai/gpt-4o-mini",
        "temperature": 0.7,
        "max_tokens": 150
    }
)
print(json.dumps(response, indent=4))

In [None]:


# Sample questions for testing.
# Each entry is a dict with a single "question" key; no answers are stored.
questions_answers = [
    {"question": text}
    for text in (
        "What is the purpose of DeweyLearn?",
        "How does DeweyLearn process classroom videos?",
        "What kind of insights does DeweyLearn generate?",
        "How does DeweyLearn integrate into teachers' workflows?",
        "What are the main educational levels targeted by DeweyLearn?",
        "How does DeweyLearn help improve teaching practices?",
        "What kind of reports and analytics does DeweyLearn provide?",
    )
]

# Function to test RAG with the provided questions
def test_rag_function(app, questions_answers):
    """Run each question through ``app.rag`` and collect the outcomes.

    Args:
        app: Object exposing ``rag(question, generation_config)``; the value
            it returns must have ``search_results`` and ``completion``
            attributes.
        questions_answers: Iterable of dicts, each with a ``"question"`` key.

    Returns:
        A list with one dict per question, in input order, holding the
        ``"question"``, its ``"search_results"`` and its ``"completion"``.
    """

    def _ask(question):
        # A fresh config per call: "gpt-4o-mini" at temperature 0.0.
        answer = app.rag(
            question, GenerationConfig(model="gpt-4o-mini", temperature=0.0)
        )
        return {
            "question": question,
            "search_results": answer.search_results,
            "completion": answer.completion
        }

    return [_ask(item["question"]) for item in questions_answers]

# Run test on RAG function
# Uses the `app` and `questions_answers` objects defined in earlier cells.
rag_test_results = test_rag_function(app, questions_answers)

# Print test results
# One block per question: the question, its search results and its
# completion; the trailing "\n" leaves a blank line between blocks.
for result in rag_test_results:
    print(f"Question: {result['question']}")
    print(f"Search Results:\n{result['search_results']}")
    print(f"Completion:\n{result['completion']}\n")
