In [1]:
### LLM
from langchain_ollama import ChatOllama

local_llm = "llama3.2:latest"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

In [2]:
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from typing import List

def summarize_text_map_reduce(llm, docs, token_max: int):
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=token_max, chunk_overlap=0)

    map_prompt = ChatPromptTemplate.from_messages([("human", "You are an expert content summarizer. Combine your understanding of the following into a detailed nested bullet point summary:\n\n{context}")])
    reduce_template = """
    The following is a set of summaries:
    {docs}
    Combine all of your understanding into a single, detailed nested bullet point summary with overview at the beggining.
    """
    reduce_prompt = ChatPromptTemplate([("human", reduce_template)])
    
    map_chain = map_prompt | llm | StrOutputParser()
    reduce_chain = reduce_prompt | llm | StrOutputParser()

    split_docs = text_splitter.split_documents(docs)
    summaries = [map_chain.invoke(doc.page_content) for doc in split_docs]

    def length_function(docs):
        return sum(llm.get_num_tokens(doc) for doc in docs)

    def chunk_summaries(summaries, max_tokens):
        chunks = []
        current_chunk = []
        current_chunk_tokens = 0
        
        for summary in summaries:
            summary_tokens = llm.get_num_tokens(summary)
            
            if current_chunk_tokens + summary_tokens <= max_tokens:
                current_chunk.append(summary)
                current_chunk_tokens += summary_tokens
            else:
                chunks.append(current_chunk)
                current_chunk = [summary]
                current_chunk_tokens = summary_tokens
        
        if current_chunk:
            chunks.append(current_chunk)
        
        return chunks

    while length_function(summaries) > token_max:
        chunks = chunk_summaries(summaries, token_max)
        new_summaries = []
        for chunk in chunks:
            chunk_text = "\n\n".join(chunk)
            new_summaries.append(reduce_chain.invoke(chunk_text))
        summaries = new_summaries

    final_summary = reduce_chain.invoke(summaries)
    
    return final_summary


In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_nomic.embeddings import NomicEmbeddings

def create_index_retriver(docs_list):    
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=200
    )
    doc_splits = text_splitter.split_documents(docs_list)

    # Add to vectorDB
    vectorstore = SKLearnVectorStore.from_documents(
        documents=doc_splits,
        embedding=NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local", device="nvidia"),
    )

    # Create retriever
    retriever = vectorstore.as_retriever(search_kwargs={'k':1})
    return retriever

In [4]:
import json
from langchain_core.messages import HumanMessage, SystemMessage


def is_chunk_relevant(chunk, question, llm_json_mode):
    # Doc grader instructions
    doc_grader_instructions = """You are a grader assessing relevance of a retrieved document to a user question.

    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant."""

    # Grader prompt
    doc_grader_prompt = """Here is the retrieved document: \n\n {document} \n\n Here is the user question: \n\n {question}. 

    This carefully and objectively assess whether the document contains at least some information that is relevant to the question.

    Return JSON with single key, binary_score, that is 'yes' or 'no' score to indicate whether the document contains at least some information that is relevant to the question."""
   
    doc_grader_prompt_formatted = doc_grader_prompt.format(document=chunk, question=question)
    result = llm_json_mode.invoke(
        [SystemMessage(content=doc_grader_instructions)]
        + [HumanMessage(content=doc_grader_prompt_formatted)]
    )
    grade = json.loads(result.content)["binary_score"]
    return grade.lower() == "yes"


In [5]:
def generate(question, relevant_chunks, llm):
    rag_prompt = """You are an assistant for question-answering tasks. 

    Here is the context to use to answer the question:

    {context} 

    Think carefully about the above context. 

    Now, review the user question:

    {question}

    Provide an answer to this questions using only the above context. 

    Use three sentences maximum and keep the answer concise.

    Answer:"""
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    context = format_docs(relevant_chunks)
    rag_prompt_formatted = rag_prompt.format(context=context, question=question)
    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
    return generation.content

In [6]:
def hallucination_grader(answer, relevant_chunks, llm_json_mode):
    
    # Hallucination grader instructions
    hallucination_grader_instructions = """

    You are a teacher grading a quiz. 

    You will be given FACTS and a STUDENT ANSWER. 

    Here is the grade criteria to follow:

    (1) Ensure the STUDENT ANSWER is grounded in the FACTS. 

    (2) Ensure the STUDENT ANSWER does not contain "hallucinated" information outside the scope of the FACTS.

    Score:

    A score of yes means that the student's answer meets all of the criteria. This is the highest (best) score. 

    A score of no means that the student's answer does not meet all of the criteria. This is the lowest possible score you can give.

    Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

    Avoid simply stating the correct answer at the outset."""

    # Grader prompt
    hallucination_grader_prompt = """FACTS: \n\n {documents} \n\n STUDENT ANSWER: {generation}. 

    Return JSON with two two keys, binary_score is 'yes' or 'no' score to indicate whether the STUDENT ANSWER is grounded in the FACTS. And a key, explanation, that contains an explanation of the score."""


    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)    
    

    hallucination_grader_prompt_formatted = hallucination_grader_prompt.format(
        documents=format_docs(relevant_chunks), generation=answer
    )
    result = llm_json_mode.invoke(
        [SystemMessage(content=hallucination_grader_instructions)]
        + [HumanMessage(content=hallucination_grader_prompt_formatted)]
    )
    grade = json.loads(result.content)["binary_score"]
    return grade

In [7]:
SEARCH_QUERIES = [
    "High-level overview of intelligent user interfaces and their impact on modern UI/UX design",
    "Tools and frameworks for building intelligent user interfaces: A 2024 guide"
]

SPECIFIC_QUESTIONS = [
    "What are the latest trends in intelligent user interfaces, and how are they shaping user experience?",
    "What are the best practices for ensuring accessibility and inclusivity in AI-powered user interfaces?",
]

TIME_HORIZON = 185  

# Max Outputs
MAX_OUTPUTS = 1

PLATFORM = 'google'
SOURCES_PER_QUERY = 2

# google


In [23]:
from langchain_google_community import GoogleSearchAPIWrapper
import os
from langchain_community.document_loaders import WebBaseLoader

if not os.getenv("GOOGLE_API_KEY") or not os.getenv("GOOGLE_CSE_ID"):
    raise EnvironmentError("Missing GOOGLE_API_KEY or GOOGLE_CSE_ID environment variables.")

search = GoogleSearchAPIWrapper()
unique_urls = set()

for search_query in SEARCH_QUERIES:
    results = search.results(search_query, SOURCES_PER_QUERY, search_params={'dateRestrict': f'd{TIME_HORIZON}', 'gl': 'EN'})
    urls = [item['link'] for item in results]
    unique_urls.update(urls)

source_items = {}
for url in unique_urls:
    loader = WebBaseLoader(url)
    doc = loader.load()
    title = doc[0].metadata.get('title', url)
    source_items[title] = {'url': url, 'page_content': doc[0].page_content, 'qa':{}}
    retriver = create_index_retriver(doc)
    for question in SPECIFIC_QUESTIONS:
        semantic_search_chunks = retriver.invoke(question)
        relevant_chunks = []
        for chunk in semantic_search_chunks:
            if is_chunk_relevant(chunk, question, llm_json_mode):  
                relevant_chunks.append(chunk)
        if len(relevant_chunks)==0:
            break
        answer = generate(question, relevant_chunks, llm)
        print(answer)
        is_answer_make_sense = hallucination_grader(answer, relevant_chunks, llm_json_mode)
        if is_answer_make_sense == "yes":
            print("Hallucination check passed")
            source_items[title]['qa'][question] = answer 
        else:
            print("Hallucination check failed")
            break
    question_relevance_score = len(source_items[title]['qa'])
    if question_relevance_score == 0:
        source_items.pop(title)
        print("No relevant information item")
    else:
        summary = summarize_text_map_reduce(llm, doc, 7500)
        source_items[title]['summary'] = summary
        source_items[title]['question_relevance_score'] = question_relevance_score
    
ranked_data_items = dict(sorted(source_items.items(), key=lambda x: x[1]['question_relevance_score'], reverse=True))

MAX_OUTPUTS = min(MAX_OUTPUTS, len(ranked_data_items)) 

top_data = {}
less_relevant_data = {}
for i, (title, meta_data) in enumerate(ranked_data_items.items()):
    if i < MAX_OUTPUTS:
        top_data[title] = meta_data
    else:
        less_relevant_data[title] = meta_data

def remove_relevance_score(data):
    for item in data.values():
        if 'question_relevance_score' in item:
            del item['question_relevance_score']
    return data

top_data = remove_relevance_score(top_data)
less_relevant_data = remove_relevance_score(less_relevant_data)

Based on the provided context, recent research has focused on developing context-aware concept evaluation approaches (Wang et al., 2021) and graph-based requirement elicitation frameworks (Wang et al., 2019; Wang et al., 2021) for smart product-service systems. These trends aim to create more dynamic and adaptive interfaces that consider user emotions and contexts, as suggested by Wattearachchi et al.'s framework (2020). By incorporating these intelligent features, user experience is expected to be enhanced through more personalized and responsive interactions.
Hallucination check passed
Based on the provided context, ensuring accessibility and inclusivity in AI-powered user interfaces can be achieved through a rule-based approach, as introduced by Stephanidis et al. (1998). This methodology involves dynamically adjusting the UI to individual user needs, particularly in terms of accessibility, using predefined rules to govern how the interface should adapt under specific conditions. By

Created a chunk of size 10088, which is longer than the specified 7500


Hallucination check passed
The latest trend in intelligent user interfaces is the integration of Artificial Intelligence (AI) with User Experience (UX) design, enabling enhanced personalisation and automation of design workflows. AI-powered tools can generate multiple design variations, speed up the design process, and ensure consistency across different elements of the interface. This synergy is transforming digital landscapes, reshaping how users interact with the world around them through more intuitive and responsive interfaces.
Hallucination check passed


Token indices sequence length is longer than the specified maximum sequence length for this model (1348 > 1024). Running this sequence through the model will result in indexing errors


No relevant information item
The latest trend in intelligent user interfaces is the shift towards gesture-based interactions, which are becoming increasingly popular due to their natural and intuitive nature. Gesture-based UIs are revolutionizing the way users interact with applications, offering a more direct and engaging experience that can increase user satisfaction and loyalty. By recognizing and adapting to users' gestures and movements, modern devices and applications are delivering innovative solutions while outlining new challenges for developers to overcome.
Hallucination check passed
To ensure accessibility and inclusivity in AI-powered user interfaces, it is crucial to consider users with limited hand mobility and provide alternative methods for interaction, such as voice commands or haptic feedback. Developers should also establish clear and intuitive gestures that are immediately apparent, but if not, offer visual prompts and guidance to direct the user. Additionally, thor

# Youtube

In [22]:
from langchain_community.document_loaders import WebBaseLoader, YoutubeLoader
import os
from langchain_community.tools import YouTubeSearchTool
import ast
from datetime import datetime, timedelta


if not os.getenv("GOOGLE_API_KEY") or not os.getenv("GOOGLE_CSE_ID"):
    raise EnvironmentError("Missing GOOGLE_API_KEY or GOOGLE_CSE_ID environment variables.")

tool = YouTubeSearchTool()
unique_urls = set()
for search_query in SEARCH_QUERIES:
    urls_str = tool.run(search_query, 2 * SOURCES_PER_QUERY)
    urls = set(ast.literal_eval(urls_str))
    unique_urls = unique_urls | urls

unique_urls = list(unique_urls)

source_items = {}
for url in unique_urls:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    doc = loader.load()
    title = doc[0].metadata.get('title', url)
    source_items[title] = {'url': url, 'page_content': doc[0].page_content, 'qa':{}}
    
    published_date_str = doc[0].metadata.get('publish_date', None)
    published_date = datetime.strptime(published_date_str, '%Y-%m-%d %H:%M:%S')
    if datetime.now() - published_date <= timedelta(days=TIME_HORIZON):
        source_items.pop(title)
        break
    
    retriver = create_index_retriver(doc)
    for question in SPECIFIC_QUESTIONS:
        semantic_search_chunks = retriver.invoke(question)
        relevant_chunks = []
        for chunk in semantic_search_chunks:
            if is_chunk_relevant(chunk, question, llm_json_mode):  
                relevant_chunks.append(chunk)
        if len(relevant_chunks)==0:
            break
        answer = generate(question, relevant_chunks, llm)
        print(answer)
        is_answer_make_sense = hallucination_grader(answer, relevant_chunks, llm_json_mode)
        if is_answer_make_sense == "yes":
            print("Hallucination check passed")
            source_items[title]['qa'][question] = answer 
        else:
            print("Hallucination check failed")
            break
    question_relevance_score = len(source_items[title]['qa'])
    if question_relevance_score == 0:
        source_items.pop(title)
        print("No relevant information item")
    else:
        summary = summarize_text_map_reduce(llm, doc, 7500)
        source_items[title]['summary'] = summary
        source_items[title]['question_relevance_score'] = question_relevance_score
    
ranked_data_items = dict(sorted(source_items.items(), key=lambda x: x[1]['question_relevance_score'], reverse=True))

MAX_OUTPUTS = min(MAX_OUTPUTS, len(ranked_data_items)) 

top_data = {}
less_relevant_data = {}
for i, (title, meta_data) in enumerate(ranked_data_items.items()):
    if i < MAX_OUTPUTS:
        top_data[title] = meta_data
    else:
        less_relevant_data[title] = meta_data

def remove_relevance_score(data):
    for item in data.values():
        if 'question_relevance_score' in item:
            del item['question_relevance_score']
    return data

top_data = remove_relevance_score(top_data)
less_relevant_data = remove_relevance_score(less_relevant_data)

[32;1m[1;3m['https://www.youtube.com/watch?v=nS1UrJnncWc&pp=ygVaSGlnaC1sZXZlbCBvdmVydmlldyBvZiBpbnRlbGxpZ2VudCB1c2VyIGludGVyZmFjZXMgYW5kIHRoZWlyIGltcGFjdCBvbiBtb2Rlcm4gVUkvVVggZGVzaWdu', 'https://www.youtube.com/watch?v=XZf5A0wcruE&pp=ygVaSGlnaC1sZXZlbCBvdmVydmlldyBvZiBpbnRlbGxpZ2VudCB1c2VyIGludGVyZmFjZXMgYW5kIHRoZWlyIGltcGFjdCBvbiBtb2Rlcm4gVUkvVVggZGVzaWdu'][0m[32;1m[1;3m['https://www.youtube.com/watch?v=MOyl58VF2ak&pp=ygVLVG9vbHMgYW5kIGZyYW1ld29ya3MgZm9yIGJ1aWxkaW5nIGludGVsbGlnZW50IHVzZXIgaW50ZXJmYWNlczogQSAyMDI0IGd1aWRl', 'https://www.youtube.com/watch?v=B6tn6ojJ2wg&pp=ygVLVG9vbHMgYW5kIGZyYW1ld29ya3MgZm9yIGJ1aWxkaW5nIGludGVsbGlnZW50IHVzZXIgaW50ZXJmYWNlczogQSAyMDI0IGd1aWRl'][0mNo relevant information item
No relevant information item
No relevant information item
No relevant information item
