In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.legacy.embeddings.langchain import LangchainEmbedding

import torch

# Load the source documents from the /content/data directory.
documents = SimpleDirectoryReader(input_dir="/content/data").load_data()

# System prompt given to the LLM wrapper; the text is kept verbatim.
# NOTE(review): "Your are" looks like a typo for "You are" -- confirm before
# editing, because this string is passed to the model as-is.
system_prompt = """
You are a Q&A assistant. Your are going to answer questions as
accurately as possible based on the instructions and context provided.
"""

# Query wrapper whose template is only the {query_str} placeholder, so the
# user's query is forwarded without any extra framing text.
query_wrapper_prompt = PromptTemplate(template="{query_str}")

# Build the Hugging Face LLM wrapper used for answer generation.
llm = HuggingFaceLLM(
    # Model and tokenizer come from the same Llama-2 chat checkpoint.
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    # Prompting configuration defined earlier in this cell.
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Token budget: prompt window and maximum generated tokens.
    context_window=4096,
    max_new_tokens=256,
    # Sampling is disabled, so generation is presumably deterministic.
    generate_kwargs=dict(temperature=0.0, do_sample=False),
    # Model-loading options: automatic device placement, fp16 dtype and
    # 8-bit loading are forwarded to the underlying model loader.
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.float16, load_in_8bit=True),
)

# Embedding model: a sentence-transformers checkpoint loaded through
# LangChain and wrapped in the llama-index LangChain adapter.
embeddings = LangchainEmbedding(
    HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
    ),
)

# Bundle the LLM, the embedding model and the chunk size into one service
# context that is handed to the index builder.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embeddings,
    chunk_size=1024,
)


# Build a vector index over the loaded documents and expose it as a query
# engine (the evaluation cell reuses `query_engine`).
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

# Read the question into its own variable. Assigning the result to `input`
# would rebind the builtin to a string, so re-running this cell (or any later
# call to input()) would fail with "TypeError: 'str' object is not callable".
user_query = input("Please enter your data quality issue: ")
response = query_engine.query(user_query)
print(response)



In [None]:
# Evaluation fixtures: each entry pairs a query with the reference answer the
# evaluation loop compares the query engine's response against.
# NOTE(review): the first expected answer reads "$, €, and are valid" -- a
# symbol appears to be missing after "and"; confirm the intended text.
queries_and_answers = [
    {
        "query": "The currency column has these ($,€,&&, ߾) values. are these valid currencies",
        "expected_answer": "Yes, $, €, and are valid currency symbols, but ߾ is not a standard currency symbol.",
    },
    {
        "query": "How to check if the stocks data is not stale ",
        "expected_answer": (
            "To determine if stock data is not too old and stale, you should use a combination of methods. "
            "First, check the timestamps to ensure data is recent. "
            "Implement version control to identify outdated data and use data freshness indicators like flags or counters. "
            "Regularly assess data quality for accuracy and completeness. "
            "Monitor user activity logs to see how frequently the data is accessed or updated. "
            "Evaluate performance metrics for anomalies in response times or error rates. "
            "Cross-reference your data with reliable external sources and set up automated monitoring systems with alerts for stale data."
        ),
    },
]

# Run every evaluation case through the query engine, print the response, and
# score it against the reference answer.
# NOTE(review): `evaluate`, `faithfulness`, `answer_relevance`,
# `answer_correctness`, `wrapped_llm` and `wrapped_embeddings` are not defined
# in the visible cells -- presumably they come from another cell; confirm.
# NOTE(review): if `evaluate` is ragas.evaluate, check that it accepts
# per-sample `query` / `response` / `ground_truth` keywords in the installed
# version; it may expect a dataset argument instead.
for case in queries_and_answers:
    query, expected_answer = case["query"], case["expected_answer"]

    # Ask the RAG pipeline and show the raw exchange.
    response = query_engine.query(query)
    print(f"Query: {query}")
    print(f"Response: {response}")

    # Score this single query/response pair against its reference answer.
    evaluation_result = evaluate(
        query=query,
        response=response,
        ground_truth=expected_answer,
        metrics=[faithfulness, answer_relevance, answer_correctness],
        llm=wrapped_llm,
        embeddings=wrapped_embeddings,
    )
    print(f"Evaluation Result: {evaluation_result}")

