### Simple App to Query a Dataset

Some Notes: 

* Some packages need to be installed the first time this notebook is run: `pip install gradio boto3 langchain_community` 

* Setting up AWS: https://medium.com/genai-io/aws-bedrock-quick-setup-with-boto3-94ba0d0088ca

* Getting LLM response: https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-runtime_example_bedrock-runtime_Converse_AnthropicClaude_section.html

* Data
    * For this code to work you will need to download the data and place the CSV files in the `genai_solution/data/` folder 
    * Link: https://www.kaggle.com/datasets/olistbr/brazilian-ecommerce?select=olist_orders_dataset.csv



* Gradio is used to create a very simple interface 
    * Docs: https://www.gradio.app
    * Streamlit is another good option: https://streamlit.io


If you have any questions, reach out!

In [None]:
!pip install gradio boto3 langchain_community

In [None]:
import os
import gradio as gr
import pandas as pd
from langchain_community.document_loaders import DataFrameLoader
import json
import boto3
from botocore.exceptions import ClientError

In [None]:
import os

# Name of the project directory that contains the ``data`` folder.
PROJECT_DIR_NAME = "genai_solution"

# Locate the project root from the current working directory: keep everything
# up to and including the first occurrence of PROJECT_DIR_NAME. If the name does
# not occur in the working directory, assume the project folder sits inside it.
_cwd = os.getcwd()
_head, _marker, _tail = _cwd.partition(PROJECT_DIR_NAME)
_project_root = _head + _marker if _marker else os.path.join(_cwd, PROJECT_DIR_NAME)

# The trailing "" makes the path end with the OS path separator, so a file name
# can be appended to DATA_FOLDER directly. os.path.join keeps this portable
# instead of hard-coding Windows backslashes.
DATA_FOLDER = os.path.join(_project_root, "data", "")

# Every CSV file that ships with the dataset (kept for reference).
ALL_DATA_FILES = (
    "olist_customers_dataset.csv",
    "olist_geolocation_dataset.csv",
    "olist_order_items_dataset.csv",
    "olist_order_payments_dataset.csv",
    "olist_order_reviews_dataset.csv",
    "olist_orders_dataset.csv",
    "olist_products_dataset.csv",
    "olist_sellers_dataset.csv",
    "product_category_name_translation.csv",
)

# Files offered in the app's dropdown. Only the translation file is exposed;
# presumably because the query function reads the "product_category_name"
# column, which not every file has -- extend with care.
file_list = ["product_category_name_translation.csv"]

In [None]:
# Define a function that gets an LLM response from AWS Bedrock.
# This code is adapted from the links above.


def get_output_aws(
    prompt,
    *,
    client=None,
    model_id="eu.anthropic.claude-3-5-sonnet-20240620-v1:0",
    region_name="eu-west-1",
    max_tokens=512,
    temperature=0.5,
):
    """Send ``prompt`` to a Claude model through AWS Bedrock and return the reply text.

    Args:
        prompt: Text sent as a single user message.
        client: Optional object exposing ``invoke_model(modelId=..., body=...)``.
            When omitted, a ``bedrock-runtime`` boto3 client is created for
            ``region_name``. Passing one in allows reuse across calls and testing.
        model_id: Identifier of the Bedrock model to invoke.
        region_name: AWS region used when the client is created here.
        max_tokens: Value of the ``max_tokens`` field in the request.
        temperature: Value of the ``temperature`` field in the request.

    Returns:
        The ``text`` of the first entry in the response's ``content`` list.

    Raises:
        RuntimeError: If invoking the model fails; the original exception is
            attached as the cause.
    """
    if client is None:
        client = boto3.client("bedrock-runtime", region_name=region_name)

    native_request = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "messages": [
            {
                "role": "user",
                "content": [{"type": "text", "text": prompt}],
            }
        ],
    }

    try:
        response = client.invoke_model(
            modelId=model_id, body=json.dumps(native_request)
        )
    except Exception as exc:
        # Raise instead of calling exit(1): this function runs as part of a UI
        # callback, where exiting would try to shut down the whole
        # interpreter/kernel rather than report the failure of one request.
        raise RuntimeError(
            f"ERROR: Can't invoke '{model_id}'. Reason: {exc}"
        ) from exc

    # Decode the response body and pull out the generated text.
    model_response = json.loads(response["body"].read())
    return model_response["content"][0]["text"]

In [None]:
# Gradio works by defining a function that you wish to run in the interface
# Our file and prompt are inputs and it returns the llm response


def llm_query_df(file_path, prompt):
    """Load a CSV from ``DATA_FOLDER`` and ask the LLM ``prompt`` about its contents.

    Args:
        file_path: File name of the CSV, relative to ``DATA_FOLDER``.
        prompt: The question or instruction to send along with the data.

    Returns:
        The text returned by ``get_output_aws`` for the combined prompt.

    Raises:
        ValueError: If ``file_path`` is empty or ``None`` (e.g. nothing was
            selected in the interface).
    """
    # Fail with a clear message instead of an opaque TypeError from building
    # the path when no file has been selected.
    if not file_path:
        raise ValueError("No file selected: please choose a file to query.")

    df = pd.read_csv(os.path.join(DATA_FOLDER, file_path))

    # The loader uses the "product_category_name" column as page content, so
    # the CSV is expected to contain that column.
    documents = DataFrameLoader(
        df, page_content_column="product_category_name"
    ).load()

    # Keep a blank line between the user's prompt and the data so the two do
    # not run together in the text sent to the model.
    full_prompt = prompt + "\n\n" + str(documents)
    return get_output_aws(full_prompt)

In [None]:
# Create the Gradio interface: a file dropdown and a prompt box feed
# llm_query_df, and its return value is shown in a single text box.
iface = gr.Interface(
    fn=llm_query_df,  # Function called with (selected file, prompt text)
    inputs=[
        gr.Dropdown(
            choices=file_list,
            # Pre-select the first file so the function never receives an
            # empty selection when the user submits straight away.
            value=file_list[0] if file_list else None,
            label="Select a file to query",
        ),
        gr.Textbox(
            lines=2,
            placeholder="Enter your prompt here...",
            label="Prompt",  # a visible label instead of an empty one
        ),
    ],
    outputs=[gr.Textbox(label="Summary")],  # LLM response
    title="Query your Data",  # Title of the app
    description="This is a description",  # Description of the app
)

iface.launch()