In [1]:
import pandas as pd

In [2]:
# Load the Iris data set. Forward slashes resolve on Windows, macOS and Linux
# alike; a backslash-separated literal is only a valid path on Windows.
# NOTE(review): the file carries an .xls extension but is parsed as CSV here, and
# the preview below shows an 'Unnamed: 0' column that looks like a previously
# saved index - confirm whether `index_col=0` is wanted.
df = pd.read_csv('../dashboards/Iris.xls')

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Register the Hugging Face embedding model on the global LlamaIndex Settings object.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")




In [4]:
import datetime
from llama_index.core.readers.json import JSONReader
from llama_index.core import VectorStoreIndex
import json
import numpy as np

def return_vals(df, c):
    """Summarise column ``c`` of ``df`` as a JSON-friendly value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column.
    c : hashable
        Label of the column to summarise.

    Returns
    -------
    list or dict
        * numeric column  -> ``[max, min, mean]`` as built-in Python numbers
        * datetime column -> ``[str(max), str(min)]``
        * anything else   -> ``{value: count}`` for the 10 most frequent values
        * column without any non-null value -> ``{}``

    Notes
    -----
    Null values are ignored when computing max/min/mean.
    """
    series = df[c]
    non_null = series.dropna()
    if non_null.empty:
        # Nothing to summarise; this also avoids an IndexError from ``iloc[0]``.
        return {}

    first = non_null.iloc[0]

    def _native(value):
        # numpy scalars such as int64 are not JSON serialisable; ``item()``
        # unwraps them into the matching built-in Python number.
        return value.item() if hasattr(value, "item") else value

    # Classify by dtype: ``isinstance(np.int64(1), int)`` is False, so looking
    # only at the Python type of the first element sends integer columns to the
    # categorical branch. The isinstance fallback keeps object-dtype columns
    # that hold plain Python numbers in the numeric branch.
    is_numeric = (
        pd.api.types.is_numeric_dtype(series)
        and not pd.api.types.is_bool_dtype(series)
    ) or isinstance(first, (int, float, complex))
    if is_numeric:
        return [
            _native(non_null.max()),
            _native(non_null.min()),
            _native(non_null.mean()),
        ]

    if pd.api.types.is_datetime64_any_dtype(series) or isinstance(first, datetime.datetime):
        return [str(non_null.max()), str(non_null.min())]

    # Categorical / free-text column: frequency of the ten most common values.
    return series.value_counts().nlargest(10).to_dict()

# Describe every column: its name, the Python type of its first value and the
# summary computed by return_vals.
dict_ = {
    col: {
        'column_name': col,
        'type': str(type(df[col].iloc[0])),
        'variable_information': return_vals(df, col),
    }
    for col in df.columns
}

# Write the description to disk so that JSONReader can load it from a file.
with open("dataframe.json", "w") as fp:
    json.dump(dict_, fp)

# Read the JSON file back as documents and build a vector index over them.
reader = JSONReader()
documents = reader.load_data(input_file='dataframe.json')
dataframe_index = VectorStoreIndex.from_documents(documents)


In [5]:
from llama_index.core import Document
from llama_index.core import VectorStoreIndex

# Plain-text Plotly styling guidance, one Document per chart family:
# line charts, bar charts, and a general catch-all entry.
# The text inside each Document is runtime prompt content and is left exactly as written.
# NOTE(review): the thresholds read "1000/100000" while one million is 1,000,000 -
# confirm the intended cut-off for the "M" suffix.
# NOTE(review): the bar-chart entry says "if more than one line", which looks
# copied from the line-chart entry - confirm the wording.
styling_instructions =[Document(text="""
  Dont ignore any of these instructions.
        For a line chart always use plotly_white template, reduce x axes & y axes line to 0.2 & x & y grid width to 1. 
        Always give a title and make bold using html tag axis label and try to use multiple colors if more than one line
        Annotate the min and max of the line
        Display numbers in thousand(K) or Million(M) if larger than 1000/100000 
        Show percentages in 2 decimal points with '%' sign
        """
        )
        , Document(text="""
        Dont ignore any of these instructions.
        For a bar chart always use plotly_white template, reduce x axes & y axes line to 0.2 & x & y grid width to 1. 
        Always give a title and make bold using html tag axis label and try to use multiple colors if more than one line
        Always display numbers in thousand(K) or Million(M) if larger than 1000/100000. Add annotations x values
        Annotate the values on the y variable
        If variable is a percentage show in 2 decimal points with '%' sign.
        """)


       # Placeholder for further chart types: add one Document per chart family
       # and tune the wording of the instructions as needed.
       , Document(text=
          """ General chart instructions
        Do not ignore any of these instructions
         always use plotly_white template, reduce x & y axes line to 0.2 & x & y grid width to 1. 
        Always give a title and make bold using html tag axis label 
        Always display numbers in thousand(K) or Million(M) if larger than 1000/100000. Add annotations x values
        If variable is a percentage show in 2 decimal points with '%'""")
         ]
# Build a vector index over the three styling documents.
style_index =  VectorStoreIndex.from_documents(styling_instructions)

In [6]:
# Show the index object's repr as the cell output to confirm it was created.
dataframe_index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x1a7bce179a0>

In [15]:
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.groq import Groq
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a clear message instead of handing `api_key=None` to the client
# and only discovering the missing key when the first request is sent.
groqkey = os.environ.get('GROQ_API_KEY')
if not groqkey:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it or add it to a .env file before running this cell."
    )
llm = Groq(model="llama3-70b-8192", api_key=groqkey)

# Build query engines over the two indexes; both use the Groq LLM and
# similarity_top_k=1.
styling_engine = style_index.as_query_engine(similarity_top_k=1, llm=llm)
dataframe_engine = dataframe_index.as_query_engine(similarity_top_k=1, llm=llm)


# Wrap each query engine as a tool; the name and description are the text the
# agent is shown when choosing which tool to call.
query_engine_tools = [
    QueryEngineTool(
        query_engine=dataframe_engine,
        metadata=ToolMetadata(
            name="dataframe_index",
            description="Provides information about the data in the data frame. Only use column names in this tool",
        ),
    ),
    QueryEngineTool(
        query_engine=styling_engine,
        metadata=ToolMetadata(
            name="Styling",
            description="Provides instructions on how to style your Plotly plots. Use a detailed plain text question as input to the tool.",
        ),
    ),
]

# The same Groq model drives both the query engines and the agent.


# Initialize ReAct agent; verbose=True prints each reasoning step.
agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True)

In [8]:
from llama_index.core import PromptTemplate

# System prompt text for the ReAct agent, specialised for producing Plotly code.
# `{tool_desc}` and `{tool_names}` are template placeholders (presumably filled in
# by the agent at run time - confirm against the LlamaIndex prompt docs); doubled
# braces `{{ }}` stand for literal braces in the JSON examples.
new_prompt_txt= """You are designed to help with building data visualizations in Plotly. You may do all sorts of analyses and actions using Python

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools to complete each subtask.

You have access to the following tools, use these tools to find information about the data and styling:
{tool_desc}


## Output Format

Please answer in the same language as the question and use the following format:

```
Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
```

Please ALWAYS start with a Thought.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.

If this format is used, the user will respond in the following format:

```
Observation: tool response
```

You should keep repeating the above format till you have enough information to answer the question without using any more tools. At that point, you MUST respond in the one of the following two formats:

```
Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: [your answer here (In the same language as the user's question)]
```

```
Thought: I cannot answer the question with the provided tools.
Answer: [your answer here (In the same language as the user's question)]
```

## Current Conversation

Below is the current conversation consisting of interleaving human and assistant messages."""

# Wrap the raw prompt text in a PromptTemplate object.
new_prompt = PromptTemplate(new_prompt_txt)

# Replace the agent's system prompt, addressed by the key 'agent_worker:system_prompt'.
agent.update_prompts({'agent_worker:system_prompt':new_prompt})

In [18]:
# Ask the agent for bar-chart code; with verbose=True its Thought/Action/Observation
# steps are printed below the cell. The returned object is kept in `response`.
response = agent.chat("Give Plotly code for a bar chart for the column SepalLengthCm , Species")

> Running step 939dd3aa-42b2-4d03-b75a-a7478a4dad8b. Step input: Give Plotly code for a bar chart for the column SepalLengthCm , Species
[1;3;38;5;200mThought: The current language of the user is: Python. I need to use a tool to help me answer the question.
Action: dataframe_index
Action Input: {'input': 'SepalLengthCm, Species'}
[0m[1;3;34mObservation: SepalLengthCm is a floating-point number and Species is a string.
[0m> Running step c10e81ce-5919-4e08-9899-df6b6804a673. Step input: None
[1;3;38;5;200mThought: I have the information about the columns. Now I need to provide the Plotly code for a bar chart.
Here is the Plotly code for a bar chart for the column SepalLengthCm, Species:

```
import plotly.express as px

fig = px.bar(df, x='Species', y='SepalLengthCm')
fig.update_layout(template='plotly_white', title='<b>Sepal Length by Species</b>')
fig.show()
```

Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: The above code will 