In [None]:
%pip install llama-index-agent-openai
%pip install llama-index-readers-file
%pip install llama-index-postprocessor-cohere-rerank
%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
!pip install llama-index llama-hub

In [23]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
import os
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import SummaryIndex
from llama_index.core.node_parser import SentenceSplitter
from tqdm.notebook import tqdm
import pickle

# SECURITY: credentials must never be embedded in the notebook. The key is
# taken from the environment, and only if it is missing is the user prompted
# for it (the input is not echoed). Any key that was ever committed in this
# cell should be considered leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


# Patch asyncio so nested event loops work inside the notebook kernel.
nest_asyncio.apply()


# Default LLM and embedding model used by LlamaIndex components that are not
# given an explicit model.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")

In [18]:
import os
import pandas as pd
from llama_index.core.query_engine import PandasQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI

from llama_index.core.tools import FunctionTool
import os

# Relative path of the text file that collects the saved notes.
note_file = os.path.join("data", "notes.txt")


def save_note(note):
    """Append ``note`` as a new line to ``note_file``.

    The parent directory of ``note_file`` is created when it is missing, and
    the file itself is created by opening it in append mode, so no separate
    "touch" step (and no unclosed file handle) is needed.

    Args:
        note: Text of the note; a trailing newline is added.

    Returns:
        The confirmation string ``"note saved"``.
    """
    note_dir = os.path.dirname(note_file)
    if note_dir:
        # Without this, the first call fails when "data/" does not exist yet.
        os.makedirs(note_dir, exist_ok=True)

    with open(note_file, "a") as f:
        f.write(note + "\n")

    return "note saved"


# Wrap save_note as a tool object so it can be handed to the agent.
note_engine = FunctionTool.from_defaults(
    name="note_saver",
    description="this tool can save a text based note to a file for the user",
    fn=save_note,
)


In [19]:
import csv
import os

def get_base_name_without_extension(file_path):
    """Return the last component of ``file_path`` with its final extension removed."""
    stem, _extension = os.path.splitext(os.path.basename(file_path))
    return stem

def get_file_names_from_csv_folder(folder_path):
    """Return the extension-less names of the CSV files in ``folder_path``.

    Only regular files whose name ends in ``.csv`` are considered, and the
    result is sorted: ``os.listdir`` returns entries in arbitrary order, so
    without sorting the output would differ between runs and machines.

    Args:
        folder_path: Directory to scan (not searched recursively).

    Returns:
        Sorted list of file names without the ``.csv`` extension; an empty
        list (after printing a message) when ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"{folder_path} doesn't exist.")
        return []

    # Entries from os.listdir are bare names, so splitext alone strips the
    # extension; isfile skips directories that merely end in ".csv".
    return sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(folder_path)
        if entry.endswith('.csv')
        and os.path.isfile(os.path.join(folder_path, entry))
    )


# Directory that is scanned for CSV files (a fixed absolute path).
csv_folder_path = '/content/data'

# Collect the extension-less names of the CSV files found there and show them.
file_names = get_file_names_from_csv_folder(csv_folder_path)
print("Names of the CSV files:", file_names)


Names of the CSV files: ['hourly', 'monthly', 'daily', 'weekly']


In [20]:
def get_csv_files(folder_path):
    """Return the paths of the CSV files directly inside ``folder_path``.

    Only regular files whose name ends in ``.csv`` are returned, so a
    directory with such a name is never handed to a CSV reader. The paths are
    sorted because ``os.listdir`` yields entries in arbitrary order, which
    would otherwise make everything derived from this list order-dependent.

    Args:
        folder_path: Directory to scan (not searched recursively).

    Returns:
        Sorted list of ``folder_path``-joined file paths; an empty list (after
        printing a message) when ``folder_path`` does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"{folder_path} bulunamadı.")
        return []

    candidates = (
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith('.csv')
    )
    return sorted(path for path in candidates if os.path.isfile(path))

def csv_to_dataframe(csv_files):
    """Read every path in ``csv_files`` with pandas.

    Returns a dict that maps each file's name without directory and extension
    to the DataFrame loaded from it; a later file with the same name replaces
    an earlier one.
    """
    return {
        os.path.splitext(os.path.basename(path))[0]: pd.read_csv(path)
        for path in csv_files
    }

def main(folder_path='/content/data'):
    """Load every CSV file found in ``folder_path`` into a DataFrame.

    Args:
        folder_path: Directory to scan for CSV files. Defaults to the path
            this notebook originally hard-coded, so ``main()`` behaves as
            before while other locations can now be passed in.

    Returns:
        Dict mapping each CSV's extension-less file name to its DataFrame.
        The number of loaded frames is printed as a quick sanity check.
    """
    csv_files = get_csv_files(folder_path)
    loaded = csv_to_dataframe(csv_files)
    print(len(loaded))

    return loaded


dataframes = main()

4


In [66]:
from llama_index.core import PromptTemplate


# Instructions injected into the pandas prompt. The text is read by the LLM,
# so the misspelled keyword ("ESPRESSION") and the gap in the numbering are
# corrected to keep the instruction list unambiguous.
instruction_str = """\
    1. You can choose whether or not to use the provided tools but give the proper answers for given query.
    2. Analyze and summarize relevant data using advanced Data Science techniques.
    3. Only use the data given and requested to you.
    4. PRINT ONLY THE EXPRESSION.
    5. Do not quote the expression.

"""


# Prompt template later installed as the "pandas_prompt" of each pandas query
# engine. Its placeholders are {df_str}, {instruction_str} and {query_str}.
new_prompt = PromptTemplate(
    """\
    You are working with a pandas dataframe in Python.
    The name of the dataframe is `df`.
    This is the result of `print(df.head())`:
    {df_str}

    Follow these instructions:
    {instruction_str}
    Query: {query_str}

    Expression:  """
)

# Free-text description of the agent's role; passed as `context` when the
# ReAct agent is built.
context = """Purpose: The primary role of this agent is to assist users by providing accurate
            information about medicine sales with their codes based on some time periods and details about some medicine with code informations. """

In [67]:
# The note-saving tool is registered exactly once. Adding it inside the loop
# (once per DataFrame) would hand the agent several identical "note_saver"
# tools.
tools = [note_engine]

# Pandas query engines keyed by the CSV's file name, kept for direct access.
query_engines = {}

for file_name, df in dataframes.items():
    query_engine = PandasQueryEngine(
        df=df, verbose=True, instruction_str=instruction_str
    )
    # Replace the engine's pandas prompt with the custom template.
    query_engine.update_prompts({"pandas_prompt": new_prompt})
    query_engines[file_name] = query_engine

    # One tool per DataFrame, named after the file it was loaded from.
    tools.append(
        QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name=f"pandas_tool_for_{file_name}_sales",
                description="Useful for questions related to specific aspects of"
                f" medicine sales for the {file_name} time period (e.g. hourly, daily, weekly, monthly medicine sales for a pharmacy)"
            ),
        )
    )

llm = OpenAI(model="gpt-3.5-turbo-0613")
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)



# Interactive loop: forward each entered prompt to the agent and print the
# answer; entering "q" ends the session.
while True:
    prompt = input("Enter a prompt (q to quit): ")
    if prompt == "q":
        break
    result = agent.query(prompt)
    print(result)





Enter a prompt (q to quit): daily sales M01AB
[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: pandas_tool_for_daily_sales
Action Input: {'input': 'M01AB'}
[0m> Pandas Instructions:
```
df['M01AB'].describe()
```
> Pandas Output: count    2106.000000
mean        5.033683
std         2.737579
min         0.000000
25%         3.000000
50%         4.990000
75%         6.670000
max        17.340000
Name: M01AB, dtype: float64
[1;3;34mObservation: count    2106.000000
mean        5.033683
std         2.737579
min         0.000000
25%         3.000000
50%         4.990000
75%         6.670000
max        17.340000
Name: M01AB, dtype: float64
[0m[1;3;38;5;200mThought: I can answer without using any more tools.
Answer: The daily sales for medicine M01AB are as follows:
- Count: 2106
- Mean: 5.033683
- Standard Deviation: 2.737579
- Minimum: 0
- 25th Percentile: 3
- 50th Percentile: 4.99
- 75th Percentile: 6.67
- Maximum: 17.34
[0mThe daily sales for medi