In [2]:
# Load the `dotenv` IPython extension, then run its `%dotenv` line magic.
# NOTE(review): presumably this reads a local .env file so that credentials
# (e.g. the OpenAI API key) are available as environment variables; confirm.
# No API key is passed explicitly to the OpenAI clients created in this notebook.
%load_ext dotenv
%dotenv

In [3]:
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_community.vectorstores import Chroma

from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

from langchain.tools.retriever import create_retriever_tool
from langchain.tools import tool

from langchain import hub

from platform import python_version

from langchain.agents import (create_tool_calling_agent, 
                              AgentExecutor)

In [4]:
# Wikipedia lookup tool, built on a default-configured API wrapper.
wikipedia_tool = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(),
)

In [5]:
# Open the Chroma store located at ./intro-to-ds-lectures, using OpenAI
# embeddings as its embedding function.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./intro-to-ds-lectures",
)

# Retriever over the vector store using the 'mmr' search type.
# NOTE(review): per the LangChain docs, `k` is the number of documents returned
# and `lambda_mult` trades relevance against diversity; confirm the chosen values.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0.7),
)

# Wrap the retriever as a named tool the agent can call.
# NOTE(review): the triple-quoted description spans two source lines, so its
# embedded newline and leading indentation are part of the description text;
# it is reproduced unchanged here.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="Introduction-to-Data-and-Data-Science-Course-Lectures",
    description='''For any questions regarding 
                                       the Introduction to Data and Data Science course, you must use this tool.''',
)

  vectorstore = Chroma(persist_directory = "./intro-to-ds-lectures",


In [6]:
@tool
def get_python_version() -> str:
    ''' Useful for questions regarding the version of Python currently used. '''
    # The docstring above is kept verbatim: per the LangChain docs the `tool`
    # decorator uses it as the tool's description, so it is runtime text.
    running_version = python_version()
    return running_version

In [7]:
# The tools handed to both the agent and its executor.
tools = [
    wikipedia_tool,
    retriever_tool,
    get_python_version,
]

In [8]:
# Chat model that drives the agent.
# NOTE(review): temperature 0 plus a fixed seed is presumably meant to make
# runs as repeatable as possible; confirm.
chat = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0,
    seed=365,
)

In [9]:
# Pull the "hwchase17/openai-tools-agent" prompt from the LangChain Hub.
chat_prompt_template = hub.pull(
    'hwchase17/openai-tools-agent'
)



In [10]:
# Combine the chat model, the tool list and the pulled prompt into a
# tool-calling agent.
agent = create_tool_calling_agent(
    llm=chat,
    tools=tools,
    prompt=chat_prompt_template,
)

In [11]:
# Executor that runs the agent with its tools. `verbose` is on, and
# `return_intermediate_steps` adds the tool calls to the result dict.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    return_intermediate_steps=True,
)

In [12]:
# Ask a question that the `get_python_version` tool can answer.
response = agent_executor.invoke(
    {
        "input": "Could you tell me the version of Python I'm currently using?",
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_python_version` with `{}`


[0m[38;5;200m[1;3m3.10.10[0m[32;1m[1;3mYou are currently using Python version 3.10.10.[0m

[1m> Finished chain.[0m


In [25]:
# Display the full result dict of the preceding invoke call: the 'input',
# the final 'output', and the recorded 'intermediate_steps'.
response

{'input': "Could you tell me the version of Python I'm currently using?",
 'output': 'You are currently using Python version 3.10.10.',
 'intermediate_steps': [(ToolAgentAction(tool='get_python_version', tool_input={}, log='\nInvoking: `get_python_version` with `{}`\n\n\n', message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_aBY8ZCWez4g5L54o1gmdKpPz', 'function': {'arguments': '{}', 'name': 'get_python_version'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_06737a9306'}, id='run-470cf862-6c6b-4f13-990d-6f7dc9074489', tool_calls=[{'name': 'get_python_version', 'args': {}, 'id': 'call_aBY8ZCWez4g5L54o1gmdKpPz', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'get_python_version', 'args': '{}', 'id': 'call_aBY8ZCWez4g5L54o1gmdKpPz', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_aBY8ZCWez4g5L54o1gmdKpPz'),
   '3.10.10')]}

In [27]:
# Ask a two-part question about the course material.
# NOTE(review): the second part (language creators) presumably needs the
# Wikipedia tool in addition to the course-lecture retriever; confirm.
# The prompt text, including its leading and trailing newlines, is unchanged.
response = agent_executor.invoke(
    {
        "input": '''
I'm currently following the Introduction to Data and Data Science course.
Could you list the programming languages a data scientist should know?
Additionally, could you tell me who their creators are?
'''
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Introduction-to-Data-and-Data-Science-Course-Lectures` with `{'query': 'programming languages for data science'}`


[0m[33;1m[1;3mThus, we need a lot of computational power, and we can expect people to use the languages similar to those in the big data column. Apart from R, Python, and MATLAB, other, faster languages are used like Java, JavaScript, C, C++, and Scala. Cool. What we said may be wonderful, but that’s not all! By using one or more programming languages, people create application software or, as they are sometimes called, software solutions, that are adjusted for specific business needs

What about big data? Apart from R and Python, people working in this area are often proficient in other languages like Java or Scala. These two have not been developed specifically for doing statistical analyses, however they turn out to be very useful when combining data from multiple sources. All right! Let’s fini