# CREATE A FINANCIAL ANALYST USING CREWAI AND GROQ


[medium article](https://medium.com/the-ai-forum/build-a-financial-analyst-agent-using-crewai-and-llamaindex-6553a035c9b8)

## SETUP


In [63]:
import os
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file reported by find_dotenv(); the boolean
# result is discarded so the cell prints nothing.
_ = load_dotenv(dotenv_path=find_dotenv())

In [64]:
from llama_index.llms.groq import Groq

# Groq API key taken from the process environment (None when it is not set).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [69]:
# OpenAI API key taken from the process environment (None when it is not set).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if OPENAI_API_KEY is None:
    # os.environ only accepts str values, so writing None back would raise an
    # opaque TypeError. Warn with an actionable message instead.
    import warnings

    warnings.warn(
        "OPENAI_API_KEY is not set; add it to your environment or .env file "
        "before running the OpenAI-backed cells.",
        RuntimeWarning,
    )
else:
    # Re-export the key so libraries that read it from the environment see it.
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## DATA

- [dataset of ds_salaries](https://github.com/YuluDuan/Hypothesis-Testing-Data-Science-salary-comparison-in-different-location/blob/main/ds_salaries.csv)

In [51]:
!wget "https://s23.q4cdn.com/407969754/files/doc_financials/2019/ar/Uber-Technologies-Inc-2019-Annual-Report.pdf" -O uber_10k.pdf

--2025-02-10 16:49:30--  https://s23.q4cdn.com/407969754/files/doc_financials/2019/ar/Uber-Technologies-Inc-2019-Annual-Report.pdf
Resolving s23.q4cdn.com (s23.q4cdn.com)... 68.70.205.4, 68.70.205.1, 68.70.205.3, ...
Connecting to s23.q4cdn.com (s23.q4cdn.com)|68.70.205.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2829436 (2.7M) [application/pdf]
Saving to: ‘uber_10k.pdf’


2025-02-10 16:49:31 (4.83 MB/s) - ‘uber_10k.pdf’ saved [2829436/2829436]



## SETUP LOGGER

## SETUP THE LLM

[groq api documentation](https://console.groq.com/docs/quickstart)

In [66]:
# LlamaIndex LLM client for the Groq-hosted Llama 3.3 70B model.
llm_groq = Groq(
    api_key=GROQ_API_KEY,
    model="llama-3.3-70b-versatile",
)

# Smoke test: the completion is displayed as the cell's output.
llm_groq.complete("What is 2+2")

CompletionResponse(text='2 + 2 = 4.', additional_kwargs={}, raw=ChatCompletion(id='chatcmpl-d50d856b-502b-4377-a12d-88d42cd4a0d8', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='2 + 2 = 4.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739224597, model='llama-3.3-70b-versatile', object='chat.completion', service_tier=None, system_fingerprint='fp_4e32347616', usage=CompletionUsage(completion_tokens=9, prompt_tokens=41, total_tokens=50, completion_tokens_details=None, prompt_tokens_details=None, queue_time=0.067928131, prompt_time=0.007755283, completion_time=0.032727273, total_time=0.040482556), x_groq={'id': 'req_01jkrxthbmfmwsxbphx2w6e309'}), logprobs=None, delta=None)

In [70]:
# The crew requires a chat-based model, so the agents use LangChain's
# ChatOpenAI client rather than the completion-style Groq client.
from langchain_openai import ChatOpenAI

chat_llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)

# Smoke test: the reply message is displayed as the cell's output.
chat_llm.invoke("what is 2+2?")

AIMessage(content='2 + 2 equals 4.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 14, 'total_tokens': 23, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-6bc36ab2-7aa7-4903-9fbf-b61801f82364-0', usage_metadata={'input_tokens': 14, 'output_tokens': 9, 'total_tokens': 23, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

## PARSE DATA

In [54]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.openai import OpenAI
import os
from langchain_openai import ChatOpenAI

# Read the downloaded annual report into LlamaIndex documents.
reader = SimpleDirectoryReader(
    input_files=["../data/uber_10k.pdf"],
)
docs = reader.load_data()

# Display the first loaded document as a quick sanity check.
docs[0]

Document(id_='64d02e77-da1d-40b8-9e48-336801a71272', embedding=None, metadata={'page_label': '1', 'file_name': 'uber_10k.pdf', 'file_path': '../data/uber_10k.pdf', 'file_type': 'application/pdf', 'file_size': 2829436, 'creation_date': '2025-02-10', 'last_modified_date': '2020-03-30'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='2019\nAnnual  \nReport', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

In [55]:
# Embedding model used to vectorize the report: a HuggingFace BGE-small
# (English, v1.5) checkpoint loaded through LlamaIndex.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

In [56]:
# Build the vector index over the loaded documents, embedding them with
# embed_model.
index = VectorStoreIndex.from_documents(
    docs,
    embed_model=embed_model,
)

In [57]:
# Query engine over the report index, retrieving the 5 most similar chunks
# per query. It uses llm_groq, the LlamaIndex LLM created in the
# "SETUP THE LLM" section; the previous `llm` name was never defined in this
# notebook and raised NameError on a clean run.
query_engine = index.as_query_engine(similarity_top_k=5, llm=llm_groq)

In [58]:
from crewai import Agent
from crewai_tools import LlamaIndexTool

# Wrap the LlamaIndex query engine as a CrewAI tool.
query_tool = LlamaIndexTool.from_query_engine(
    name="Uber 2019 10K Query Tool",
    description="Use this tool to lookup the 2019 Uber 10k Annual Report",
    query_engine=query_engine,
)

# Display the JSON schema of the arguments the tool accepts.
query_tool.args_schema.model_json_schema()

{'description': 'Schema for query tool.',
 'properties': {'query': {'description': 'Search query for the query tool.',
   'title': 'Query',
   'type': 'string'}},
 'required': ['query'],
 'title': 'QueryToolSchema',
 'type': 'object'}

## AGENTS

In [71]:
import os
from crewai import Agent, Task, Crew, Process

In [72]:
# Analyst agent that produces the risk-factor research.
# query_tool is attached so the agent can look facts up in the indexed Uber
# 10-K; without it the tool built earlier in the notebook was never used and
# the analysis relied only on the chat model's own knowledge.
researcher = Agent(
    role="Senior Financial Analyst",
    goal="Uncover insights about different tech companies",
    backstory="""You work at an asset management firm. Your goal is to understand tech stocks like Uber.""",
    verbose=True,
    allow_delegation=False,
    tools=[query_tool],
    llm=chat_llm,
)

In [73]:
# Writer agent that turns the analyst's findings into a blog post.
writer = Agent(
    llm=chat_llm,
    role="Technology Content Writer",
    goal="Craft compelling content on technology advancements",
    backstory=(
        "You are a renowned Content writer, known for your insightful and engaging articles. "
        "You transform complex concepts into compelling narratives"
    ),
    allow_delegation=False,
    verbose=True,
)

## CREATE TOOLS

## TASKS

In [74]:
# Research task assigned to the analyst agent.
task_analysis = Task(
    agent=researcher,
    description="Conduct a comprehensive analysis of Uber's risk factors in 2019.",
    expected_output="Full analysis report in bullet points",
)

In [75]:
# Writing task assigned to the writer agent. The prompt text is kept exactly
# as originally written, since it is sent to the model verbatim.
task_writing = Task(
    agent=writer,
    description=(
        "Using the insights provided, develop an engaging blog post that highlights the headwinds that Uber faces. "
        "Your post should be informative yet accesible, catering to a casual audience. "
        "Make it simple, informative."
    ),
    expected_output="Full blog post of atleast 2 Paragraphs targetted to Medium users.",
)

## CREW

In [76]:
# Assemble the crew: both agents, with the analysis task listed before the
# writing task.
crew = Crew(
    tasks=[task_analysis, task_writing],
    agents=[researcher, writer],
    verbose=True,
)



In [77]:
# Kick off the crew and keep its final output.
result = crew.kickoff()

# Print a separator line followed by the crew's final result.
print("######################", result, sep="\n")

[1m[95m# Agent:[00m [1m[92mSenior Financial Analyst[00m
[95m## Task:[00m [92mConduct a comprehensive analysis of Uber's risk factors in 2019.[00m


[1m[95m# Agent:[00m [1m[92mSenior Financial Analyst[00m
[95m## Final Answer:[00m [92m
**Comprehensive Analysis of Uber's Risk Factors in 2019**

- **Regulatory Risk:**
  - Increasing scrutiny from regulatory bodies regarding pricing algorithms, surge pricing practices, and driver classification.
  - Potential impact of government regulations on ridesharing operations across different markets, especially in cities imposing stricter regulations.
  - Changes in labor laws that could redefine the classification of rideshare drivers from independent contractors to employees, leading to increased operational costs.

- **Market Competition:**
  - Intense competition from established players like Lyft in the U.S., as well as emerging rivals in international markets such as Didi Chuxing in China and Ola in India.
  - Price wars an