In [1]:
import os, json
import pandas as pd

from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from dotenv import load_dotenv
# Load .env before anything reads the environment, so both the API keys and the
# optional EMBEDDING_MODEL_PATH override below can be supplied from that file.
load_dotenv(dotenv_path='.env') 

# Path (or model id) handed to HuggingFaceEmbeddings. The default is the original
# machine-specific location; set EMBEDDING_MODEL_PATH to run the notebook elsewhere
# without editing this cell.
EMBEDDING_MODEL_PATH = os.environ.get(
    "EMBEDDING_MODEL_PATH",
    "C:/Users/evanq/Documents/EAS_Work/CTIA/git/main/models/thenlper_gte-large/",
)
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_PATH)

# Splitter configured for 500-character chunks with 50 characters of overlap.
# NOTE(review): not referenced by any of the cells visible in this notebook.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)


model = input("Choose MistralAI or OpenAI: ")
# The prompt invites answers such as "MistralAI" or "mistral", so accept anything
# that starts with "m" (case-insensitive, surrounding whitespace ignored) rather
# than only the single letter "m". Every other answer falls through to OpenAI.
# For Mistral AI
if model.strip().lower().startswith("m"):
    from langchain_mistralai import ChatMistralAI
    llm = ChatMistralAI(model='open-mixtral-8x7b', 
                        temperature=0)
    print("Loaded Mistral AI's open-mixtral-8x7b model.")

# For Open AI
else:
    import openai
    from langchain_openai import ChatOpenAI   
    llm = ChatOpenAI(model="gpt-4o-mini",
                     temperature=0)
    print("Loaded OpenAI's gpt-4o-mini model.")

# Market names offered to the routing prompt as classification categories.
MARKET_LIST = [
    "WHEAT",
    "CORN",
    "OATS",
    "SOYBEANS",
    "SOYBEAN OIL",
    "CRUDE OIL, LIGHT SWEET",
    "LEAN HOGS",
    "LIVE CATTLE",
    "FEEDER CATTLE",
]

# Working directories: per-market exports go in data/, Chroma stores in vectors_db/.
for _required_dir in ("data", "vectors_db"):
    if os.path.exists(_required_dir):
        continue
    os.makedirs(_required_dir)

  from tqdm.autonotebook import tqdm, trange


Choose MistralAI or OpenAI:  a


Loaded OpenAI's gpt-4o-mini model.


In [2]:
# Natural-language question passed to main(); it is first routed to a market and
# then embedded into the analyst prompt.
query = "Is soybean bullish right now?"
# Excel workbook read by get_data() via pd.read_excel; the code relies on it having
# 'Date' and 'Market' columns. get_response() reads this name as a global.
file_path = "../COT/Commodities Futures/c_year.xlsx"

In [3]:
def main(query):
    """Answer ``query`` through the retrieval pipeline and print the result text."""
    print(get_response(query))

def get_data(file_path, query):
    """Return the Chroma vector store for the market that ``query`` is about.

    The market name comes from ``routing_agent(query)``. If ``vectors_db/<market>``
    already exists it is opened and returned as-is. Otherwise the Excel workbook is
    filtered down to that market, exported to ``data/<market>.csv`` and
    ``data/<market>.txt`` (JSON), and the text file is embedded into a new
    persistent store.

    Args:
        file_path: Path of the Excel workbook; must contain 'Date' and 'Market' columns.
        query: The user's question, used only to pick the market.

    Returns:
        A ``Chroma`` instance persisted under ``vectors_db/<market>``.

    Raises:
        ValueError: If the workbook has no rows for the routed market. Building a
            store in that case would permanently cache an empty index.
    """
    # The name is produced by an LLM; trim stray whitespace/newlines before it is
    # used as a directory name and as an exact-match filter value.
    market = routing_agent(query).strip()
    db_dir = f"vectors_db/{market}"

    if os.path.exists(db_dir):
        print(f"{market} vector db already exists!")
        return Chroma(persist_directory=db_dir, embedding_function=embedding_function)

    data = pd.read_excel(file_path)
    data['Date'] = data['Date'].astype(str)
    data["Market"] = data["Market"].str.strip()

    data = data[data["Market"] == market].reset_index(drop=True)
    if data.empty:
        raise ValueError(f"No rows found for market {market!r} in {file_path}")
    data.to_csv(f"data/{market}.csv", index=False)

    # DataFrame.to_json already returns JSON text. Passing that string through
    # json.dumps would wrap it in a second layer of quoting/escaping, so let
    # pandas do the pretty-printing and write the text directly.
    with open(f"data/{market}.txt", "w", encoding="utf-8") as f:
        f.write(data.to_json(indent=4))

    # NOTE(review): the whole file is indexed as a single document (no splitter is
    # applied), so the retriever can return at most one chunk for k=5.
    loader = TextLoader(f"data/{market}.txt", encoding="utf-8")
    docs = loader.load()
    return Chroma.from_documents(documents=docs,
                                 embedding=embedding_function,
                                 persist_directory=db_dir)

def get_response(query):
    """Run the retrieval-QA chain for ``query`` and return the answer text.

    Uses the module-level ``file_path`` and ``llm``. The vector store comes from
    ``get_data``; the chain is a "stuff" RetrievalQA over a similarity retriever
    asking for 5 results, and only the ``'result'`` field of its output is returned.
    """
    vector_store = get_data(file_path, query)
    chain_settings = dict(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        ),
        return_source_documents=True,
    )
    chain_output = RetrievalQA.from_chain_type(**chain_settings).invoke(
        get_analyst_prompt(query)
    )
    return chain_output['result']

def routing_agent(query):
    '''
    Classifies the query to determine the market.

    Builds a classification prompt from MARKET_LIST and the query, sends it
    through the module-level llm, and returns the model's answer as a string.
    The answer is stripped of surrounding whitespace because callers use it
    verbatim as a directory name and as an exact-match filter value.
    '''
    prompt = PromptTemplate.from_template("""Given the user query below, perform a classification task. Classify which market does the query belongs to.
    Do not respond with more than the category name.
    
    <categories>
    {market_list}                                                                        
    </categories>    
    
    <question>
    {question}
    </question>
    
    Market:""")
    
    chain = prompt | llm | StrOutputParser()
    
    # Chat models may pad the label with spaces or a trailing newline.
    return chain.invoke({"question": query, "market_list": MARKET_LIST}).strip()

def get_analyst_prompt(query):
    """Build the analyst instruction text with ``query`` inserted as the question.

    The result is a six-line string: three instruction lines, a blank line, the
    question line, and a trailing "Answer:" cue. Lines after the first carry a
    four-space indent.
    """
    prompt_lines = [
        "Act as a professional futures market analyst. You are given a JSON data from COT.",
        "    The data is arranged in descending order, starting from the latest entry.",
        "    You must analyze the trends before answering.",
        "",
        f"    Question: {query}",
        "    Answer:",
    ]
    return "\n".join(prompt_lines)

In [4]:
# Run the full pipeline for the query defined earlier and print the model's answer.
main(query)

Number of requested results 5 is greater than number of elements in index 1, updating n_results = 1


To determine if soybeans are currently bullish, we can analyze the provided COT (Commitments of Traders) data, focusing on key indicators such as open interest, long and short positions, and changes in these metrics.

1. **Open Interest**: The latest entry shows an open interest of 1,075,036, which is an increase of 18,388 from the previous week. Rising open interest typically indicates that new money is entering the market, which can be a bullish sign.

2. **Long Positions**: The data shows that the total long positions (including producer/merchant long, swap long, and other reportable long) are at 1,028,715, while total short positions are at 995,548. The net long position suggests that there are more long positions than short positions, which is generally a bullish indicator.

3. **Change in Positions**: The change in producer/merchant long positions shows a decrease of 4,445, while the change in producer/merchant short positions shows an increase of 12,776. This indicates that whil