# Load model

In [1]:
# import langchain and llamacpp
# llama cpp is a C++ interface that enables LLM to run locally with GPU support
# we should initialize a llamacpp instance first and later langchain provides a wrapper for it
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="../model/llama-2-13b-chat.Q5_K_M.gguf",
    n_ctx=8192, # context of token size
    n_gpu_layers=-1, #setting -1 to offload all the LLM layers to all available gpu 
    n_batch=4096, # no. of token in a prompt fed in LLM each time in a batch
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
    max_tokens=2048 # max tokens LLM could generate/answer
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 2 CUDA devices:
  Device 0: NVIDIA GeForce RTX 2080 Ti, compute capability 7.5
  Device 1: NVIDIA GeForce RTX 2080 Ti, compute capability 7.5
llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ../model/llama-2-13b-chat.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: -

In [2]:
# Use Langchain prompt messages, e.g., System Message, Human Message to provide context and instruct how LLM could answer our questions
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage

# Create a simple prompt for illustration purpose
template_messages = [
    SystemMessage(content="You are a helpful assistant."),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{text}"),
]
prompt_template = ChatPromptTemplate.from_messages(template_messages)

In [3]:
prompt_template.messages

[SystemMessage(content='You are a helpful assistant.'),
 MessagesPlaceholder(variable_name='chat_history'),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], template='{text}'))]

In [4]:
# import the standard chain interface
# import memory class to store the chat history
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

In [5]:
# import the specialized wrappers for llama chat models
from langchain_experimental.chat_models import Llama2Chat
model = Llama2Chat(llm=llm)

In [6]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)

In [7]:
# test to see whether the Llama 2 model works
print(
    chain.run(
        text="What can I see in Vienna? Propose a few locations. Names only, no details."
    )
)

  Sure thing! Here are a few popular locations to consider visiting in Vienna:

1. Schönbrunn Palace
2. St. Stephen's Cathedral
3. Hofburg Palace
4. Prater Park
5. Belvedere Palace



llama_print_timings:        load time =     992.99 ms
llama_print_timings:      sample time =      33.98 ms /    55 runs   (    0.62 ms per token,  1618.69 tokens per second)
llama_print_timings: prompt eval time =     992.68 ms /    47 tokens (   21.12 ms per token,    47.35 tokens per second)
llama_print_timings:        eval time =    4372.23 ms /    54 runs   (   80.97 ms per token,    12.35 tokens per second)
llama_print_timings:       total time =    5557.05 ms


In [8]:
# if the chat history is set correctly, the llama 2 should be able to answer St. Stephen's Cathedral (Stephansdom)
print(chain.run(text="Tell me more about #2."))

Llama.generate: prefix-match hit


  Sure! Here are some details about St. Stephen's Cathedral (Stephansdom) in Vienna, Austria:

St. Stephen's Cathedral is a stunning Gothic cathedral located in the heart of Vienna and is one of the city's most famous landmarks. It was built in the 12th century and has been the site of important religious and political events throughout history. The cathedral features intricate stone carvings, colorful tile roofs, and a stunning stained glass window that depicts the life of Saint Stephen. Visitors can climb up to the South Tower for panoramic views of the city.

It's also worth noting that the cathedral is located in the historic center of Vienna, so it's a great starting point for exploring the city's other famous landmarks and attractions.



llama_print_timings:        load time =     992.99 ms
llama_print_timings:      sample time =     116.28 ms /   187 runs   (    0.62 ms per token,  1608.13 tokens per second)
llama_print_timings: prompt eval time =    1028.13 ms /    71 tokens (   14.48 ms per token,    69.06 tokens per second)
llama_print_timings:        eval time =   19605.78 ms /   186 runs   (  105.41 ms per token,     9.49 tokens per second)
llama_print_timings:       total time =   21319.62 ms


# Parse the trading strategies

the trading strategies are stored in `code` folder

In [32]:
# use the langchain document parser to parse the trading code
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load
loader = GenericLoader.from_filesystem(
    "../code",
    glob="**/*",
    suffixes=[".py"],
    parser=LanguageParser(language=Language.PYTHON, parser_threshold=0), # minimum lines of code to activate parsing, default=0, https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/document_loaders/parsers/language/language_parser.py
)
documents = loader.load()
len(documents)

1

In [33]:
documents[0].metadata

{'source': '../code/vectorbt-test.py',
 'content_type': 'simplified_code',
 'language': <Language.PYTHON: 'python'>}

In [34]:
print(documents[0].page_content)

# This is a script to calculate simple moving average trading strategy
import yfinance as yf
import vectorbt as vbt

# Fetch historical data from Yahoo Finance
ticker_symbol = 'AAPL'  # Apple Inc.
data = yf.download(ticker_symbol, start='2020-01-01', end='2023-01-01')

# Select only the 'Close' column for simplicity
close_prices = data['Close']

# Calculate moving averages
short_sma = close_prices.rolling(window=10).mean()
long_sma = close_prices.rolling(window=50).mean()

# Generate signals: where short SMA crosses above long SMA (entry), and below (exit)
entries = short_sma > long_sma
exits = short_sma <= long_sma

# Use vectorbt to simulate trades based on these signals
portfolio = vbt.Portfolio.from_signals(close_prices, entries, exits, fees=0.001, freq='1D')

# Get and print the performance metrics
performance = portfolio.stats()
print(performance)

# Plot the portfolio and trades
portfolio.plot().show()


In [11]:
# Split the python code in recursive splitter
# https://python.langchain.com/docs/modules/data_connection/document_transformers/code_splitter
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, 
    chunk_size=2000, # maximum of tokens that are parsed in a file, exceeding it will go to next texts
    chunk_overlap=200
)
texts = python_splitter.split_documents(documents)
len(texts)

1

In [12]:
# import the Chroma vector store and uses the HuggingFaceEmbeddings to embed the python code into vectors
# We uses mpnet-base-v2 here
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

db = Chroma.from_documents(texts, HuggingFaceEmbeddings(cache_folder="../model/", model_kwargs={"device":"cuda:1"}))

In [13]:
# we uses mmr as similarity search method and sets nearest neighbor as 1 to only extract one trading strategy
retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 1},
)

In [14]:
# we use the prompt template uploaded in langchain prompt hub
#https://python.langchain.com/docs/use_cases/code_understanding
from langchain import hub
QA_CHAIN_PROMPT = hub.pull("rlm/rag-prompt")

In [15]:
question = "Could you show me how to use a simple moving average for trading in Google Stocks?"
docs = retriever.get_relevant_documents(question)
docs

Number of requested results 20 is greater than number of elements in index 1, updating n_results = 1


[Document(page_content="# This is a script to calculate simple moving average trading strategy\nimport yfinance as yf\nimport vectorbt as vbt\n\n# Fetch historical data from Yahoo Finance\nticker_symbol = 'AAPL'  # Apple Inc.\ndata = yf.download(ticker_symbol, start='2020-01-01', end='2023-01-01')\n\n# Select only the 'Close' column for simplicity\nclose_prices = data['Close']\n\n# Calculate moving averages\nshort_sma = close_prices.rolling(window=10).mean()\nlong_sma = close_prices.rolling(window=50).mean()\n\n# Generate signals: where short SMA crosses above long SMA (entry), and below (exit)\nentries = short_sma > long_sma\nexits = short_sma <= long_sma\n\n# Use vectorbt to simulate trades based on these signals\nportfolio = vbt.Portfolio.from_signals(close_prices, entries, exits, fees=0.001, freq='1D')\n\n# Get and print the performance metrics\nperformance = portfolio.stats()\nprint(performance)\n\n# Plot the portfolio and trades\nportfolio.plot().show()", metadata={'language': 'p

In [16]:
# We use the Question-answer chain where it has arguments accepting chat history and documents (i.e., the python code in this case)
from langchain.chains.question_answering import load_qa_chain

In [17]:
# provide the system prompt to make the LLM aware of its mission to generate trading code
template_messages = [
    SystemMessage(content="""
                  You are a helpful assistant in coding Python trading strategy. 
                  When you write python code, please enclose your python block by ```python ...your python code ```. 
                  Thanks
                  """),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("Here is my codebase {context} \n Question: {text}"),
]
prompt_template = ChatPromptTemplate.from_messages(template_messages)

In [18]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, input_key="text")
chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt_template, memory=memory, verbose=True)

In [19]:
chain({"input_documents":docs, "text": question}, return_only_outputs=True)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: 
                  You are a helpful assistant in coding Python trading strategy. 
                  When you write python code, please enclose your python block by ```python ...your python code ```. 
                  Thanks
                  
Human: Here is my codebase # This is a script to calculate simple moving average trading strategy
import yfinance as yf
import vectorbt as vbt

# Fetch historical data from Yahoo Finance
ticker_symbol = 'AAPL'  # Apple Inc.
data = yf.download(ticker_symbol, start='2020-01-01', end='2023-01-01')

# Select only the 'Close' column for simplicity
close_prices = data['Close']

# Calculate moving averages
short_sma = close_prices.rolling(window=10).mean()
long_sma = close_prices.rolling(window=50).mean()

# Generate signals: where short SMA crosses above long SMA (entry), and below (exit)
entries = short_sma > 

Llama.generate: prefix-match hit


 

Please help me with this.

Answer: Sure, I can definitely help you with that! Here's an example of how you could use a simple moving average (SMA) to trade stocks on Google using Python and the yfinance library:
```python
# Import libraries
import yfinance as yf

# Set the ticker symbol for the stock you want to trade
ticker_symbol = 'GOOG'  # Google Inc.

# Fetch historical data from Yahoo Finance
data = yf.download(ticker_symbol, start='2020-01-01', end='2023-01-01')

# Select only the 'Close' column for simplicity
close_prices = data['Close']

# Calculate moving averages
short_sma = close_prices.rolling(window=10).mean()
long_sma = close_prices.rolling(window=50).mean()

# Generate signals: where short SMA crosses above long SMA (entry), and below (exit)
entries = short_sma > long_sma
exits = short_sma <= long_sma

# Create a portfolio and add trades based on the signals
portfolio = yf.Portfolio(ticker_symbol)
for entry, exit in zip(entries, exits):
    if entry:
        portfoli


llama_print_timings:        load time =     964.13 ms
llama_print_timings:      sample time =     323.99 ms /   552 runs   (    0.59 ms per token,  1703.74 tokens per second)
llama_print_timings: prompt eval time =    5874.72 ms /   394 tokens (   14.91 ms per token,    67.07 tokens per second)
llama_print_timings:        eval time =   79942.48 ms /   551 runs   (  145.09 ms per token,     6.89 tokens per second)
llama_print_timings:       total time =   89270.30 ms


{'output_text': " \n\nPlease help me with this.\n\nAnswer: Sure, I can definitely help you with that! Here's an example of how you could use a simple moving average (SMA) to trade stocks on Google using Python and the yfinance library:\n```python\n# Import libraries\nimport yfinance as yf\n\n# Set the ticker symbol for the stock you want to trade\nticker_symbol = 'GOOG'  # Google Inc.\n\n# Fetch historical data from Yahoo Finance\ndata = yf.download(ticker_symbol, start='2020-01-01', end='2023-01-01')\n\n# Select only the 'Close' column for simplicity\nclose_prices = data['Close']\n\n# Calculate moving averages\nshort_sma = close_prices.rolling(window=10).mean()\nlong_sma = close_prices.rolling(window=50).mean()\n\n# Generate signals: where short SMA crosses above long SMA (entry), and below (exit)\nentries = short_sma > long_sma\nexits = short_sma <= long_sma\n\n# Create a portfolio and add trades based on the signals\nportfolio = yf.Portfolio(ticker_symbol)\nfor entry, exit in zip(en