In [3]:
import ingest
import run_localGPT
from constants import SOURCE_DIRECTORY, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME, EMBEDDING_MODEL_NAME
import os
from langchain.embeddings import HuggingFaceInstructEmbeddings

## Example

In [4]:
device_type='cpu'
chunk_size=100
chunk_overlap=50
dir="Uniswap v3/liquidity_model" # dex_name/feature
k=4

In [5]:
source_directory = os.path.join(SOURCE_DIRECTORY, dir)
save_path = os.path.join(PERSIST_DIRECTORY, dir)
# Create embeddings
embeddings = HuggingFaceInstructEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={"device": device_type},
    )

# change the embedding type here if you are running into issues.
    # These are much smaller embeddings and will work for most appications
    # If you use HuggingFaceEmbeddings, make sure to also use the same in the
    # run_localGPT.py file.

    # embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

load INSTRUCTOR_Transformer
max_seq_length  512


In [7]:
ingest.main(device_type=device_type, embedding_model=embeddings, chunk_size=chunk_size, chunk_overlap=chunk_overlap,\
source_directory=source_directory, save_path=save_path)

Loading documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/liquidity_model
Loaded 3 documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/liquidity_model
Split into 753 chunks of text


In [8]:
persist_directory = os.path.join(save_path, f'cs_{chunk_size}_co_{chunk_overlap}')

In [9]:
print(persist_directory)

C:\Users\mmahmoud\localGPT/DB\Uniswap v3/liquidity_model\cs_100_co_50


In [10]:
llm = run_localGPT.load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
use_history = False
query = """Extract the liquidity model employed by the DEX. Look for any of the following:
  - Constant Product Market Maker (CPMM)
  - Constant Sum Market Maker (CSMM)
  - Constant Mean Market Maker (CMMM)
  - Hybrid Constant Function Market Makers (CFMMs)
  - Dynamic Automated Market Maker (DAMM)
  - Proactive Market Maker (PMM)
  - Virtual Automated Market Makers (vAMM)
If the information is not available in documents feel free to precise it.
Reply with only one short sentence or word."""

Loading Model: TheBloke/Llama-2-7b-Chat-GGUF, on: cpu
This action can take a few minutes!
Using Llamacpp for GGUF/GGML quantized models


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [11]:
answer, docs = run_localGPT.main(device_type, llm, k, persist_directory, query, use_history, verbose=True, show_sources=True)

Running on: cpu
Use history set to: False
load INSTRUCTOR_Transformer
max_seq_length  512


## Collecting features for all DEXs

In [1]:
import os
import glob

In [3]:
"Llama-2-13B-chat-GPTQ".lower()

'llama-2-13b-chat-gptq'

In [6]:
folders = [os.path.basename(folder) for folder in glob.glob(f"{SOURCE_DIRECTORY}/*")]
folders

['Uniswap v3']

In [7]:
features = ["fees", "liquidity_model", "license"]
directories = [f"{folder}/{feature}" for folder in folders for feature in features]
directories

['Uniswap v3/fees', 'Uniswap v3/liquidity_model', 'Uniswap v3/license']

In [20]:
configs = [(5, 500, 200), (4, 500, 100), (5, 500, 200)] # k, chunk_size, chunk_overlap

In [21]:
for k, cs, co in configs:
    for dir in directories:
        dex_name = dir.split("/")[0]
        feature = dir.split("/")[1]

        print(f"Running for {dex_name} {feature} with k={k}, cs={cs}, co={co}")

        print("Ingesting...")
        source_directory = os.path.join(SOURCE_DIRECTORY, dir)
        #save_path = os.path.join(PERSIST_DIRECTORY, dir)
        # incllude embedding model id in save_path
        save_path = os.path.join(PERSIST_DIRECTORY, dir, os.path.basename(EMBEDDING_MODEL_NAME))
        ingest.main(device_type=device_type, embedding_model=embeddings, chunk_size=chunk_size, chunk_overlap=chunk_overlap,\
                    source_directory=source_directory, save_path=save_path)

        persist_directory = os.path.join(save_path, f'cs_{chunk_size}_co_{chunk_overlap}')
        
        # Getting the query from queries/feature.txt
        with open(f"queries/{feature}.txt", "r") as f:
            query = f.read()

        print("Running localGPT...")
        answer, docs = run_localGPT.main(device_type, llm, k, persist_directory, query, use_history, verbose=False, show_sources=False)

        # Saving the answer in answers/dex_name/feature/model_id/k_cs_co.txt
        os.makedirs(f"answers/{dex_name}/{feature}/{os.path.basename(MODEL_ID)}", exist_ok=True)
        with open(f"answers/{dex_name}/{feature}/{os.path.basename(MODEL_ID)}/k_{k}_cs_{cs}_co_{co}.txt", "w") as f:
            f.write(answer)

Running for Uniswap v3 fees with k=5, cs=500, co=200
Ingesting...
Loading documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/fees
Loaded 3 documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/fees
Split into 111 chunks of text
Running localGPT...
Running on: cpu
Use history set to: False
load INSTRUCTOR_Transformer
max_seq_length  512


Llama.generate: prefix-match hit


Running for Uniswap v3 liquidity_model with k=5, cs=500, co=200
Ingesting...
Loading documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/liquidity_model
Loaded 3 documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/liquidity_model
Split into 753 chunks of text
Running localGPT...
Running on: cpu
Use history set to: False
load INSTRUCTOR_Transformer
max_seq_length  512


Llama.generate: prefix-match hit


Running for Uniswap v3 license with k=5, cs=500, co=200
Ingesting...
Loading documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/license
Loaded 1 documents from C:\Users\mmahmoud\localGPT/SOURCE_DOCUMENTS\Uniswap v3/license
Split into 63 chunks of text
Running localGPT...
Running on: cpu
Use history set to: False
load INSTRUCTOR_Transformer
max_seq_length  512


Llama.generate: prefix-match hit
