In [8]:
import ingest
import run_localGPT
from langchain.embeddings import HuggingFaceInstructEmbeddings

import requests
from googlesearch import search
import hashlib
import os
import base64

In [9]:
# GitHub personal access token, read from the GITHUB_TOKEN environment variable so
# that no secret is stored in the notebook. Export GITHUB_TOKEN before starting the
# kernel; when it is unset TOKEN is an empty string.
# NOTE: the token that was previously hard-coded in this cell must be considered
# leaked and should be revoked.
TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Define the folder for storing database
SOURCE_DIRECTORY = "tmp_data"
# Define the folder for storing the embeddings
PERSIST_DIRECTORY = "tmp_persist"

In [10]:
def save_url_as_html(url, save_path):
    """Download ``url`` and write the response text to ``save_path`` (UTF-8).

    Args:
        url: Address of the page to fetch.
        save_path: File the page text is written to.

    Returns:
        None. The outcome is only reported through ``print``: a success message,
        the status code when it is not 200, or the text of any exception raised
        while fetching or writing (exceptions are not propagated).
    """
    try:
        # The request is abandoned after 10 seconds.
        page = requests.get(url, timeout=10)

        # Anything but 200 is reported and no file is written.
        if page.status_code != 200:
            print(f"Failed to fetch the URL. Status code: {page.status_code}")
            return

        with open(save_path, 'w', encoding='utf-8') as out_file:
            out_file.write(page.text)
        print(f"HTML content saved as {save_path}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

def get_documents(dex_name, save_path=SOURCE_DIRECTORY, timeout=10):
    """Collect the source documents for one DEX and return the folder holding them.

    Two sub-folders are created under ``<save_path>/<dex_name>``:

    * ``liquidity_model`` - HTML pages returned by a web search for
      "<dex_name> liquidity model".
    * ``license`` - the first license text GitHub returns for a repository
      matching ``dex_name``, or ``no_license.txt`` when the repository search
      succeeded but no license content was found.

    Args:
        dex_name: Name of the DEX; used in the search queries and folder names.
        save_path: Root folder for the downloaded documents.
        timeout: Seconds to wait for each GitHub API request.

    Returns:
        The path ``<save_path>/<dex_name>`` as a string.

    Raises:
        requests.RequestException: If the GitHub repository search request
            itself fails (connection error, timeout).
    """
    _save_liquidity_model_pages(dex_name, save_path)
    _save_first_license(dex_name, save_path, timeout)

    # return path to the dex folder
    return f'{save_path}/{dex_name}'


def _save_liquidity_model_pages(dex_name, save_path):
    """Save the pages found for '<dex_name> liquidity model' as HTML files."""
    query = f'{dex_name} liquidity model'
    liquidity_model_links = list(search(query, num_results=1))

    # create a folder for the dex liquidity model
    target_dir = f'{save_path}/{dex_name}/liquidity_model'
    os.makedirs(target_dir, exist_ok=True)

    for i, link in enumerate(liquidity_model_links, start=1):
        # The MD5 digest only provides a file name derived from the URL.
        file_name = f'{hashlib.md5(link.encode()).hexdigest()}_{i}.html'
        try:
            save_url_as_html(link, f'{target_dir}/{file_name}')
        except Exception as exc:
            # Best effort: one failing page must not abort the whole run, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Could not save the page {link}: {exc}")


def _save_first_license(dex_name, save_path, timeout):
    """Save the first license found among GitHub repositories matching ``dex_name``."""
    # create a folder for the dex if it doesn't exist
    license_dir = f'{save_path}/{dex_name}/license'
    os.makedirs(license_dir, exist_ok=True)

    # Only send the Authorization header when a token is actually configured.
    headers = {'Authorization': f'token {TOKEN}'} if TOKEN else {}

    # Passing the query through `params` lets requests URL-encode the DEX name
    # (spaces, '&', '#', ...) instead of splicing it raw into the URL.
    search_response = requests.get(
        'https://api.github.com/search/repositories',
        params={'q': dex_name, 'per_page': 10},
        headers=headers,
        timeout=timeout,
    )
    if search_response.status_code != 200:
        print(f'Failed to search for repositories related to {dex_name}: {search_response.status_code}')
        return

    for repo in search_response.json().get('items', []):
        # Check if a license file exists and retrieve the license text
        license_url = f'https://api.github.com/repos/{repo["owner"]["login"]}/{repo["name"]}/license'
        try:
            license_response = requests.get(license_url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # A network failure for one repository should not stop the search.
            print(f'Failed to fetch license for {repo["full_name"]}: {exc}')
            continue

        if license_response.status_code != 200:
            print(f'Failed to fetch license for {repo["full_name"]}: {license_response.status_code}')
            continue

        license_data = license_response.json()
        if 'content' not in license_data:
            continue

        license_text = base64.b64decode(license_data['content']).decode('utf-8', errors='replace')
        # Save license text to a file in the dex folder in the license folder.
        # Written as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        license_file = f'{license_dir}/{repo["full_name"].replace("/", "__")}.txt'
        with open(license_file, 'w', encoding='utf-8') as f:
            f.write(license_text)
        # Stop searching for licenses in other repositories for this DEX
        return

    # If no official license is found, create a txt file with the message
    with open(f'{license_dir}/no_license.txt', 'w', encoding='utf-8') as f:
        f.write("No official license found for this DEX.")

In [12]:
import gradio as gr

In [16]:
def user_interaction(dex_name, k, co, cs, progress=gr.Progress()):
    """Answer the predefined questions about a DEX from its scraped documents.

    For each feature ("liquidity_model", "license") the documents under
    ``<SOURCE_DIRECTORY>/<dex_name>/<feature>`` are ingested, the query stored
    in ``queries/<feature>.txt`` is read (with "the DEX" replaced by
    ``dex_name``) and passed to ``run_localGPT.main``.

    Args:
        dex_name: Name of the DEX to analyse.
        k: Number of chunks passed to ``run_localGPT.main``; converted to int.
        co: Chunk overlap passed to ``ingest.main``; converted to int.
        cs: Chunk size passed to ``ingest.main``; converted to int.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        dict mapping each feature name to ``{"answer": ..., "sources": [...]}``,
        where sources are the ``page_content`` of the returned documents.
        An empty dict is returned when ``dex_name`` is empty or any of
        ``k``, ``co``, ``cs`` is ``None``.
    """
    results = {}

    # Check if all parameters are provided
    if not dex_name or k is None or co is None or cs is None:
        return results

    # Normalise the numeric inputs once instead of on every loop iteration.
    k, co, cs = int(k), int(co), int(cs)

    progress(0.2, desc="Scraping documents...")
    # Use the folder that belongs to the requested DEX (previously a fixed
    # "tmp_data/Uniswap v3" was used for every DEX). Documents already on disk
    # are reused; otherwise they are scraped now.
    dex_folder = f"{SOURCE_DIRECTORY}/{dex_name}"
    if not os.path.isdir(dex_folder):
        dex_folder = get_documents(dex_name)

    progress(0.4, desc="Loading embedding model...")
    embedding_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large", model_kwargs={"device": "cpu"})

    progress(0.6, desc="Loading LLM model...")
    llm = run_localGPT.load_model(device_type="cpu", model_id="TheBloke/Llama-2-7b-Chat-GGUF", model_basename="llama-2-7b-chat.Q4_K_M.gguf")

    # '/' in the model name would otherwise create an extra directory level.
    model_tag = embedding_model.model_name.replace('/', '_')

    # Define features to process
    for feature in ("liquidity_model", "license"):
        source_directory = f"{dex_folder}/{feature}"
        save_path = f"{PERSIST_DIRECTORY}/{dex_name}/{feature}/{model_tag}"
        ingest.main(device_type="cpu", embedding_model=embedding_model, chunk_size=cs, chunk_overlap=co,
                    source_directory=source_directory, save_path=save_path)

        # NOTE(review): presumably ingest.main persists its index under
        # <save_path>/cs_<cs>_co_<co> - confirm against ingest.py.
        persist_directory = os.path.join(save_path, f'cs_{cs}_co_{co}')

        # Getting the query from queries/feature.txt
        with open(f"queries/{feature}.txt", "r") as f:
            query = f.read().replace("the DEX", dex_name)

        # Running localGPT
        answer, docs = run_localGPT.main("cpu", llm, k, persist_directory, query, verbose=False, show_sources=False, promptTemplate_type="llama")

        # Store the results
        results[feature] = {"answer": answer, "sources": [document.page_content for document in docs]}

    return results

In [17]:
# Screenshots displayed in the "How it works ?" gallery.
# The folder can be overridden with the DEX_NAVIGATOR_IMAGE_DIR environment
# variable so the notebook also runs on machines other than the author's;
# the default keeps the original location.
IMAGE_DIRECTORY = os.environ.get("DEX_NAVIGATOR_IMAGE_DIR", "C:/Users/mmahmoud/Pictures")
solution1 = f"{IMAGE_DIRECTORY}/chatwdoc (1).png"
solution2 = f"{IMAGE_DIRECTORY}/chatwdoc (2).png"

In [18]:
import gradio as gr
import pandas as pd

# Static three-row table shown in the "Table" tab.
# NOTE(review): the "LM1"/"LM2"/"LM3" values look like placeholders - confirm.
df = pd.DataFrame({"Dex name": ["Uniswap v3", "SushiSwap", "PancakeSwap"], "Liquidity model": ["LM1", "LM2", "LM3"], "License": ["MIT", "Apache", "GPL"]})


# Build the Gradio UI: a header, a "Table" tab, an "Interact with the app" tab
# and a "How it works ?" tab.
with gr.Blocks(gr.themes.Default()) as demo:
    # Page title and one-line description, rendered from inline HTML.
    gr.Markdown("<h1 style='color: #4285F4; font-size: 36px;'>🌟 DEX Navigator</h1>")
    gr.Markdown("<p style='font-size: 20px;'>An app that helps you find answers to questions about a DEX.</p>")
    with gr.Tab("Table"):
        table = gr.DataFrame(df)
    with gr.Tab("Interact with the app"):
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    dex = gr.Textbox(label="DEX Name", value="Uniswap v3", placeholder="Enter the name of the DEX")
                    # NOTE(review): these two sliders are not bound to a name and are
                    # not listed in the click() inputs below, so their values never
                    # reach user_interaction.
                    gr.Slider(minimum=0, maximum=1, value=0, label="Temperature", info="Choose between 0 and 1")
                    gr.Slider(minimum=0, maximum=1, value=0, label="Top P", info="Choose between 0 and 1")
                # NOTE(review): the string is passed positionally to gr.Column; confirm
                # what the installed gradio version does with it (TODO).
                with gr.Column("Ingesting Parameters"):
                    cs = gr.Number(label="Chunk Size", value=500) 
                    co = gr.Number(label="Chunk Overlap", value=100)
                    k = gr.Number(label="Number of Chunks", minimum=1, maximum=5, value=3, info="Choose between 1 and 5")              
            with gr.Column():
                results = gr.JSON(label="Results")
        extract_button = gr.Button("Extract")
    # Inputs are listed in the order of user_interaction's parameters
    # (dex_name, k, co, cs); its return value is shown in the JSON component.
    extract_button.click(user_interaction, inputs=[dex, k, co, cs], outputs=[results])

    with gr.Tab("How it works ?"):
        # solution1 / solution2 are the image paths defined in an earlier cell.
        gr.Gallery(label="Solution", value=[solution1, solution2], columns=2, rows=1, object_fit="scale-down")

# Enable the request queue and start the local server.
# NOTE(review): concurrency_count is passed to queue(); confirm it is supported
# by the gradio version in use before upgrading.
demo.queue(concurrency_count=20).launch()


Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




load INSTRUCTOR_Transformer
max_seq_length  512
Loading Model: TheBloke/Llama-2-7b-Chat-GGUF, on: cpu
This action can take a few minutes!
Using Llamacpp for GGUF/GGML quantized models


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


Loading documents from tmp_data/Uniswap v3/liquidity_model
Loaded 5 documents from tmp_data/Uniswap v3/liquidity_model
Split into 138 chunks of text
Running on: cpu
load INSTRUCTOR_Transformer
max_seq_length  512
Loading documents from tmp_data/Uniswap v3/license
Loaded 1 documents from tmp_data/Uniswap v3/license
Split into 13 chunks of text
Running on: cpu
load INSTRUCTOR_Transformer
max_seq_length  512


Llama.generate: prefix-match hit
