In [44]:
!pip install llama-index deeplake python-dotenv llama_hub streamlit deeplake replicate



In [48]:
import os
import textwrap
from dotenv import load_dotenv
from llama_index import download_loader
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
from llama_index import VectorStoreIndex
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.retrievers import VectorIndexRetriever
from llama_index import get_response_synthesizer
from llama_index.indices.postprocessor import SimilarityPostprocessor
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.llms import Replicate
from llama_index import ServiceContext
import deeplake
import re

In [3]:
load_dotenv()

replicate_token = os.getenv("REPLICATE_API_TOKEN")
active_loop_token = os.getenv("ACTIVELOOP_TOKEN")
dataset_path = os.getenv("DATASET_PATH")
github_token = os.getenv("GITHUB_TOKEN")

In [6]:
def parse_github_url(url):
    pattern = r"https://github\.com/([^/]+)/([^/]+)"
    match = re.match(pattern, url)
    return match.groups() if match else (None, None)

In [7]:
def validate_owner_repo(owner, repo):
    return bool(owner) and bool(repo)

def initialize_github_client():
    github_token = os.getenv("GITHUB_TOKEN")
    return GithubClient(github_token)

In [49]:
def main(message, chat_history):
    if not replicate_token:
        raise EnvironmentError("Replicate token not found in environment variables")

    # Check for GitHub Token
    if not github_token:
        raise EnvironmentError("GitHub token not found in environment variables")

    # Check for Activeloop Token
    if not active_loop_token:
        raise EnvironmentError("Activeloop token not found in environment variables")

    github_client = initialize_github_client()
    download_loader("GithubRepositoryReader")

    github_url = "https://github.com/facebookresearch/segment-anything"
    # owner, repo = parse_github_url(github_url)

    while True:
        owner, repo = parse_github_url(github_url)
        if validate_owner_repo(owner, repo):
            loader = GithubRepositoryReader(
                github_client,
                owner = owner,
                repo = repo,
                filter_file_extensions=(
                    [".py", ".js", ".ts", ".md"],
                    GithubRepositoryReader.FilterType.INCLUDE,
                ),
                verbose=False,
                concurrent_requests=5,
            )
            print(f"Loading {repo} repository by {owner}")
            docs = loader.load_data(branch="main")
            print("Documents uploaded: ")
            for doc in docs:
                print(doc.metadata)
            break # Exit the loop once the valid URL is processed
        else:
            print("Invalid GitHub URL. Please try again.")

    print("Uploading to vector store... ")

    # Create vector store and upload data
    try:
        exists = deeplake.exists(dataset_path)
        if exists:
            vector_store = DeepLakeVectorStore(
                dataset_path=dataset_path,
                overwrite=False,
                runtime={"tensor_db": True},
            )
        else:
            vector_store = DeepLakeVectorStore(
                dataset_path=dataset_path,
                overwrite=True,
                runtime={"tensor_db": True},
            )
    except Exception as e:
        print(f"An unexpected error occurred while creating or fetching the vector store: {str(e)}")

    llm = Replicate(model="mistralai/mistral-7b-instruct-v0.1:5fe0a3d7ac2852264a25279d1dfb798acbc4d49711d126646594e212cb821749")
    service_context = ServiceContext.from_defaults(llm=llm)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(docs, storage_context=storage_context, service_context=service_context)
    retriever = VectorIndexRetriever(index=index, similarity_top_k=4)
    response_synthesizer = get_response_synthesizer()
    query_engine = RetrieverQueryEngine.from_args(
        retriever=retriever,
        response_mode='default',
        response_synthesizer=response_synthesizer,
        node_postprocessors=[
            SimilarityPostprocessor(similarity_cutoff=0.7)]
    )



    user_question = message
    answer = query_engine.query(user_question)
    return str(answer)

In [50]:
import gradio as gr

In [51]:
demo = gr.ChatInterface(main).queue()

In [52]:
demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://51e701a9571ebcd221.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Loading segment-anything repository by facebookresearch
Documents uploaded: 
{'file_path': 'CODE_OF_CONDUCT.md', 'file_name': 'CODE_OF_CONDUCT.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/CODE_OF_CONDUCT.md'}
{'file_path': 'CONTRIBUTING.md', 'file_name': 'CONTRIBUTING.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/CONTRIBUTING.md'}
{'file_path': 'README.md', 'file_name': 'README.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/README.md'}
{'file_path': 'demo/README.md', 'file_name': 'README.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/demo/README.md'}
{'file_path': 'demo/configs/webpack/common.js', 'file_name': 'common.js', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/demo/configs/webpack/common.js'}
{'file_path': 'demo/configs/webpack/dev.js', 'file_name': 'dev.js', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/demo/co

100%|██████████| 52/52 [00:01<00:00, 38.55it/s]


Dataset(path='hub://gamingambidextrous/repochat', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (624, 1536)  float32   None   
    id        text      (624, 1)      str     None   
 metadata     json      (624, 1)      str     None   
   text       text      (624, 1)      str     None   
Loading segment-anything repository by facebookresearch
Documents uploaded: 
{'file_path': 'CODE_OF_CONDUCT.md', 'file_name': 'CODE_OF_CONDUCT.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/CODE_OF_CONDUCT.md'}
{'file_path': 'CONTRIBUTING.md', 'file_name': 'CONTRIBUTING.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/CONTRIBUTING.md'}
{'file_path': 'README.md', 'file_name': 'README.md', 'url': 'https://github.com/facebookresearch/segment-anything/blob/main/README.md'}
{'file_path': 'demo/README.md', 'file_name':

100%|██████████| 52/52 [00:01<00:00, 33.58it/s]


Dataset(path='hub://gamingambidextrous/repochat', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (676, 1536)  float32   None   
    id        text      (676, 1)      str     None   
 metadata     json      (676, 1)      str     None   
   text       text      (676, 1)      str     None   
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://51e701a9571ebcd221.gradio.live




In [54]:
!gradio deploy

Creating new Spaces Repo in '/content'. Collecting metadata, press Enter to accept default value.
Enter Spaces app title [content]: Chat with Repo
Formatted to Chat_with_Repo. 
Enter Gradio app file : Assignment.ipynb
Traceback (most recent call last):
  File "/usr/local/bin/gradio", line 8, in <module>
    sys.exit(cli())
  File "/usr/local/lib/python3.10/dist-packages/gradio/cli.py", line 15, in cli
    gradio.deploy_space.deploy()
  File "/usr/local/lib/python3.10/dist-packages/gradio/deploy_space.py", line 155, in deploy
    configuration = add_configuration_to_readme(
  File "/usr/local/lib/python3.10/dist-packages/gradio/deploy_space.py", line 49, in add_configuration_to_readme
    raise FileNotFoundError("Failed to find Gradio app file.")
FileNotFoundError: Failed to find Gradio app file.
