## Retreiving information and code from GitHub Repository

In [1]:
import nest_asyncio
nest_asyncio.apply()

### OpenAI API Key

In [2]:
%env OPENAI_API_KEY=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
from llama_index import GPTVectorStoreIndex, GithubRepositoryReader
from IPython.display import Markdown, display
import os

env: OPENAI_API_KEY=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


In [3]:
%env GITHUB_TOKEN=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
github_token = os.environ.get("GITHUB_TOKEN")
owner = "dair-ai"
repo = "Prompt-Engineering-Guide"
branch = "main"

documents = GithubRepositoryReader(
    github_token=github_token,
    owner=owner,
    repo=repo,
    use_parser=False,
    verbose=False,
).load_data(branch=branch)

env: GITHUB_TOKEN=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


## Quering using default High Level Query Engine

In [4]:
index = GPTVectorStoreIndex.from_documents(documents)

query_engine = index.as_query_engine()

In [5]:
response = query_engine.query("What is the difference between Zero shot prompting and Chain of thought prompting?")
display(Markdown(f"<b>{response}</b>"))

<b>
Zero-shot prompting is a technique used to enable in-context learning where no examples are provided in the prompt. The model is expected to generate a response based on its understanding of the task. Chain-of-thought (CoT) prompting is a more advanced prompting technique used to address more complex arithmetic, commonsense, and symbolic reasoning tasks. It involves breaking down the problem into steps and providing demonstrations to the model.</b>

In [6]:
response = query_engine.query("Can you give the code for set parameters function?")
display(Markdown(f"<b>{response}</b>"))

<b>
def set_open_params(
    model="text-davinci-003",
    temperature=0.7,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
):
    """ set openai parameters"""

    openai_params = {}   

    openai_params['model'] = model
    openai_params['temperature'] = temperature
    openai_params['max_tokens'] = max_tokens
    openai_params['top_p'] = top_p
    openai_params['frequency_penalty'] = frequency_penalty
    openai_params['presence_penalty'] = presence_penalty
    return openai_params</b>

## Customizing Quering Engine using LLM Predictor

In [7]:
from llama_index import LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models import ChatOpenAI

# define LLM
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"))

# define prompt helper
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_output = 1048
# set maximum chunk overlap
max_chunk_overlap = 20

custom_LLM_index = GPTVectorStoreIndex.from_documents(
    documents, llm_predictor=llm_predictor)

In [8]:
query_engine_1 = custom_LLM_index.as_query_engine()

In [9]:
response = query_engine_1.query("Can you give the code for setting parameters function?")
display(Markdown(f"<b>{response}</b>"))

<b>
def set_open_params(
    model="text-davinci-003",
    temperature=0.7,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
):
    """ set openai parameters"""

    openai_params = {}    

    openai_params['model'] = model
    openai_params['temperature'] = temperature
    openai_params['max_tokens'] = max_tokens
    openai_params['top_p'] = top_p
    openai_params['frequency_penalty'] = frequency_penalty
    openai_params['presence_penalty'] = presence_penalty
    return openai_params</b>

In [10]:
response = query_engine_1.query("I'm trying to learn about chatgpt and recently worked with text davinci 002 model.")
display(Markdown(f"<b>{response}</b>"))

<b>
What is the difference between ChatGPT and Text-Davinci-002?

Answer: ChatGPT is a more advanced version of the Text-Davinci-002 model. It uses a chat format as input, expecting a series of messages as input and uses those to generate a response. It also supports single-turn tasks similar to what we used with Text-Davinci-002, but it is more efficient and can generate more natural-sounding responses.</b>

## Creating custom Query Engine using Llama index's Retrievers similarity cutoff

In [11]:
from llama_index import (
    GPTVectorStoreIndex,
    ResponseSynthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor

# configure retriever
retriever = VectorIndexRetriever(
    index=custom_LLM_index, 
    similarity_top_k=2,
)

# configure response synthesizer
response_synthesizer = ResponseSynthesizer.from_args(
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7)
    ]
)

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# query
response = query_engine.query("Can you give the code for setting parameters function?")
print(response)


def set_open_params(
    model="text-davinci-003",
    temperature=0.7,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
):
    """ set openai parameters"""

    openai_params = {}    

    openai_params['model'] = model
    openai_params['temperature'] = temperature
    openai_params['max_tokens'] = max_tokens
    openai_params['top_p'] = top_p
    openai_params['frequency_penalty'] = frequency_penalty
    openai_params['presence_penalty'] = presence_penalty
    return openai_params
