<a href="https://colab.research.google.com/github/Tibex88/wiki_GPT/blob/main/wiki_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install dependencies

#### WIKIPEDIA API



In [None]:
!pip install wikipedia-api

#### FLASK AND NGROK

In [None]:
!pip install flask-ngrok
!pip install pyngrok==4.1.1
!ngrok authtoken 2MBCLllCx6vqHIh1DgyvGTs60iK_7JitdzfDGUU8khCWtopq3

#### CHANGE ENCODING FOR THE FOLLOWING DEPENDENCIES


In [None]:
import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Stand-in for locale.getpreferredencoding that always reports UTF-8.

    Accepts the same optional ``do_setlocale`` argument as the real function,
    so callers that invoke ``locale.getpreferredencoding(False)`` keep working
    once the patch is applied.
    """
    return "UTF-8"


# Only patch when the runtime does not already report exactly "UTF-8"; the
# comparison is deliberately case-sensitive, as in the original check.
encoding = locale.getpreferredencoding()
if encoding != "UTF-8":
    locale.getpreferredencoding = _utf8_preferred_encoding

# Final expression: shows the encoding now in effect as the cell output.
locale.getpreferredencoding()

In [None]:
!pip install flask_cors

In [None]:
!pip install -q accelerate langchain bitsandbytes

In [None]:
!pip install pinecone-client

In [None]:
!pip install llama_index sentence_transformers

In [None]:
import wikipediaapi as wapi

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

### Get text from articles

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3'

def get_wikipedia_article(title):
    """Look up an English Wikipedia page and return its text.

    Args:
        title: Title of the page to request.

    Returns:
        The value of ``page.text`` when the page exists, otherwise ``None``.
    """
    # A new client is built on every call, configured with the module-level
    # user agent and the WIKI extract format.
    client = wapi.Wikipedia(
        user_agent=user_agent,
        language='en',
        extract_format=wapi.ExtractFormat.WIKI,
    )
    page = client.page(title)
    return page.text if page.exists() else None

### Choose Article

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

article_title = 'USS Marmora (1862)'
article = get_wikipedia_article(article_title)

# get_wikipedia_article returns None for a page that does not exist; fail here
# with a clear message instead of passing None into the splitter below.
if article is None:
    raise ValueError("Wikipedia page not found: {!r}".format(article_title))

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,      # Usually chunk sizes are much larger than this
    chunk_overlap=20,    # Overlap is needed in case the text is split in odd places
    length_function=len,
)

# `text` is the list of chunks produced by the splitter; the embedding and
# upsert cells iterate over it.
text = text_splitter.split_text(article)

### Define the embedding model

In [None]:
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from llama_index import LangchainEmbedding

# Sentence-transformers checkpoint used for both document chunks and queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# The LangChain HuggingFace embedder is wrapped in llama_index's adapter, which
# is the object the rest of the notebook calls get_text_embedding() on.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
)


### Embed texts from chunks


In [None]:
# One embedding per chunk, in the same order as `text`, so that position k in
# `embeddings` belongs to chunk k.
embeddings = [embed_model.get_text_embedding(chunk) for chunk in text]

#### Check dimensions from the embedding

In [None]:
# Embed a throwaway string to find out how long the model's embeddings are.
# NOTE: despite its name, `dimension` holds the whole probe embedding, not an
# integer; the final expression displays its length.
dimension = embed_model.get_text_embedding("hello")
len(dimension)

### Connect to Pinecone as a vector database store and upsert

In [None]:
import pinecone

import os
from getpass import getpass

# SECURITY: the Pinecone API key must not be stored in the notebook. It is read
# from the PINECONE_API_KEY environment variable, falling back to a prompt that
# does not echo the value. Any key that was ever committed here should be
# treated as leaked and revoked.
pinecone_api_key = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")

# initialize connection to pinecone (environment can be overridden via env var)
pinecone.init(
    api_key=pinecone_api_key,
    environment=os.environ.get("PINECONE_ENVIRONMENT", 'asia-southeast1-gcp-free'),
)

# create the index if it does not exist already; its dimension is the length
# of the probe embedding computed in the previous cell
index_name = 'wiki'
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name,
        dimension=len(dimension),
        metric='cosine',
    )

# connect to the index
pinecone_index = pinecone.Index(index_name)

# One (id, vector, metadata) tuple per chunk. The id is the chunk's position.
# NOTE(review): because ids are plain positions, vectors left over from a
# previously indexed, longer article are not overwritten past this article's
# last chunk - confirm whether the index should be cleared first.
docs = [
    (str(idx), vector, {'text': chunk})
    for idx, (chunk, vector) in enumerate(zip(text, embeddings))
]

# Send the vectors in bounded batches rather than one request holding every
# chunk, so a long article does not produce an oversized upsert request.
UPSERT_BATCH_SIZE = 100
for start in range(0, len(docs), UPSERT_BATCH_SIZE):
    pinecone_index.upsert(vectors=docs[start:start + UPSERT_BATCH_SIZE])

print("Upserted {} vectors into index '{}'".format(len(docs), index_name))

In [None]:
import torch
from llama_index.llms import HuggingFaceLLM
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig #, LLaMATokenizer, LLaMAForCausalLM

### Login to Huggingface

In [None]:
!huggingface-cli login

In [None]:
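# Hugging Face access token: never paste it into the notebook.
# Enter it at the `huggingface-cli login` prompt in the cell above instead,
# and revoke any token that was previously written here.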

### Define model and tokenizer

In [None]:

# Hub id shared by the tokenizer and the model so the two cannot drift apart.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# use_auth_token=True: presumably picks up the credentials stored by the
# `huggingface-cli login` cell - confirm against the installed transformers.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map='auto',
    use_auth_token=True,
)

### Get from db

In [None]:
def get_data_from_db(query, top_k=20, min_score=0.6):
    """Retrieve stored text chunks that are similar to ``query``.

    The query is embedded with ``embed_model``, the ``pinecone_index`` is
    searched, and the ``'text'`` metadata of each sufficiently similar match is
    collected.

    Args:
        query: The user's question.
        top_k: Maximum number of matches requested from the index.
        min_score: A match is kept only when its score is strictly greater
            than this value.

    Returns:
        ``str(list_of_texts)``, i.e. the Python list representation of the
        kept chunk texts (``"[]"`` when no match clears the threshold).
    """
    # embed query
    query_vector = embed_model.get_text_embedding(query)

    # query database; `include_metadata` is the snake_case keyword documented
    # for the Python client (the original passed the camelCase REST field name)
    result = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    matches = []
    for match in result['matches']:
        # filter by score
        if match['score'] > min_score:
            chunk_text = match.metadata['text']
            print(chunk_text)
            # append best results
            matches.append(chunk_text)
    return str(matches)

### Prep and wrap instruction in prompt

In [None]:

# Llama-2 chat markers: [INST] ... [/INST] wraps one user turn and
# <<SYS>> ... <</SYS>> wraps the system prompt placed inside the first turn.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
  You will be given texts related to a certain topic. Write a summary response that answers the question based on what is discussed in the texts.
  Do not mention anything outside of what is provided. Don't answer anything outside the context you are provided.
  If there isn't enough context, simply reply "This topic was not discussed previously"
  """

SYSTEM_PROMPT = B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS

def get_prompt(instruction):
    """Wrap ``instruction`` in the Llama-2 chat template with the system prompt.

    The result has the shape ``[INST] <<SYS>>...<</SYS>>\\n\\n<instruction> [/INST]``.
    A single space follows ``[INST]`` and precedes ``[/INST]``, matching the
    reference chat format; the original concatenation omitted both spaces.

    Args:
        instruction: The user-turn text (here: the formatted texts + question).
            Leading and trailing whitespace is stripped.

    Returns:
        The complete prompt string to tokenize.
    """
    return B_INST + " " + SYSTEM_PROMPT + instruction.strip() + " " + E_INST

### Format context along with the query

In [None]:
def format_prompt(query, context):
    """Lay out the retrieved texts and the question as the user-turn body.

    Args:
        query: The question to ask.
        context: The retrieved texts to ground the answer in.

    Returns:
        A string with a ``### Texts:`` section followed by a ``### Question:``
        section, each line indented by four spaces.
    """
    indent = "    "
    pieces = [
        "",                               # the template opens with a newline
        indent + "### Texts:",
        indent + "{}".format(context),
        "",
        indent + "### Question:",
        indent + "{}".format(query),
        indent,                           # trailing indentation before the closing quotes
    ]
    return "\n".join(pieces)

### Generate output per prompt

In [None]:
def generate(prompt_template, max_new_tokens=4028, return_full_text=True):
    """Run the chat model on a fully formatted prompt and decode the result.

    Args:
        prompt_template: The complete prompt string (already wrapped in the
            chat template).
        max_new_tokens: Upper bound on generated tokens. It is clamped so that
            prompt + generation stays within ``model.config.max_position_embeddings``
            when the config exposes that attribute.
        return_full_text: When True (default, same as the original behaviour)
            the decoded text contains the prompt followed by the completion,
            special tokens included. When False only the newly generated
            tokens are decoded and special tokens are dropped.

    Returns:
        The decoded text of every returned sequence, concatenated.
    """
    # Tokenize and move the tensors to the model's device; the model is loaded
    # with device_map='auto', so leaving the inputs on the CPU would mismatch.
    inputs = tokenizer(prompt_template, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Never ask for more tokens than the context window leaves after the prompt.
    context_window = getattr(model.config, "max_position_embeddings", None)
    if context_window is not None:
        max_new_tokens = max(1, min(max_new_tokens, context_window - prompt_length))

    generation_config = GenerationConfig(
        # temperature / top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        repetition_penalty=1.2,
        # A fresh GenerationConfig carries no special-token ids, so set them
        # explicitly to make generation stop at end-of-sequence.
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    print("Generating...")
    # **inputs forwards the attention mask together with the input ids.
    # Per-step scores are no longer requested: they were never read.
    sequences = model.generate(
        **inputs,
        generation_config=generation_config,
        max_new_tokens=max_new_tokens,
    )

    if not return_full_text:
        # Drop the echoed prompt tokens at the front of every sequence.
        sequences = sequences[:, prompt_length:]

    return "".join(
        tokenizer.decode(seq, skip_special_tokens=not return_full_text)
        for seq in sequences
    )

In [None]:
# Define a function that runs the model
def answer(query):
    """Answer ``query`` end to end: retrieve, build the prompt, generate.

    Args:
        query: The user's question.

    Returns:
        Whatever ``generate`` returns for the assembled prompt.
    """
    retrieved_context = get_data_from_db(query)
    user_turn = format_prompt(query, retrieved_context)
    return generate(get_prompt(user_turn))

### List of possible queries to test the model
- when was Marmora built?
- when was the USS Marmora built?
- which ship captain was it built for?
- what was the other ship that he operated?

In [None]:
answer("When was Marmora built?")

### Make the app available as an API


In [None]:
from flask_ngrok import run_with_ngrok
from flask_cors import CORS
from flask import Flask, request


# Create the Flask application that exposes the model over HTTP.
app = Flask(__name__)

# Allow cross-origin requests from every origin ('*').
CORS(app, origins=['*'])

# We need to start ngrok when the app is run.
run_with_ngrok(app)

@app.route("/", methods=['GET'])
def index():
    """Answer the question passed in the ``query`` URL parameter.

    Returns:
        The model's answer for a non-empty query, or an HTML error message
        with HTTP status 400 when ``query`` is missing or blank.
    """
    # .get() instead of ['query']: indexing aborts the request before the
    # fallback below can run when the parameter is absent.
    query = request.args.get('query', '').strip()
    if not query:
        # reply for when no query in the params
        return "<h1>No valid response</h1>", 400

    print("query", query)
    # pass query into the model
    return answer(query)

# test route
@app.route("/test", methods=['GET'])
def test_page():
  """Static test route; returns a fixed HTML heading."""
  # The closing tag was "</hl>" (letter l), which left the heading unclosed.
  return "<h1>This is the test page!</h1>"


# Start the Flask app defined in this cell (the one passed to run_with_ngrok).
app.run()