In [None]:
pip install python-dotenv

In [None]:
from flask import Flask, request, jsonify,render_template,redirect,url_for,send_file
from openai import AzureOpenAI
import tiktoken
import os
import http.server
import json
from urllib.parse import urlparse, parse_qs
from flask_cors import CORS
from io import BytesIO
 
 
# Azure OpenAI client shared by the request helpers defined further down.
# The endpoint and key are read from the process environment; os.getenv
# returns None for a variable that is not set.
# NOTE(review): python-dotenv is installed in the first cell, but nothing in
# the visible cells calls load_dotenv(), so a .env file would not be picked up
# here - confirm the variables are exported before the kernel starts.
client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
  api_key = os.getenv("AZURE_OPENAI_API_KEY"),
  api_version = "2024-02-01"
)

# Flask application object; CORS(app) registers flask_cors on it with the
# extension's default settings.
app = Flask(__name__)
CORS(app)


# Both text files are read once, when this cell runs. The paths are relative,
# so they resolve against the current working directory.
# `text` is the corpus the /answer route hands to chunking().
with open('data.txt','r') as file:
    text = file.read()
# `promptData` is the fixed text that suggest() interpolates into its prompt.
with open('promptData.txt','r') as file:
    promptData = file.read()


@app.route('/answer', methods=['POST','OPTIONS'])
def answer():
    """Handle the /answer form endpoint.

    Form fields read from the request:
        radioAnswer: when equal to ``'on'`` the canned suggestion from
            ``suggest`` is returned and ``question`` is ignored.
        targetLang:  language the reply should be written in; falls back to
            ``'english'`` when missing or blank.
        question:    free-text question, answered via ``chunking`` against the
            module-level ``text`` corpus.

    Returns:
        A JSON response shaped ``{'result': {...}}``. A request on the
        question path with no usable ``question`` gets a JSON error and
        HTTP 400.
    """
    # Listing OPTIONS in `methods` turns off Flask's automatic OPTIONS
    # handling, so a CORS preflight reaches this view with an empty body.
    # Reply to it directly instead of failing on the missing form fields.
    if request.method == 'OPTIONS':
        return app.make_default_options_response()

    form = request.form
    targetLang = form.get('targetLang', '').strip() or 'english'

    # HTML forms omit unchecked radio buttons entirely, so a missing
    # 'radioAnswer' simply means "answer the question".
    if form.get('radioAnswer') == 'on':
        reply = suggest(targetLang)
        return jsonify({'result': { "answer": reply}})

    question = form.get('question', '')
    if not question.strip():
        return jsonify({'error': 'question is required'}), 400

    reply = chunking(question, text, targetLang)
    return jsonify({'result': {"question": question, "answer": reply}})
         
         

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens ``string`` encodes to under the tiktoken encoding ``encoding_name``."""
    token_ids = tiktoken.get_encoding(encoding_name).encode(string)
    return len(token_ids)

def _split_into_chunks(data, chunk_size, overlap):
    """Slice ``data`` into windows of at most ``chunk_size`` characters.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    neighbouring chunks share ``overlap`` characters. Empty ``data`` yields an
    empty list.

    Raises:
        ValueError: if ``chunk_size`` is not positive, or ``overlap`` is not in
            ``[0, chunk_size)`` (the stride would be zero or negative).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of characters")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    stride = chunk_size - overlap
    return [str(data[start:start + chunk_size]) for start in range(0, len(data), stride)]


def _cosine_similarity(embedding1, embedding2):
    """Return the cosine of the angle between two equal-length vectors.

    A zero-length vector has no direction, so 0.0 is returned instead of
    dividing by zero.
    """
    dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
    magnitude1 = sum(a * a for a in embedding1) ** 0.5
    magnitude2 = sum(b * b for b in embedding2) ** 0.5
    if magnitude1 == 0 or magnitude2 == 0:
        return 0.0
    return dot_product / (magnitude1 * magnitude2)


def chunking(queries, text, targetLang, chunk_size=4096, overlap=0, top_k=5):
    """Answer a question using the chunks of ``text`` most similar to it.

    ``text`` is cut into character windows, every window and the question are
    embedded, the windows are ranked by cosine similarity to the question, and
    the ``top_k`` best windows (each at most once, best first) are placed in
    the prompt sent to the chat deployment.

    Args:
        queries:    the user's question.
        text:       the corpus to search.
        targetLang: language the model is asked to answer in.
        chunk_size: window length in characters (default 4096).
        overlap:    characters shared by consecutive windows (default 0).
        top_k:      maximum number of windows put in the prompt (default 5).

    Returns:
        The content of the first choice returned by the chat deployment.

    Raises:
        ValueError: if ``top_k`` is below 1 or the chunk_size/overlap pair is
            invalid. Errors raised by the OpenAI client propagate unchanged.

    NOTE: every call issues one embeddings request per chunk of ``text``.
    When ``text`` is fixed, caching the chunk embeddings outside this function
    would avoid repeating that work on each request.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    llm_deployment_name = "gpt-35-turbo"
    embeddings_deployment_name = "text-embedding-ada-002"

    def get_embedding(content):
        # One embeddings request per input string.
        response = client.embeddings.create(
            input=content,
            model=embeddings_deployment_name,
        )
        return response.data[0].embedding

    query = queries
    chunks = _split_into_chunks(text, chunk_size, overlap)

    if chunks:
        chunk_embeddings = [get_embedding(chunk) for chunk in chunks]
        query_embedding = get_embedding(query)
        similarities = [
            _cosine_similarity(embedding, query_embedding)
            for embedding in chunk_embeddings
        ]
        # Rank chunk *indices* rather than looking scores up with
        # list.index(): tied scores then cannot select the same chunk twice,
        # and slicing copes with fewer than top_k chunks. sorted() is stable,
        # so ties keep document order.
        ranked = sorted(range(len(chunks)), key=similarities.__getitem__, reverse=True)
        Prompt_paragraph = "".join(chunks[i] for i in ranked[:top_k])
    else:
        # Nothing to retrieve from; the question is asked with no context.
        Prompt_paragraph = ""

    prompt = f"""Please use the information provided below to answer complete question in {targetLang} language
  Additional information:
  \"\"\"
  {Prompt_paragraph}
  \"\"\"
  Question:{query}"""

    response = client.chat.completions.create(
        model=llm_deployment_name,
        messages=[{'role': 'user', 'content': prompt}],
    )
    return response.choices[0].message.content
 

def suggest(selected_language):
    """Ask the chat deployment to present ``promptData`` in a given language.

    Args:
        selected_language: language name interpolated into the instruction
            appended after the module-level ``promptData`` text.

    Returns:
        The content of the first choice returned by the ``gpt-35-turbo``
        deployment.
    """
    # A newline separates the canned text from the instruction; a literal "+"
    # between them would be sent to the model as part of the prompt.
    prompt = f"{promptData}\nPlease give the information in {selected_language} language"
    response = client.chat.completions.create(
        model="gpt-35-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


# Start the Flask server only when this code runs as the main module.
# app.run() is called without arguments, so Flask's own defaults apply.
if __name__ == '__main__':
    app.run()

In [None]:
pip install flask flask_cors