In [None]:
!pip install json
!pip install numpy
!pip install openai
!pip install pandas
!pip install tenacity
!pip install flask-ngrok
!pip install scikit-learn

In [29]:
import json
import os

import numpy as np
import openai
# The `p` alias is kept in case other cells still reference it; the notebook's
# functions use the conventional `pd` alias.
import pandas as p
import pandas as pd
from flask import Flask, request
from flask_ngrok import run_with_ngrok
from tenacity import retry, wait_random_exponential, stop_after_attempt

In [5]:
# Read the key from the OPENAI_API_KEY environment variable so the secret is not
# stored in the notebook; the original "sk-" placeholder is the fallback when unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk-")

## GPT Models

In [6]:
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(min=1, max=20))
def get_embedding(text: str, model="text-embedding-ada-002") -> list[float]:
    """Request an embedding for ``text`` and return it.

    The call goes through ``openai.Embedding.create`` and is retried by
    tenacity: at most 3 attempts, waiting between them according to
    ``wait_random_exponential(min=1, max=20)``.

    Args:
        text: Input passed to the embeddings endpoint.
        model: Name of the embedding model to use.

    Returns:
        The ``"embedding"`` entry of the first item in the response's
        ``"data"`` list.
    """
    result = openai.Embedding.create(model=model, input=text)
    first_item = result['data'][0]
    return first_item["embedding"]

In [7]:
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(min=1, max=20))
def get_completion(message, model="text-davinci-003", max_tokens=257) -> str:
    """Send ``message`` as a prompt to the completions endpoint and return the text.

    The call goes through ``openai.Completion.create`` and is retried by
    tenacity: at most 3 attempts, waiting between them according to
    ``wait_random_exponential(min=1, max=20)``.

    Args:
        message: Prompt text.
        model: Name of the completion model to use.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        The ``"text"`` field of the first entry in the response's ``"choices"``.
    """
    # Sampling settings are fixed; generation stops at the first blank line.
    request_options = dict(
        model=model,
        prompt=message,
        temperature=0.7,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=["\n\n"],
    )
    completion = openai.Completion.create(**request_options)
    first_choice = completion["choices"][0]
    return first_choice["text"]

In [18]:
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``. When either vector has
    zero norm the ratio is undefined (0/0), so 0.0 is returned instead of
    NaN with a NumPy runtime warning.

    Args:
        a: First vector (any sequence accepted by NumPy).
        b: Second vector, same length as ``a``.

    Returns:
        The similarity score, or 0.0 if either vector has zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

In [19]:
def get_similarity(embedding_1: list[float], embedding_2: list[float]) -> float:
    """Score two embeddings against each other.

    Thin wrapper that delegates to ``cosine_similarity`` and returns its result.
    """
    score = cosine_similarity(embedding_1, embedding_2)
    return score

## Semantic Search

In [8]:
def merge_dataframes(df1, df2):
    """Outer-merge two frames on source, takeaway, status and embedding.

    The ``embedding`` values are converted to tuples for the merge, because
    lists are unhashable and cannot serve as merge keys, and converted back
    to lists in the result.

    The conversion is done on copies (``DataFrame.assign``), so ``df1`` and
    ``df2`` are left untouched; previously their ``embedding`` columns were
    overwritten with tuples as a side effect.

    Args:
        df1: Frame with ``source``, ``takeaway``, ``status`` and ``embedding`` columns.
        df2: Frame with the same four columns.

    Returns:
        A new DataFrame holding the outer merge, with ``embedding`` as lists.
    """
    left = df1.assign(embedding=df1['embedding'].apply(tuple))
    right = df2.assign(embedding=df2['embedding'].apply(tuple))
    merged_df = pd.merge(left, right, on=['source', 'takeaway', 'status', 'embedding'], how='outer')
    merged_df['embedding'] = merged_df['embedding'].apply(list)
    return merged_df

In [9]:
def search_takeaways(prompt, df, n=3):
    """Return the ``n`` takeaways whose embeddings are most similar to ``prompt``.

    The prompt is embedded with ``get_embedding`` and compared against every
    value in ``df['embedding']`` with ``get_similarity``.

    The scores are attached to a copy of the frame, so the caller's ``df`` no
    longer gains a ``similarity`` column. An empty frame returns ``[]``
    without making an embedding request.

    Args:
        prompt: Query text to embed.
        df: Frame with ``embedding`` and ``takeaway`` columns.
        n: Number of takeaways to return.

    Returns:
        A list of up to ``n`` takeaway values, most similar first.
    """
    if df.empty:
        return []
    prompt_embedding = get_embedding(prompt)
    # Only the embedding column is needed, so iterate it directly instead of iterrows().
    similarities = [get_similarity(prompt_embedding, embedding) for embedding in df['embedding']]
    ranked = df.assign(similarity=similarities)
    return ranked.nlargest(n, 'similarity')['takeaway'].tolist()

# UNICorn 🦄 ChatBot

In [10]:
def get_answer(message, df):
    """Answer ``message`` using the most relevant takeaways in ``df`` as context.

    The takeaways returned by ``search_takeaways`` are placed in the prompt's
    data section, one per line, and the prompt is sent to ``get_completion``.
    They were previously joined with an empty string, so separate takeaways
    ran together with no boundary between them.

    Args:
        message: The user's question.
        df: Frame of takeaways and embeddings, passed on to ``search_takeaways``.

    Returns:
        The completion text produced by ``get_completion``.
    """
    top_3_takeaways = search_takeaways(message, df)
    # Built outside the f-string: backslashes inside f-string expressions are
    # a syntax error before Python 3.12.
    data = "\n".join(top_3_takeaways)
    prompt = f'Using the data answer the following questions:\n\nQuestion:{message}\n\nData:{data}\n\nAnswer:'
    return get_completion(prompt)

> Test chatbot

In [21]:
def chatbot(primary_path='nameless_embedding2.jsonl', secondary_path='website_embedding.jsonl'):
    """Run an interactive question/answer loop on standard input.

    Both JSON-lines files are loaded and combined with ``merge_dataframes``.
    Each line the user enters is then answered with ``get_answer`` until the
    user enters ``exit``.

    Blank input is skipped rather than sent to the API, and end-of-input or
    a keyboard/kernel interrupt at the prompt ends the loop cleanly.

    Args:
        primary_path: Path of the first JSON-lines embedding file.
        secondary_path: Path of the second JSON-lines embedding file.
    """
    primary_df = pd.read_json(primary_path, lines=True)
    secondary_df = pd.read_json(secondary_path, lines=True)
    df = merge_dataframes(primary_df, secondary_df)
    while True:
        try:
            message = input('User input: ').strip()
        except (EOFError, KeyboardInterrupt):
            break
        if message == 'exit':
            break
        if not message:
            continue
        # Print the speaker label first so it is visible while the answer is computed.
        print('UNICorn: ', end='')
        answer = get_answer(message, df)
        print(answer, end='\n\n')

In [None]:
# Start the interactive loop; entering 'exit' at the prompt ends it.
chatbot()

## Server Experiment

In [None]:
app = Flask(__name__)
run_with_ngrok(app)


@app.route('/', methods=['POST'])
def receive_message():
    """Handle a POSTed chat message and reply with a fixed greeting.

    Expects a JSON object body with a ``message`` field. The message is
    printed and a canned bot reply is returned as ``application/json``.
    A body that is not a JSON object, or that lacks ``message``, gets a
    400 JSON error instead of an unhandled exception.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'message' not in payload:
        error = {"message": "Request body must be JSON with a 'message' field", "role": "bot"}
        return app.response_class(json.dumps(error), status=400, mimetype='application/json')

    message = payload['message']
    print(message)
    response = {"message": "Hi, How can I help you", "role": "bot"}

    # Return response as JSON, with the matching content type
    return app.response_class(json.dumps(response), mimetype='application/json')


# Start Flask app
if __name__ == '__main__':
    app.run()