Skip to content

Commit

Permalink
Merge pull request #40 from Azure-Samples/gk/26-aggregate-miyagi-skills-into-a-notebook
Browse files: browse the repository at this point in the history
  • Loading branch information
thegovind committed May 1, 2023
2 parents c4c710c + b953fed commit 3e1ced6
Show file tree
Hide file tree
Showing 9 changed files with 5,048 additions and 5,162 deletions.
4 changes: 2 additions & 2 deletions services/data-prep/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ RUN pip install -r requirements.txt
COPY . .

# expose the service port
EXPOSE 8081
EXPOSE 8082

# run uvicorn command
CMD ["uvicorn", "main:app", "--port", "8081", "--host", "0.0.0.0", "--reload"]
CMD ["uvicorn", "main:app", "--port", "8082", "--host", "0.0.0.0", "--reload"]
105 changes: 101 additions & 4 deletions services/data-prep/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,66 @@
import logging
import openai
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, BaseSettings
import asyncpraw
from azure.storage.blob.aio import BlobServiceClient
import os
import json
import asyncio
from qdrant_client.http import models
from qdrant_client.http.models import Batch
from typing import List

from qdrant_client import QdrantClient

# Qdrant collection name passed to recreate_collection and upsert.
COLLECTION_NAME = "miyagi-customer-profiles"
# Passed as `deployment_id` to openai.Embedding when requesting embeddings.
EMBEDDING_MODEL = "gk-ada-002"
# Vector size given to the collection's VectorParams.
EMBEDDING_MODEL_DIMENSION = 1536


# Settings holder; a single instance is created below via `settings = Settings()`.
# NOTE(review): leading indentation was lost in this capture. `settings.client`
# is used by store_comments_in_qdrant, so `client` is presumably a class
# attribute and the statements below presumably run in the class body at
# import time -- confirm against the real file.
class Settings(BaseSettings):
# Configure the `openai` module globals for an Azure endpoint; base URL and key
# are read from the OPENAI_API_BASE / OPENAI_API_KEY environment variables.
openai.api_type = "azure"
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = "2022-12-01"
openai.api_key = os.getenv("OPENAI_API_KEY")

# to connect to local Qdrant instance
client = QdrantClient(":memory:")
# to connect to remote Qdrant instance in docker
# client = QdrantClient(host="localhost", port=6333)
# (Re)create the collection with cosine distance and the embedding dimension.
# NOTE(review): `"%s" % COLLECTION_NAME` is equivalent to COLLECTION_NAME itself.
client.recreate_collection(
collection_name=("%s" % COLLECTION_NAME),
vectors_config=models.VectorParams(size=EMBEDDING_MODEL_DIMENSION, distance=models.Distance.COSINE),
)

# Only report ERROR and above from the "aiohttp" logger.
# NOTE(review): the identical call appears again a few lines below; one of the
# two is redundant (possibly an artifact of this diff capture).
logging.getLogger("aiohttp").setLevel(logging.ERROR)

# Module-level settings instance; exposes `settings.client` to the handlers.
settings = Settings()

logging.getLogger("aiohttp").setLevel(logging.ERROR)

app = FastAPI()

# NOTE(review): wildcard origins are combined with allow_credentials=True.
# FastAPI's CORS documentation says origins must be listed explicitly for
# credentialed requests -- confirm this is intended outside local development.
origins = ["*"]

app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)


@app.get("/health")
async def pong():
    """Health-check endpoint: always answers with a fixed OK status."""
    health = {"status": "Ok"}
    return health


@app.get("/", include_in_schema=False)
async def docs_redirect():
    """Send requests for the root path to the ReDoc page at /redoc."""
    redoc_path = '/redoc'
    return RedirectResponse(url=redoc_path)


class SubredditInput(BaseModel):
subreddit: str
Expand Down Expand Up @@ -45,7 +93,7 @@ async def fetch_comments(subreddit_input: SubredditInput):
user_agent=os.getenv("REDDIT_USER_AGENT"),
) as reddit:
subreddit = await reddit.subreddit(subreddit_input.subreddit)
top_posts = subreddit.top("month", limit=5)
top_posts = subreddit.top("month", limit=1)

comments_data = []

Expand All @@ -67,3 +115,52 @@ async def fetch_comments(subreddit_input: SubredditInput):
await container_client.client.close()

return {"detail": "Comments successfully fetched and stored in Azure Storage Files"}


async def get_comments_from_storage(subreddit: str):
    """Load the stored comments for *subreddit* from blob storage.

    Looks up the blob ``{subreddit}_comments.json`` in the container returned
    by ``get_blob_container()`` and parses its text content as JSON.

    Args:
        subreddit: Subreddit name used to build the blob name.

    Returns:
        The decoded JSON document, or ``None`` when the blob does not exist.
    """
    container_client = await get_blob_container()
    try:
        blob_client = container_client.get_blob_client(f"{subreddit}_comments.json")
        if not await blob_client.exists():
            return None
        download = await blob_client.download_blob()
        return json.loads(await download.content_as_text())
    finally:
        # Release the client on every path, using the same call fetch_comments
        # uses; previously this function never closed the client it acquired.
        # NOTE(review): assumes get_blob_container() returns a fresh client per
        # call, as the close in fetch_comments implies -- confirm.
        await container_client.client.close()


async def get_openai_embeddings(user_profile: str):
    """Request an embedding for *user_profile* from the configured deployment.

    Args:
        user_profile: Text to embed.

    Returns:
        The raw response from ``openai.Embedding``; callers in this module read
        the vector from ``response["data"][0]["embedding"]``.
    """
    # Use the awaitable variant so the request does not block the event loop
    # while this coroutine is waiting on the network.
    embeddings = await openai.Embedding.acreate(deployment_id=EMBEDDING_MODEL,
                                                input=user_profile)
    # Logged at DEBUG rather than printed: the response carries the whole
    # embedding vector and would otherwise flood stdout on every call.
    logging.debug("GPT output: %s", embeddings)
    return embeddings


async def store_comments_in_qdrant(comments: List[str], max_comments: int = 5):
    """Embed comments and upsert the resulting vectors into Qdrant.

    Args:
        comments: Comment texts to embed, in order.
        max_comments: Upper bound on how many leading comments are embedded.
            Defaults to 5 due to internal AOAI rate limits.

    Returns:
        A dict with a ``detail`` message.
    """
    # TODO: Remove the cap after AOAI rate limits are removed
    selected = comments[:max_comments]

    embeddings = []
    for comment in selected:
        gpt_output = await get_openai_embeddings(comment)
        embeddings.append(gpt_output["data"][0]["embedding"])

    # Nothing was embedded (empty input or a zero cap): skip the upsert call.
    if embeddings:
        # NOTE(review): ids always run 1..len(embeddings), so every call writes
        # to the same point ids and no payload is attached -- confirm intended.
        settings.client.upsert(collection_name=COLLECTION_NAME,
                               points=Batch(
                                   ids=list(range(1, len(embeddings) + 1)),
                                   vectors=embeddings
                               ))
    return {"detail": "Comments successfully stored in Qdrant vector store"}


@app.post("/store_comments_in_qdrant")
async def store_comments_in_qdrant_endpoint(subreddit_input: SubredditInput):
    """Embed and store the comments previously saved for the given subreddit.

    Raises:
        HTTPException: 404 when storage holds no comments for the subreddit.
    """
    stored = await get_comments_from_storage(subreddit_input.subreddit)
    if stored:
        return await store_comments_in_qdrant(stored)
    raise HTTPException(status_code=404, detail="No comments found in storage")


@app.post("/fetch_and_store_comments")
async def fetch_and_store_comments(subreddit_input: SubredditInput):
    """Run the fetch step, then the store step, and report both results."""
    results = {}
    # The store step reads what the fetch step wrote, so the order matters.
    results["fetch_result"] = await fetch_comments(subreddit_input)
    results["store_result"] = await store_comments_in_qdrant_endpoint(subreddit_input)
    return results
6 changes: 4 additions & 2 deletions services/data-prep/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
uvicorn>=0.21.1
uvicorn==0.22.0
fastapi>=0.95.1
pydantic>=1.10.7
asyncpraw>=7.7.0
azure-storage-blob>=12.16.0
setuptools==67.7.1
setuptools==67.7.2
qdrant-client==1.1.6
openai==0.27.5
11 changes: 6 additions & 5 deletions services/data-prep/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
description="Preps private data for Miyagi",
packages=find_packages(),
install_requires=[
"uvicorn>=0.20.0",
"openai>=0.26.1",
"fastapi>=0.89.1",
"pydantic>=1.10.4",
"praw>=7.7.0",
"uvicorn>=0.22.0",
"openai>=0.27.5",
"fastapi>=0.95.1",
"pydantic>=1.10.7",
"asyncpraw>=7.7.0",
"pypdf>=3.8.0",
"azure-storage-blob>=12.16.0",
"qdrant-client>=1.1.6"
],
extras_require={
"dev": [
Expand Down
1 change: 1 addition & 0 deletions services/frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"next-seo": "^6.0.0",
"next-sitemap": "^4.0.7",
"next-themes": "^0.2.1",
"nextjs-google-analytics": "^2.3.3",
"overlayscrollbars": "^2.1.1",
"overlayscrollbars-react": "^0.5.0",
"rc-slider": "^10.1.1",
Expand Down
2 changes: 1 addition & 1 deletion services/frontend/src/layouts/header/header.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export default function Header({ className }: { className?: string }) {
<SearchButton
variant="transparent"
className="ltr:-ml-[17px] rtl:-mr-[17px] dark:text-white"
/> Semantic Search, Powered by Azure Cognitive Search
/> <span className="opacity-25">Semantic Search, Powered by Azure Cognitive Search</span>
</div>
<HeaderRightArea />
</div>
Expand Down
2 changes: 2 additions & 0 deletions services/frontend/src/pages/_app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {ThemeProvider} from 'next-themes';
import {QueryClient, QueryClientProvider} from 'react-query';
import ModalsContainer from '@/components/modal-views/container';
import {AccountProvider} from '@/lib/hooks/use-connect';
import { GoogleAnalytics } from 'nextjs-google-analytics';
import 'overlayscrollbars/overlayscrollbars.css';
// base css file
import 'swiper/css';
Expand Down Expand Up @@ -32,6 +33,7 @@ function CustomApp({ Component, pageProps }: AppPropsWithLayout) {
content="width=device-width, initial-scale=1 maximum-scale=1"
/>
<title>Miyagi - Intelligent Financial Coach</title>
<GoogleAnalytics />
</Head>
<QueryClientProvider client={queryClient}>
<ThemeProvider
Expand Down
Loading

0 comments on commit 3e1ced6

Please sign in to comment.