In [1]:
import pandas as pd
import numpy as np
import nanoid
import pymongo
from openai import OpenAI

In [76]:
# Synthetic stock levels and prices for the 48 catalogue rows.
# The random draws happen in the same order as before, so a seeded run gives the same numbers.
quantites = np.random.randint(1, 100, size=48)

# Buy price = tens part + units part + one decimal digit.
# NOTE(review): randint's upper bound is exclusive, so randint(1, 2) is always 1 and every
# buy price falls in [10.0, 19.9] -- confirm that this narrow range is intended.
buy_tens = np.random.randint(1, 2, size=48)
buy_units = np.random.randint(0, 10, size=48)
buy_tenths = np.random.randint(0, 10, size=48)
prices_buy = buy_tens * 10 + buy_units + buy_tenths * 0.1

# NOTE(review): the sale price is the buy price MINUS a random 0-9 amount plus 0.0-0.9, so it
# is usually lower than the buy price -- confirm that the sign is intended.
sale_offset = np.random.randint(0, 10, size=48)
sale_tenths = np.random.randint(0, 10, size=48)
prices_sale = prices_buy - sale_offset + sale_tenths * 0.1


In [130]:
# Load the raw catalogue and fold the 'Needs' and 'Season' text into 'Description'
# (missing values are skipped, the remaining parts are joined with single spaces).
data = pd.read_csv('data/flowershopdata.csv')
text_columns = ['Description', 'Needs', 'Season']
data['Description'] = data[text_columns].apply(
    lambda row: ' '.join(row.dropna().astype(str)),
    axis=1,
)

# Attach the synthetic prices and stock levels generated in the earlier cell.
data['Buy_price'] = prices_buy
data['Sale_price'] = np.round(prices_sale, 2)
data['Quantities'] = quantites

# One picture path per flower, built from the lower-cased flower name.
# NOTE(review): this is an absolute path on one developer machine -- confirm before sharing.
jpg_paths = [
    f"/Users/kacperjarzyna/systemy_rozproszone/test_notebooks/data/img_flowers/{flower.lower()}.jpg"
    for flower in data['Name']
]
data['pic_path'] = jpg_paths

# A random 10-character id for each of the 48 rows.
data['id'] = [nanoid.generate(size=10) for _ in range(48)]

# 'Needs' and 'Season' now live inside 'Description', so the separate columns are removed.
data = data.drop(columns=['Needs', 'Season'])

In [131]:
# Persist the cleaned catalogue. The DataFrame index is written as well (pandas default),
# which is why the later reload of this file drops an 'Unnamed: 0' column.
data.to_csv('data/flowershopdata_clean.csv')

In [46]:
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

def fetch_first_image_url(query, image_index=9, wait_seconds=2):
    """Return the ``src`` of one image from a Pinterest search for ``"<query> flower"``.

    A Safari WebDriver session is opened, the search page is loaded, and after a
    fixed pause every ``<img>`` element on the page is collected.

    Args:
        query: Search term; it is percent-encoded before being placed in the URL.
        image_index: Zero-based position of the ``<img>`` element to use. The
            default of 9 picks the 10th image, as the original code did.
        wait_seconds: Pause after loading the page before images are collected.

    Returns:
        The ``src`` attribute of the chosen image, or ``None`` (after printing a
        notice) when the page has too few images.
    """
    from urllib.parse import quote

    # Encode the term so spaces, '&', '#', ... cannot break the query string.
    encoded_query = quote(str(query), safe='')
    search_url = f"https://pl.pinterest.com/search/pins/?q={encoded_query}%20flower&rs=typed"

    options = webdriver.SafariOptions()
    driver = webdriver.Safari(options=options)
    try:
        driver.get(search_url)

        time.sleep(wait_seconds)
        image_elements = driver.find_elements(By.CSS_SELECTOR, 'img')

        if 0 <= image_index < len(image_elements):
            image_url = image_elements[image_index].get_attribute('src')
        else:
            print("Not enough images found on the page")
            image_url = None
    finally:
        # Always release the browser session, even when loading or scraping fails.
        driver.quit()
    return image_url

def save_image_from_url(url, directory, filename, timeout=30):
    """Download an image and write it to ``directory/filename``.

    Args:
        url: Address of the image to download.
        directory: Target folder; it is created when it does not exist yet.
        filename: Name of the file to write inside ``directory``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        Whatever ``requests`` raises for a failed or non-2xx request, and
        whatever the image library raises for undecodable content.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of handing an error page to the image decoder.
    response.raise_for_status()

    # An empty directory means "current folder"; makedirs('') would raise.
    if directory:
        os.makedirs(directory, exist_ok=True)
    image_path = os.path.join(directory, filename)

    image = Image.open(BytesIO(response.content))
    image.save(image_path)
    print(f"Image saved to {image_path}")



In [6]:
import nanoid
def create_product(product_data, base_url="http://127.0.0.1:8000", timeout=10):
    """POST one product to the ``/products/`` endpoint and print the reply.

    Args:
        product_data: JSON-serialisable dict sent as the request body.
        base_url: Root URL of the products API; a trailing slash is tolerated.
        timeout: Seconds to wait for the request before giving up.

    The decoded JSON reply is printed. When the reply is not JSON, the status
    code and raw text are printed instead of raising a decoding error.
    """
    endpoint = f"{base_url.rstrip('/')}/products/"
    response = requests.post(endpoint, json=product_data, timeout=timeout)
    try:
        body = response.json()
    except ValueError:
        # Non-JSON reply (e.g. a proxy error page): report it rather than crash.
        body = f"HTTP {response.status_code}: {response.text}"
    print(body)

In [7]:
# Reload the cleaned catalogue and drop the 'Unnamed: 0' column that the file carries.
data = pd.read_csv('data/flowershopdata_clean.csv').drop(columns=['Unnamed: 0'])
# Disabled seeding loop: when uncommented it posts every catalogue row to the products API
# through create_product (the name is re-cased to "Capitalised" form first).
# for index,item in data.iterrows():
#     product_data = {
#     "id": item['id'],
#     "name": item['Name'][0] + item['Name'][1:].lower(),
#     "description": item['Description'],
#     "sale_price": item['Sale_price'],
#     "quantity": item['Quantities'],
#     "buy_price": item['Buy_price'],
#     "date": 'None',
#     "picture_path": item['pic_path']
#     }
#     create_product(product_data)


In [70]:
from sentence_transformers import SentenceTransformer, util
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams
from qdrant_client.models import PointStruct

# Sentence-embedding model used to vectorise product descriptions.
model = SentenceTransformer('intfloat/e5-small-v2')
# Ask the model for its output size through the public API instead of indexing into the
# pipeline (model[1]), which silently depends on the internal module order.
vec_len = model.get_sentence_embedding_dimension()

# Local Qdrant instance; the collection is created only when it is not there yet.
client = QdrantClient(url="http://127.0.0.1:6333")
names = [item.name for item in client.get_collections().collections]
if 'products_description' not in names:
    client.create_collection(
        collection_name="products_description",
        vectors_config=VectorParams(size=vec_len, distance=Distance.COSINE),
    )
else:
    print('Collection exits')


Collection exits


In [74]:
# Destructive maintenance step: removes the whole "products_description" collection from Qdrant.
# NOTE(review): in a top-to-bottom run the collection is created above, dropped here, and then
# upserted into further down -- re-create it before upserting, or skip this cell.
client.delete_collection("products_description")

True

In [8]:
# One Qdrant point per catalogue row: the row label is the point id, the normalised
# embedding of the description is the vector, and id/name/description travel as payload.
points = []
for row_index, row in data.iterrows():
    embedding = model.encode(row['Description'], normalize_embeddings=True)
    payload = {
        "Id": row['id'],
        "Name": row['Name'],
        "Description": row['Description'],
    }
    points.append(PointStruct(id=row_index, vector=embedding, payload=payload))


In [215]:
# Push the prepared points into the "products_description" collection and block until
# Qdrant reports the write as applied (wait=True), then show the returned status.
operation_info = client.upsert(collection_name="products_description", points=points, wait=True)

print(operation_info)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


In [36]:
from qdrant_client import QdrantClient, models
from pymongo import MongoClient
from qdrant_client import QdrantClient
from openai import OpenAI
from typing import List
from sentence_transformers import SentenceTransformer

class Chatbot():
    """Flower-shop assistant that answers from retrieved product records.

    A question is embedded, the three nearest product descriptions are looked
    up in Qdrant, the matching product documents are read from MongoDB, and
    the result is placed in the system prompt of an OpenAI chat completion.
    """

    # Quoted annotations: the class body does not need the client types to be
    # importable at definition time.
    mongo_client: "MongoClient"
    qdrant_client: "QdrantClient"
    openai_client: "OpenAI"
    embedding_model: "SentenceTransformer"

    def __init__(self, **data):
        """Store the injected clients and load the embedding model.

        Recognised keys: ``mongo_client``, ``qdrant_client``, ``openai_client``
        (stored as given, ``None`` when absent) and ``embedding_model`` (passed
        to ``SentenceTransformer``).
        """
        self.mongo_client = data.pop('mongo_client', None)
        self.qdrant_client = data.pop('qdrant_client', None)
        self.openai_client = data.pop('openai_client', None)
        self.embedding_model = SentenceTransformer(data.pop('embedding_model', None))

    def answer(self, question: str) -> str:
        """Return the model's reply to ``question`` based on retrieved products."""
        system, user = self.__prepare_question(question)
        chat_completion = self.openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
        )

        return chat_completion.choices[0].message.content

    def __fill_prompt(self, question: str, jsons: List[dict]) -> tuple:
        """Build the ``(system_prompt, user_message)`` pair.

        ``jsons`` is the list of product records; it is serialised as real JSON
        because the prompt tells the model the data is in JSON format.
        """
        import json

        SYSTEM_TEMPLATE = """You are a professional flower shop assistatnt, you reccomend products that fullfils
        every user expectations or answer his question based only on data in json format below:
        {}
        Always check the quantity before recommending if it's equal to 0 ask to wait until restock.
        If none of products fulfill expectations say that sadly we don't have product you need.
        Your answer should short and contain only what flower you recommend, reasoning why and friendly insight.
        Don't encourage further conversation.
        """

        # default=str keeps serialisation from failing on non-JSON field values.
        products_json = json.dumps(jsons, ensure_ascii=False, default=str)
        filled_prompt = SYSTEM_TEMPLATE.format(products_json)
        return filled_prompt, question

    def __qdrant_search(self, question: str):
        """Return the three nearest ``products_description`` hits for ``question``."""
        query = self.embedding_model.encode(question, normalize_embeddings=True)
        results = self.qdrant_client.search(
            collection_name="products_description",
            query_vector=query,
            limit=3,
        )
        return results

    def __search_mongo(self, results):
        """Fetch name, description and quantity for every product hit.

        ``results`` are the Qdrant hits; the ``Id`` entry of each payload is
        matched against the ``id`` field of the ``products`` collection.
        """
        db = self.mongo_client["ecommerce-app"]
        collection = db['products']
        all_jsons = []
        for flower_id in [res.payload.get('Id') for res in results]:
            for item in collection.find({"id": flower_id}):
                # A fresh dict per document: sharing one dict across documents
                # made every entry for the same id show the last document.
                all_jsons.append({
                    'Name': item.get('name'),
                    'Description': item.get('description'),
                    'Quantity': item.get('quantity'),
                })
        return all_jsons

    def __prepare_question(self, question):
        """Run retrieval for ``question`` and return the filled prompt pair."""
        qdrant_result = self.__qdrant_search(question)
        mongo_result = self.__search_mongo(qdrant_result)
        return self.__fill_prompt(question, mongo_result)
    

import os

# Wiring for the chatbot: embedding model name plus the three service clients.
# SECURITY: the OpenAI key is read from the OPENAI_API_KEY environment variable instead of
# being stored in the notebook. The key that was previously committed here must be treated
# as leaked and revoked.
# NOTE(review): this rebinds `data`, which other cells use for the catalogue DataFrame.
data = {
    "embedding_model": "intfloat/e5-small-v2",
    "mongo_client": MongoClient("mongodb://localhost:27017/"),
    "qdrant_client": QdrantClient("http://localhost:6333"),
    "openai_client": OpenAI(api_key=os.environ["OPENAI_API_KEY"]),
}

bot = Chatbot(**data)


In [45]:
# Rebind `data` to the catalogue CSV stored with the app's database init data.
# NOTE(review): absolute path on one developer machine -- this cell fails anywhere else;
# confirm the intended project-relative location.
data = pd.read_csv('/Users/kacperjarzyna/systemy_rozproszone/app/databases/init_data/products/flowershopdata_clean.csv')

In [75]:
# Manual end-to-end check: send one free-text request through the chatbot; the reply is
# shown as the cell output.
question = "I would like white flower that needs acidic soil"
bot.answer(question)

'I recommend Rhododendron. It is a beautiful white flower that requires acidic soil. Unfortunately, we only have 7 in stock, so I recommend purchasing it soon.'

In [69]:
# Open a connection to the local MongoDB server.
client_mongo = pymongo.MongoClient("mongodb://localhost:27017/")

# Work against the "ecommerce-app" database.
db = client_mongo["ecommerce-app"]

# Names of all collections that database currently holds.
collection_names = db.list_collection_names()

# Print every document stored in the products collection.
collection = db['products']
documents = collection.find()
for document in documents:
    print(document)



In [73]:
# Destructive: remove the whole products collection, then list what is left as a check.
db.drop_collection('products')
# Query again instead of reusing the earlier cursor: that cursor was already iterated by the
# previous cell, so looping over it printed nothing regardless of the collection's state.
documents = db['products'].find()
for document in documents:
    print(document)