In [4]:
from openai import OpenAI
import os
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from langchain_mistralai import MistralAIEmbeddings
import chromadb

# Credentials are read from the environment: export OPENAI_API_KEY and
# MISTRAL_API_KEY before starting the kernel. Keys must never be hardcoded in
# the notebook. A missing variable raises KeyError here, before any client
# object is constructed.
api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI()  # picks up OPENAI_API_KEY from the environment
openai_model_name = 'text-embedding-3-small'
openai_embed = OpenAIEmbeddings(model=openai_model_name)


# NOTE(review): a Mistral API key was previously committed in this cell;
# treat it as leaked and rotate it.
api_key_mistral = os.environ["MISTRAL_API_KEY"]
client_mistral = MistralClient(api_key=api_key_mistral)
mistral_embed = MistralAIEmbeddings(model="mistral-embed")




In [26]:
def retriever_list(embedding_model, path: str, collection: str, q: str, k: int) -> list:
    """Return the text of the ``k`` stored documents most similar to ``q``.

    Opens the persistent Chroma database at ``path``, wraps the named
    collection in a LangChain ``Chroma`` vector store and runs a similarity
    search for the query.

    Args:
        embedding_model: Embedding function handed to ``Chroma``.
            NOTE(review): presumably this must be the same model that was
            used to populate the collection -- confirm against the indexing
            notebook.
        path: Directory of the persistent Chroma database.
        collection: Name of the collection to query.
        q: Query text.
        k: Number of results requested from the similarity search.

    Returns:
        A list with the ``page_content`` of each returned document, in the
        order the vector store returned them.
    """
    persistent_client = chromadb.PersistentClient(path=path)
    vector_store = Chroma(
        client=persistent_client,
        collection_name=collection,
        embedding_function=embedding_model,
    )

    documents = vector_store.similarity_search(q, k=k)
    # Iterate the documents directly rather than indexing by position.
    return [document.page_content for document in documents]

# Embedding Comparison

## OpenAI-2 (without feature engineering)

In [27]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection stored in 'openai_embed_db2'.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="What are the specific features or aspects that users appreciate the most in our application?",
    k=5,
)

['Nice features and user friendly',
 'Awesome features. Love the app',
 'User friendly and awesome features',
 'Features are great. User friendly too.',
 'I love the features of this App.']

In [7]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection: competitor-comparison question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="In comparison to our application, which music streaming platform are users most likely to compare ours with?",
    k=5,
)

['Best music streaming application on the market',
 'Best of any and all streaming music apps/sites',
 'Best music streaming App on the market',
 'Best music streaming app in the market',
 'Best music streaming app so far']

In [8]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection: dissatisfaction question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="What are the primary reasons users express dissatisfaction with Spotify?",
    k=5,
)

["I love my music. As far as players go, it's a decent player. As far as selection of music goes, I think they do the best they can; no serious complaints there. They can't force artists/record labels.  My major complaints relate to the social media aspect of the app and other features. It's terrible delivering notifications. The most recent version of the app doesn't even have the notification icon. I can't view notifcations within the app or the desktop version. I have trouble finding other user profiles even though they are public and I have verified the spelling. Sometimes playlists that I share don't show up with other users. I never know what to expect  Spotify has never allowed users to add links or descriptions to playlists or individual tracks which I would think is a basic feature for any social media app. When another user shares a track with me, I've had it show as a new message for weeks, even though I've clicked on it and read it/listened to it numerous times.  Also, feat

In [9]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection: emerging-trends question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="Can you identify emerging trends or patterns in recent user reviews that may impact our product strategy?",
    k=5,
)

['Can we know specifically what changes or improvements are being made with every update?',
 'Maybe it\'s just me, but I\'m interested to know what\'s new or what\'s in the updates you release, so in addition to the standard "We\'re always adding new things and improving your experience" message, would you consider just tell us what each update consists of? Thanks guys. Great app.',
 'Could use some UX/UI updates.',
 "Updates always improve UX and I've had great customer experience.",
 'No complaints really but I am waiting to see what new features can be integrated into the future.']

In [10]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="apakah ada masalah terkait pembayaran?",
    k=5,
)


['Mau langganan via kartu kredit sistem nya gagal melulu, katanya "system belum tersedia / bermasalah" padahal kartu kreditnya uda Visa sm Mastercard. Akhirnya bayar via bank transfer per bulan. Tapi ga praktis jadinya',
 'Kalau bisa, opsi untuk pembayaran akun premium ditambah. Di Indonesia pengguna lebih suka metode pembayaran pemotongan pulsa untuk pra bayar/tagihan di simcard pasca bayar. Semoga bisa ditingkatkan. Pada akhirnya, aplikasi ini jauh lebih baik dari kompetitornya, menurut saya.',
 'Ane mau up ke premium saldo ada tapi ga cukup mulu, kenapa ini -_-',
 'Aplikasi berbayar ini OK bgt',
 'please pay providers use telkomsel']

In [11]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed2' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db2',
    collection='openai_embed2',
    q="bagaimana review design interface spotify?",
    k=5,
)



['Fantastic app design with the power of spotify',
 'Great way to access Spotify`s services with an easy-to-use interface and fluid design.',
 'Love the selection and design of spotify.',
 'Love Spotify. Great app with beautiful design.',
 'Well designed and easy to use. Thanks Spotify!']

## OpenAI-1 (with feature engineering)

In [12]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection stored in 'openai_embed_db'.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="What are the specific features or aspects that users appreciate the most in our application?",
    k=5,
)

['Excellent review at 3.7.0.833 version: Love everything about the app',
 'Excellent review at 3.6.0.789 version: Love everything about this app â˜º',
 'Excellent review at 3.6.0.789 version: Love everything about this app',
 'Excellent review at 3.7.0.833 version: LOVE THE APP',
 'Excellent review at 3.7.0.833 version: Love everything about this app, especially the premium upgrade! Totally worth it!']

In [13]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection: competitor-comparison question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="In comparison to our application, which music streaming platform are users most likely to compare ours with?",
    k=5,
)

['Excellent review at 4.6.0.772 version: Best of any and all streaming music apps/sites',
 "Excellent review at 4.4.0.1008 version: If you are looking for a good music app, this is the one for you! Only 10 bucks a month and you get features that other streaming services don't provide. Honestly cannot compete with any other. While this app is great I would love for there to be a service similar to sound hound so that I could find new music and add it right through the app rather than searching for it with Google now or sound hound and then searching for it in spotify. If you add that this app would be perfect!",
 "Excellent review at 4.7.0.878 version: I love streaming platforms because I have to many songs, over 800, and that's quite a lot of space and money that I would require if I were down to buy every single one of them. Personally the best streaming app so far.",
 'Excellent review at 4.8.0.1004 version: Best Music Streaming Platform',
 'Excellent review at 4.7.0.878 version: The

In [14]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection: dissatisfaction question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="What are the primary reasons users express dissatisfaction with Spotify?",
    k=5,
)

["Average review at 5.2.0.885 version: I love my music. As far as players go, it's a decent player. As far as selection of music goes, I think they do the best they can; no serious complaints there. They can't force artists/record labels.  My major complaints relate to the social media aspect of the app and other features. It's terrible delivering notifications. The most recent version of the app doesn't even have the notification icon. I can't view notifcations within the app or the desktop version. I have trouble finding other user profiles even though they are public and I have verified the spelling. Sometimes playlists that I share don't show up with other users. I never know what to expect  Spotify has never allowed users to add links or descriptions to playlists or individual tracks which I would think is a basic feature for any social media app. When another user shares a track with me, I've had it show as a new message for weeks, even though I've clicked on it and read it/liste

In [15]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection: emerging-trends question.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="Can you identify emerging trends or patterns in recent user reviews that may impact our product strategy?",
    k=5,
)

['Average review at 5.9.0.774 version: There is twice as many commercial then there was a month ago',
 "Excellent review at 5.9.0.774 version: Been a user since day one and I wouldn't change it for any other app",
 'Good review at 3.5.0.963 version: Getting alot more commerical than before',
 'Excellent review at 5.9.0.774 version: Way more interesting to use than the other app!',
 "Excellent review at 5.9.0.774 version: Been using this app for 3 years and wouldn't change it x"]

In [16]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="apakah ada masalah terkait pembayaran?",
    k=5,
)


["Below Average review at - version: Can't pay With telkomsel billing",
 'Below Average review at 7.1.0.1025 version: Saya mau nih bayar premium, asal pembayarannya dipermudah yaitu dengan billing operator (telkomsel). Tolong dong ditampung...males tau transfer dulu ke atm, mana ga punya kartu kredit. Tolong yah!',
 'Good review at 5.5.0.653 version: Mau langganan via kartu kredit sistem nya gagal melulu, katanya "system belum tersedia / bermasalah" padahal kartu kreditnya uda Visa sm Mastercard. Akhirnya bayar via bank transfer per bulan. Tapi ga praktis jadinya',
 'Poor review at - version: Pelit amat kasi vip aja cuma 7 hari habis itu bayar lg hrs ny 3 bln kek :v',
 'Good review at - version: tp selalu harus berbayar']

In [17]:
# Top-5 similarity search with the OpenAI embedding model against the
# 'openai_embed' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=openai_embed,
    path='openai_embed_db',
    collection='openai_embed',
    q="bagaimana review design interface spotify?",
    k=5,
)



['Excellent review at 4.6.0.772 version: Props to the spotify team for their rediculously good UX design and implementation, such a good ap',
 'Excellent review at 4.4.0.1008 version: Love Spotify. Great app with beautiful design.',
 'Excellent review at 4.7.0.878 version: Spotify has a good interface, a lot of music, and is easy to use.',
 "Excellent review at 4.2.0.739 version: I'd recommend Spotify to anyone into music. Simple design and easy to use.",
 'Excellent review at 4.4.0.1008 version: Spotify is the greatest service for any fan of music. Never has there been a better way to, not just listen, but experience music. The app keeps getting better and the design is beautiful.']

## Mistral-Embedding


In [18]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection stored in 'spotify_review_db2'.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="What are the specific features or aspects that users appreciate the most in our application?",
    k=5,
)

['Excellent review at 2.3.0.828 version: You guys have great music that I have been missing by using other apps- thank you spotify',
 'Excellent review at 2.2.0.636 version: Use to have other apps n they suck n then I found spotify n love it can ask for more',
 "Excellent review at - version: This application is great I could listen to my favourite music's... Thanks spotify",
 'Excellent review at 5.0.0.956 version: Spotify is the best app for anyone  who remotely likes music',
 'Excellent review at 1.2.0.534 version: One of the best app designs out there. Artistic, functional, reliable. Spotify keeps my money for these very reasons. Keep up the great work']

In [19]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection: competitor-comparison question.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="In comparison to our application, which music streaming platform are users most likely to compare ours with?",
    k=5,
)

['Excellent review at 2.0.0.702 version: Pretty much the best program for music streaming. Only thing I would insist is better/more relevant recommendations',
 'Excellent review at 3.6.0.789 version: Best music streaming app about.. can not fault it',
 'Excellent review at 3.6.0.789 version: Best music streaming app on the market.',
 'Excellent review at 2.0.0.702 version: There are a lot of music streaming apps out there, but I believe the one that sets this one apart are the great playlist!',
 'Excellent review at 3.6.0.789 version: Tried all the streaming musolic apps..best one']

In [20]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection: dissatisfaction question.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="What are the primary reasons users express dissatisfaction with Spotify?",
    k=5,
)

['Average review at 4.0.0.800 version: Music selection is stupendous; however they pay artists close to nothing so they\'re crushing the very people they need to keep creating stuff in order for their model to work.  That said, for an app that is this popular and costs $10/month, I don\'t understand why the UI is so terrible. My guess is UX by committee, so no one wins. :(  It takes 3 taps to get to the Search function. The buttons you use most often should be available all the time on a phone.  Please, please do not make me click the menu button to get there.  I\'m sure you\'ve done some UX research, please listen to your UX researchers and designers.  How do people actually use your app?  What do they go to most?  Make those easiest to get to.   Also, I can no longer wrap my head around what I should expect to find in "Your Music".  If I\'ve saved an Album is it in Albums or Playlists? Why isn\'t voice search integrated.  I love listening in my car but it\'s beyond dangerous to type 

In [21]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection: emerging-trends question.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="Can you identify emerging trends or patterns in recent user reviews that may impact our product strategy?",
    k=5,
)

['Excellent review at 3.6.0.789 version: Just that u have to be specific about what u want when u r searching',
 'Good review at 2.2.0.636 version: The ux often lacks guidance for the user and is not always intuitive.',
 'Excellent review at 3.6.0.789 version: It gives u the option to what u want',
 'Good review at 3.7.0.833 version: No complaints so far. Seems to be easy to use and I have yet for it not to find a search topic',
 'Excellent review at 3.6.0.789 version: Almost anything i wanna find is in here! Cool']

In [22]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="apakah ada masalah terkait pembayaran?",
    k=5,
)

["Excellent review at 1.3.0.423 version: The only problem is that how students without paypal & debit cards can avail the premium promo? It's budget friendly but the paying process is kinda complicated esp. To those who are not use to paypal & credit/debit cards. Thanks ðŸ˜Š",
 'Good review at 4.1.0.868 version: PLEASE DO ONE PAYMENT METHOD FOR CELCOM USERS TOO.ITS NOT FAIR.',
 "Excellent review at 3.6.0.789 version: Love the app so much but can't go for premium... Want to apply for premium but you only accept cards... Is it possible to request for you to change some payment methods.. I'm using a prepaid load and no credit card at all.. Is it possible to just deduct it to my prepaid load instead? Please make it more convenient to all people who wants to go for.premium not only for card holders. Thank you and God bless!",
 "Average review at 3.0.0.1124 version: But it's have payment offline  and I don't have money",
 "Poor review at 1.3.0.423 version: I really would want to go premium w

In [23]:
# Top-5 similarity search with the Mistral embedding model against the
# 'spotify_review2' collection, using an Indonesian-language query.
retriever_list(
    embedding_model=mistral_embed,
    path='spotify_review_db2',
    collection='spotify_review2',
    q="bagaimana review design interface spotify?",
    k=5,
)

['Excellent review at 1.6.0.952 version: Just improve the interface of the app... beats got a good one! I would like to see what else we can get from spotify',
 "Excellent review at 3.5.0.963 version: Spotify is well designed. While it's not always as intuitive as I'd like - mostly because it isn't like what i was used to using before, they have thought of just about everything from a feature perspective. I really like the user paradigm. Once i figured that out, it became much easier to use. Love the device linking. Very cool job, y'all! Thank you!",
 'Excellent review at 4.6.0.772 version: Props to the spotify team for their rediculously good UX design and implementation, such a good ap',
 'Excellent review at 3.5.0.963 version: Well designed and easy to use. Thanks Spotify!',
 "Good review at 3.6.0.789 version: Spotify's catalog is fabulous, but the interface is half-baked."]

# Summary

It is hard to pick a single best embedding model for RAG: all of the configurations above perform very well on these sample queries. However, I would rather choose mistral-embed because of its dimensionality. The OpenAI embedding has a larger dimension than the Mistral embedding, so I decided to choose Mistral to minimize storage size.