# Initializations

In [None]:
!pip install langchain python-magic unstructured chromadb openai

In [None]:
import datetime
import getpass
import json
import os
import random
import shutil
from datetime import timedelta

import magic
import nltk
import pandas as pd
from IPython.display import Markdown, display
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Start every run from an empty ./data directory so files left over from a
# previous run are never picked up by the loader. Pure Python instead of a
# shell `rm -rf`, so the cell also works where no POSIX shell is available.
shutil.rmtree('data', ignore_errors=True)
os.makedirs('data', exist_ok=True)

# Never store a real key in the notebook: reuse a key already exported in the
# environment, otherwise ask for it without echoing it to the cell output.
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

def genRandomDate():
  """Return a random timestamp from the last 1000 days as a string.

  The moment is drawn uniformly between "now minus 1000 days" and "now"
  (local time, as given by datetime.datetime.now()) and formatted with str().
  """
  now = datetime.datetime.now()
  window = timedelta(days=1000)
  # Scale the whole window by a fraction in [0, 1) and add it to the window start.
  offset = window * random.random()
  return str((now - window) + offset)

def genRandomUser():
  """Return one username picked at random from a fixed pool of 80 handles."""
  handles = [
      "Poppyhead", "Zygomatic", "Calathus", "Skedaddle", "Negative",
      "Rejigger", "Apolaustic", "Phalanx", "Havelock", "Sardoodledom",
      "Galimatias", "Palomino", "Thaumatogeny", "Piffling", "Vaniloquence",
      "Trencherman", "Pilgarlic", "Curlicue", "KilimZol", "EnchanMizzen",
      "Ludicrism", "DartmoorYawp", "Anorchous", "Umpteen", "IquidDuck",
      "JcavalJunket", "Symptosis", "RuffminOxter", "Luciform", "Antimacassar",
      "Onomatopoeia", "Blackguard", "Xerotripsis", "Orgulous", "Almacantar",
      "Comeuppance", "ChichimWonky", "Logorrhea", "Knickknack", "Cornucopia",
      "Rupellary", "Peewee", "Armigerous", "Gardyloo", "Nephralgia",
      "Piccadilly", "Aleeman171999", "Harebrained", "Nektonseph2004", "Limburger",
      "Zonulet", "Sambur", "Aasvogel", "Primp", "Cinemuck",
      "Jubilee", "Capripede", "Flophouse", "Tachyphrasia", "Ephemeral",
      "Paraenesis", "Normalcy", "Morwolf1596", "Hotbed", "Limnguinhd",
      "Parapet", "Ateknia", "Taradiddle", "Blowback", "Kudzu",
      "Opsiometer", "Outflow", "Prismatic", "Roundabout", "Bellycheer",
      "Ewer", "Rabblefalnd", "Mufti", "Anemograph", "Cheeky",
  ]
  picked = random.choice(handles)
  return picked


# Preparing the data

In [None]:
# Load the raw reviews; only the 'review' column is used below.
data = pd.read_csv("test.csv", delimiter=",")

# Write one JSON object per line (JSON Lines). Each review gets a random
# username and a random timestamp attached.
with open('data/file.jsonl', 'w', encoding='utf-8') as f:
  # Rows with a missing review are read by pandas as NaN (a float), which has no .split(); skip them.
  for review in data['review'].dropna():
    record = {
        'username': genRandomUser(),
        'commented_on': genRandomDate(),
        # Keep only the text that precedes the first 'READ MORE' marker.
        'comment': str(review).split('READ MORE')[0],
    }
    # Serialize the dict itself: dumping str(record) would write a quoted Python
    # repr instead of a JSON object. The file is UTF-8, so keep non-ASCII text
    # readable rather than \u-escaped.
    f.write(json.dumps(record, ensure_ascii=False) + '\n')

# Building the vector index and the question-answering chain

In [None]:
# Step 1: read every .jsonl file found (recursively) under data/
loader = DirectoryLoader('data', glob="**/*.jsonl")
documents = loader.load()

# Step 2: cut the loaded documents into chunks; chunk_size is measured with
# len (i.e. in characters) and consecutive chunks do not overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0, length_function=len)
texts = text_splitter.split_documents(documents)

# Step 3: embed each chunk with OpenAI embeddings and index the vectors in Chroma
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
docsearch = Chroma.from_documents(texts, embeddings)

# Step 4: wire a chat model to a retriever over that index; the chain is asked
# to return the retrieved source documents alongside each answer
llm = ChatOpenAI()
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

Exiting: Cleaning up .chroma directory
Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


DEBUG:Chroma:Index not found
DEBUG:Chroma:Index saved to .chroma/index/index.bin
DEBUG:Chroma:Index saved to .chroma/index/index.bin


In [None]:
# Interactive question loop. Type "exit" (any letter case, surrounding
# whitespace ignored) to stop.
while True:
  query = input("Ques: ").strip()
  if query.lower() == "exit": break
  # A blank line would still trigger a retrieval plus an LLM call; just re-prompt.
  if not query: continue

  response = qa({"query": query})
  display(Markdown(f"Ans: <b>{response['result']}</b>"))
  # When the chain is built with return_source_documents=True, the retrieved
  # chunks are available in response['source_documents'] for debugging.

Ques: Comments count on 2020-08-03?


DEBUG:Chroma:time to pre process our knn query: 2.384185791015625e-06
DEBUG:Chroma:time to run knn query: 0.0007333755493164062


Ans: <b>There is no comment provided in the given context on 2020-08-03, so it is not possible to determine the comment count on that date.</b>

Ques: what's on your context?


DEBUG:Chroma:time to pre process our knn query: 2.6226043701171875e-06
DEBUG:Chroma:time to run knn query: 0.0002694129943847656


Ans: <b>I am a language model AI and I do not have any personal experiences or context. However, I do have sets of comments/reviews regarding headphones that I can help you with.</b>

Ques: How many reviews are there in your context?


DEBUG:Chroma:time to pre process our knn query: 3.0994415283203125e-06
DEBUG:Chroma:time to run knn query: 0.00024819374084472656


Ans: <b>There are 15 reviews in the context.</b>

Ques: Classify them in positive and negative reviews


DEBUG:Chroma:time to pre process our knn query: 1.9073486328125e-06
DEBUG:Chroma:time to run knn query: 0.0007910728454589844


Ans: <b>Positive reviews:
- "Sound nd Bass is osm..." by user Vaniloquence
- "Super ... battery backup is excellent" by user Opsiometer
- "It's Amazing" by user Negative
- "This is my honest review after 10 months of use..." by user Palomino
- "I give review after 7 month using  this is awesome" by user Trencherman
- "this headphones are very good in this price range..." by user IquidDuck
- "excellent sound quality with superior bass backed with great battery power..." by user Havelock
- "Good headphones gives u decent bass. very comfortable..." by user Taradiddle
- "Worth every rupees, Good clear sound quality and bass , it's a trustworthy product." by user Jubilee
- "i belive that in this range this is the best bluetooth headphone..bass awasome and i realy like this.." by user Ephemeral
- "fantastic bass and crisp and clear voice overall super" by user Anemograph
- "'beautiful design and comfortable to wear sound quality and battery back up is also very very good thank you Flipkart for giving such a nice product on this range" by user Taradiddle
- "Awesome product by Boat. Great delivery by Flipkart, 1day before expected date.Bass and Sound clarity is great..." by user Apolaustic
- "just love itsuperb bass and sound quality is too much good..." by user Cinemuck

Negative reviews:
- "Average product... not as much good as I was assuming..." by user Orgulous
- "There is no issue just few things..." by user Blackguard
- "Mic not work well in voice calls..." by user Normalcy
- "mic doesn't work properly..." by user Calathus</b>

Ques: Classify them based on the product name


DEBUG:Chroma:time to pre process our knn query: 2.384185791015625e-06
DEBUG:Chroma:time to run knn query: 0.0008156299591064453


Ans: <b>I'm sorry, I cannot classify them based on the product name because the product name is not provided in the context.</b>

Ques: Which product are most popular?


DEBUG:Chroma:time to pre process our knn query: 2.1457672119140625e-06
DEBUG:Chroma:time to run knn query: 0.00025773048400878906


Ans: <b>I'm sorry, I cannot determine the most popular product based on the given context. The comments are about a product or products from the brand "Boat," but there isn't enough information to determine which product is the most popular.</b>

Ques: What are these reviews of?


DEBUG:Chroma:time to pre process our knn query: 3.0994415283203125e-06
DEBUG:Chroma:time to run knn query: 0.00024080276489257812


Ans: <b>These reviews seem to be about headphones, specifically the boAt Rockerz headphones.</b>

Ques: Which are the best headphones?


DEBUG:Chroma:time to pre process our knn query: 2.1457672119140625e-06
DEBUG:Chroma:time to run knn query: 0.00029778480529785156


Ans: <b>I'm sorry, I cannot give you a definitive answer without additional information as each comment has different opinions and criteria for defining the "best" headphones. It ultimately depends on your personal preferences and needs. Please let me know if you have any specific requirements or priorities, and I can try to recommend headphones that might fit your needs.</b>

Ques: Which headphone reviews are mostly positive?


DEBUG:Chroma:time to pre process our knn query: 2.86102294921875e-06
DEBUG:Chroma:time to run knn query: 0.0007929801940917969


Ans: <b>The following headphone reviews are mostly positive:

- "Nice one..battery back up is great, can be played up to 6hr if it is charged fully bass too is great..but the headphone design n build quality is not much good doesn't look so beautiful n attracting, anyway sound, bass, battery backup is much great, well n satisfaction.GO FOR IT."
- "This is my honest review after 10 months of use. I bought this for 1499 from Flipkart. If you are a bass lover go for it without any confusion. The bass is very punchy you can feel your ear vibrate with this Pros:-* Punchy bass. Don't know if any other headsets can provide this much bass* Mids & treble are crystal clear. You can hear every instrument* Battery backup is excellent."
- "excellent sound quality with superior bass backed with great battery power and feather light built quality... Mind blowing purchase.. good for music enthusiasts.. and wise choice purchase this at this price range... I'm totally loving it!!"
- "Good headphones give u decent bass. very comfortable...if u want good headphones with overall decent quality go for it."
- "even though it is little tight on your head it provides super bass and clarity. very good backup. overall fabulous product." 
- "okk i have been using these for almost 5 months now and here is my review for these headphones:- Superb sound quality, Bass is osm, minimal design, a little tight for sometime but adjusts after a week or so..,minimal controler keys, soft cushions, nice battery backup/decent charging speed, I didnt find the mic that good but overall its a beast in this price range."</b>

Ques: exit
