<a href="https://colab.research.google.com/github/Sejaltalekar3/GQT-Java-Training/blob/main/IBM_hackaton.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Use Sentence-Transformers Model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load customer data
loader = TextLoader("/content/Customer Purchase History.txt")
documents = loader.load()

# Create vector store
vectorstore = FAISS.from_documents(documents, embedding_model)

# Save for reuse
vectorstore.save_local("customer_vector_store")


In [4]:
pip install -U langchain-community faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [8]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = FAISS.load_local(
    "customer_vector_store",
    embedding_model,
    allow_dangerous_deserialization=True
)


In [9]:
pip install transformers accelerate sentencepiece


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS

# Embeddings - updated import
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load vectorstore with safety flag
vectorstore = FAISS.load_local(
    "customer_vector_store",
    embedding_model,
    allow_dangerous_deserialization=True
)

# Load model/tokenizer
model_name = "MBZUAI/LaMini-Flan-T5-248M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# HuggingFace pipeline - removed temperature flag
hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=False,
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)

qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())

# Use invoke instead of run
query = "What product recommendations would you suggest for a customer interested in headphones?"
response = qa_chain.invoke({"query": query})

print(response)


Device set to use cpu


{'query': 'What product recommendations would you suggest for a customer interested in headphones?', 'result': 'I would suggest the following product recommendations for a customer interested in headphones: - Noise Cancelling Headphones - Bluetooth Speaker - Smartwatch Browsing - Audio devices - Mobile accessories - Tripod - Camera Lens Browsing - Photography gear - Storage cards - Personalized Offers - 15% off on new audio products - Free tripod with next DSLR purchase - Dynamic Content Preferences - Preferred Communication: Email Interests: Music, Podcasts, Smart Devices - SMS Interests: Photography, Travel, Gadgets'}


In [4]:
!pip install langchain_huggingface

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Downloading langchain_huggingface-0.2.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain_huggingface
Successfully installed langchain_huggingface-0.2.0


In [7]:
from transformers import pipeline
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Load sentiment pipeline
sentiment_classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Load customer reviews text file
loader = TextLoader("/content/customer_reviews.txt")
documents = loader.load()

# Add sentiment labels as metadata
for doc in documents:
    sentiment = sentiment_classifier(doc.page_content[:512])[0]  # limit to first 512 tokens for speed
    doc.metadata["sentiment"] = sentiment["label"]

# Create embeddings and vectorstore as usual
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(documents, embedding_model)

# Save vectorstore for later use
vectorstore.save_local("customer_reviews_vector_store")


Device set to use cpu


In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = FAISS.from_documents(documents, embedding_model)

# Save vectorstore for later use
vectorstore.save_local("customer_reviews_vectorstore")


In [9]:
from transformers import pipeline
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load sentiment analysis pipeline
sentiment_model_name = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_classifier = pipeline("sentiment-analysis", model=sentiment_model_name)

# Load vectorstore
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.load_local("customer_reviews_vectorstore", embedding_model, allow_dangerous_deserialization=True)

# Optional: create a retrieval QA chain for answering questions (if desired)
# or just retrieve relevant documents to analyze sentiment

def analyze_sentiment(query):
    # Retrieve relevant documents
    relevant_docs = vectorstore.similarity_search(query, k=5)
    reviews = [doc.page_content for doc in relevant_docs]

    # Run sentiment analysis on each review
    results = sentiment_classifier(reviews)

    # Aggregate results (basic example)
    sentiments = [res['label'] for res in results]
    positive = sentiments.count('POSITIVE')
    negative = sentiments.count('NEGATIVE')
    neutral = len(sentiments) - positive - negative

    print(f"Positive: {positive}, Negative: {negative}, Neutral: {neutral}")
    return list(zip(reviews, sentiments))

# Example usage
query = "headphones"
sentiment_results = analyze_sentiment(query)
for review, sentiment in sentiment_results:
    print(f"Sentiment: {sentiment} | Review: {review}")


Device set to use cpu


Positive: 0, Negative: 1, Neutral: 0
Sentiment: NEGATIVE | Review: I absolutely love this product! It exceeded my expectations and works flawlessly.
The delivery was late and the packaging was damaged. Not happy with the service.
Great value for money. I would definitely recommend this to my friends and family.
The product stopped working after two weeks. Very disappointed with the quality.
Customer support was helpful and resolved my issue quickly. Good experience overall.
The item does not match the description on the website. Felt cheated.
Excellent features and easy to use. The battery life is impressive.
Not satisfied with the purchase. The size was smaller than advertised.
The website interface is user-friendly, and checkout was smooth.
Received a defective unit and the return process is taking too long.
Love the design and color options available. Will buy again!
Poor customer service response times. Waiting for an update for days.
The product performs well under heavy usage. St

In [10]:
from transformers import pipeline

sentiment_classifier = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

reviews = [
    "I absolutely love this product! It exceeded my expectations and works flawlessly.",
    "The delivery was late and the packaging was damaged. Not happy with the service."
]

results = sentiment_classifier(reviews)
for review, result in zip(reviews, results):
    print(f"Review: {review}\nSentiment: {result['label']} (score: {result['score']:.2f})\n")


config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


Review: I absolutely love this product! It exceeded my expectations and works flawlessly.
Sentiment: 5 stars (score: 0.98)

Review: The delivery was late and the packaging was damaged. Not happy with the service.
Sentiment: 2 stars (score: 0.47)

