In [None]:
# Colab-only step: authenticate the notebook user with Google.
# NOTE(review): the BigQuery / Vertex AI clients created later are given no explicit
# credentials, so they presumably rely on this step — confirm when running outside Colab.
from google.colab import auth
auth.authenticate_user()

In [None]:
!pip install langchain
!pip install -U langchain-community
!pip install -qU langchain-core langchain-google-vertexai
!pip install -qU langchain-pinecone pinecone-notebooks
!pip install google-cloud-bigquery
!pip install --upgrade google-cloud-bigquery
!pip install fpdf
!pip install reportlab
!pip install google-cloud-aiplatform

# LangChain Library Set-up

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import getpass
import vertexai
import time
import tempfile
import google.generativeai as genai
import json
import numpy as np


from transformers import pipeline
from langchain.embeddings import VertexAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.document_loaders import TextLoader
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_google_vertexai import VertexAIEmbeddings
from uuid import uuid4
from langchain_core.documents import Document
from google.cloud import bigquery
from fpdf import FPDF
from io import BytesIO
from google.cloud import aiplatform

In [None]:
# Authenticate to your GCP project
# BigQuery client bound to a fixed project id; the table, insert and query cells
# further down all go through this `client` object.
client = bigquery.Client(project='learning-v-441023')

# Google Cloud/Vertex AI Set-up

In [None]:
# Project and region handed to vertexai.init() below.
# The trailing "@param" markers are Colab form annotations — keep them on the same line.
PROJECT_ID = "learning-v-441023"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
vertexai.init(project=PROJECT_ID, location=LOCATION)

In [None]:
# Initialize a specific version of the embeddings model.
# This `embeddings` object is reused for both indexing reviews and embedding queries.
embeddings = VertexAIEmbeddings(model_name="text-embedding-004")

# Pinecone Vector Database Set-up

In [None]:
# SECURITY: the Pinecone API key must never be written into the notebook.
# It is taken from the PINECONE_API_KEY environment variable when present and
# otherwise requested interactively, so it is not saved with the file.
# NOTE(review): a key was previously committed in this cell — treat it as leaked and rotate it.
os.environ["PINECONE_ENV"] = "us-west1-gcp"

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Pinecone client used by every index operation below.
pc = Pinecone(api_key=pinecone_api_key)


# Store JSON Review/News data to Pinecone Vector Database



In [None]:
index_name = "business-reviews-index"

# Create the serverless index on first use and wait until it reports ready.
# `dimension` has to match the size of the vectors produced by `embeddings`.
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Load the reviews data from the JSON file
with open('/content/business_reviews_updated.json', 'r', encoding='utf-8') as f:
    reviews_data = json.load(f)

index = pc.Index(index_name)

# Prepare the data for Pinecone: one vector per review, with the review text
# and its provenance stored as metadata.
upserts = []
# Number of times each "<business>-<date>" id has been seen so far. A business can
# have several reviews on the same date; without a suffix the later review would
# silently overwrite the earlier one in Pinecone.
id_occurrences = {}
for business in reviews_data:
    business_name = business['name']
    for review in business['reviews']:
        review_text = review['review_text']
        review_source = review['source']
        review_date = review['date']
        # Generate the embedding for the review text (embed() returns a list; take its only item)
        review_embedding = embeddings.embed([review_text])[0]
        # The first review for a business/date keeps the original id format so vectors
        # that are already stored are updated in place; repeats get "-2", "-3", ...
        base_id = f"{business_name}-{review_date}"
        id_occurrences[base_id] = id_occurrences.get(base_id, 0) + 1
        occurrence = id_occurrences[base_id]
        review_id = base_id if occurrence == 1 else f"{base_id}-{occurrence}"
        upserts.append({
            "id": review_id,  # Unique ID
            "values": review_embedding,  # Embedding vector
            "metadata": {
                "business_name": business_name,
                "review_source": review_source,
                "review_date": review_date,
                "review_text": review_text
            }
        })

# Upsert in fixed-size batches so the request size stays bounded as the dataset grows;
# nothing is sent when there are no reviews.
UPSERT_BATCH_SIZE = 100
for batch_start in range(0, len(upserts), UPSERT_BATCH_SIZE):
    index.upsert(vectors=upserts[batch_start:batch_start + UPSERT_BATCH_SIZE])

# Set up PineconeVectorStore with LangChain embedding
vector_store = PineconeVectorStore(index=index, embedding=embeddings)



In [None]:
# get_business_sentiment() returns a single DataFrame of monthly averages. Unpacking it
# into two names iterates the frame's column labels, which left `monthly_data` bound to
# the string 'Converted_Score' instead of the data.
# NOTE: get_business_sentiment is defined in a later cell of this notebook; that cell
# must be executed before this one.
#monthly_data = get_business_sentiment("Haven Craft Kitchen & Bar")
monthly_data = get_business_sentiment("Orange Hill")

# Update BigQuery data table for RAG Use.

In [None]:
# Re-creates the BigQuery client with the same project id as the client built near the
# top of the notebook, so this section can be run on its own.
client = bigquery.Client(project='learning-v-441023')

In [None]:
# Reference to the sentiment table in dataset `reg` of the client's project.
# DatasetReference is constructed directly because Client.dataset() is deprecated
# in google-cloud-bigquery; the resulting reference is the same.
dataset_ref = bigquery.DatasetReference(client.project, 'reg')
table_ref = dataset_ref.table('btest3')
table_id = table_ref
# NOTE(review): both columns are declared STRING although the insert cell sends an
# integer score — confirm whether DATE / numeric types are wanted here.
schema = [
    bigquery.SchemaField("Date", "STRING"),
    bigquery.SchemaField("Converted_Score", "STRING")
]

# Drop any previous copy of the table (no error if it is absent) so every run starts empty.
client.delete_table(table_id, not_found_ok=True)
print(f"Table {table_id} deleted.")
# Recreate the table with the schema above (it never exists at this point).
table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table)  # API request
print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")

Table learning-v-441023.reg.btest3 deleted.
Created table learning-v-441023.reg.btest3


In [None]:
# Prepare data for BigQuery insertion: one (date string, integer score) tuple per month.
# The two columns are iterated directly instead of using `for index, row in ...iterrows()`,
# because a loop variable named `index` overwrote the notebook-level Pinecone `index` handle.
rows_to_insert = [
    (month_end.strftime('%Y-%m-%d'), int(score))
    for month_end, score in zip(monthly_data['Date'], monthly_data['Converted_Score'])
]

if not rows_to_insert:
    # Nothing to send; skip the API call rather than submitting an empty request.
    errors = []
    print("No rows to insert.")
else:
    # Insert data into the table
    errors = client.insert_rows(table, rows_to_insert)  # Use the table object, not table_id

    # insert_rows returns a list of per-row errors; an empty list means success.
    if not errors:
        print("Rows inserted successfully.")
    else:
        print(f"Encountered errors while inserting rows: {errors}")

Rows inserted successfully.


In [None]:

# Read every stored month back, exposing the score column under the alias Sentiment_Score.
query = """
SELECT Date, Converted_Score as Sentiment_Score
FROM `learning-v-441023.reg.btest3`
"""

# Submit the query (API call) and print each returned row as it is read.
query_job = client.query(query)
for record in query_job.result():
  print(record)

# Implement RAG, extracting data from BigQuery and Pinecone Vector database.

## TRANSFORMERS BERT Classifier for Sentiment Score Extraction

In [None]:
# Star labels treated as negative sentiment. The nlptown model's one-star label is the
# singular "1 star"; the previous check only listed "1 stars", so one-star reviews were
# scored as positive. "1 stars" is kept in case a differently-labelled model is swapped in.
_NEGATIVE_SENTIMENT_LABELS = ('1 star', '1 stars', '2 stars')


def _monthly_average_scores(df):
    """Turn per-review sentiment into a signed score and average it per calendar month.

    Args:
        df: DataFrame with a datetime `Date` column, a `Sentiment_Score` confidence
            column and a `Sentiment_Label` column. Must contain at least one row.

    Returns:
        DataFrame with columns `Date` (month-end timestamp at midnight) and
        `Converted_Score`, one row per month between the first and last review.
    """
    # Confidence scaled to 0..100, negated for negative star labels.
    magnitude = df['Sentiment_Score'] * 100
    is_negative = df['Sentiment_Label'].isin(list(_NEGATIVE_SENTIMENT_LABELS))
    signed_score = magnitude.where(~is_negative, -magnitude)

    # Group on monthly periods instead of pd.Grouper(freq='M'): the 'M' offset alias is
    # deprecated for timestamps in recent pandas, while the period alias 'M' is not.
    months = df['Date'].dt.to_period('M')
    monthly_mean = signed_score.groupby(months).mean()

    # Re-insert months without reviews, carry the previous month's score forward
    # (.ffill() replaces the deprecated fillna(method='ffill')) and default to 0.
    all_months = pd.period_range(monthly_mean.index.min(), monthly_mean.index.max(), freq='M')
    filled = monthly_mean.reindex(all_months).ffill().fillna(0)

    return pd.DataFrame({
        # End of each month, normalised to midnight (same labels the Grouper produced).
        'Date': all_months.to_timestamp(how='end').normalize(),
        'Converted_Score': filled.to_numpy(),
    })


def get_business_sentiment(business_name, index_name="business-reviews-index", top_k=100):
    """Compute the monthly average sentiment score for one business.

    Reviews are fetched from Pinecone by metadata filter, classified with the
    nlptown star-rating model, converted to a signed score (confidence * 100,
    negative for one- and two-star labels) and averaged per month.

    Args:
        business_name: Value matched exactly against the `business_name` metadata field.
        index_name: Pinecone index to read from.
        top_k: Maximum number of reviews fetched; businesses with more reviews than
            this are only partially covered.

    Returns:
        DataFrame with columns `Date` and `Converted_Score`, one row per month.
        Empty (same columns) when the business has no reviews in the index.
    """
    # Initialize Pinecone index
    index = pc.Index(index_name)

    # Initialize the BERT sentiment analysis pipeline
    classifier = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

    filter_query = {"business_name": {"$eq": business_name}}

    # A vector is required by query(); the text is a placeholder because the
    # selection is done by the metadata filter, not by similarity.
    dummy_query = "a"
    query_embedding = embeddings.embed([dummy_query])[0]  # LangChain embedding

    # Query Pinecone index
    results = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
        filter=filter_query
    )

    matches = results['matches']
    if not matches:
        # No reviews: return an empty frame with the expected columns instead of
        # failing on the missing 'Date' / 'Sentiment_*' columns.
        return pd.DataFrame({
            'Date': pd.to_datetime([]),
            'Converted_Score': pd.Series(dtype='float64'),
        })

    review_texts = [match['metadata']['review_text'] for match in matches]
    review_dates = [match['metadata']['review_date'] for match in matches]

    # Classify all reviews in one call. truncation=True prevents reviews longer than
    # the model's maximum input length from raising inside the pipeline.
    sentiments = classifier(review_texts, truncation=True)

    df = pd.DataFrame({
        "Date": pd.to_datetime(review_dates),
        "Sentiment_Score": [sentiment['score'] for sentiment in sentiments],
        "Sentiment_Label": [sentiment['label'] for sentiment in sentiments],
    })
    return _monthly_average_scores(df)


# Build Context Relevant Prompt

In [None]:
def create_prompt(business,user_query):
  """Assemble the LLM prompt for `business` from retrieved reviews and sentiment trends.

  The ten reviews closest to `user_query` (restricted to this business) are listed
  first, followed by the monthly sentiment scores and the task instructions.

  Returns:
      Tuple of (prompt text, monthly sentiment DataFrame).
  """
  review_index = pc.Index("business-reviews-index")
  monthly_data = get_business_sentiment(business)

  # Embed the user's question and retrieve the closest reviews for this business only.
  question_vector = embeddings.embed([user_query])[0]
  results = review_index.query(
      vector=question_vector,
      top_k=10,
      include_metadata=True,
      filter={"business_name": {"$eq": business}},
  )

  # The prompt is collected piece by piece and joined once at the end.
  sections = [f"**Prompt:**\n\nGiven the following context about the business  \"{business}\"\n\n"]

  # One bullet per retrieved review, including its similarity score.
  for match in results['matches']:
    details = match['metadata']
    text = details['review_text']
    written_on = details['review_date']
    reviewed_business = details['business_name']
    origin = details['review_source']
    similarity = match['score']
    sections.append(
        f"* **Review:** {text} (Date: {written_on}) (review_source: {origin}) (business_name: {reviewed_business})(match_cosine_score: {similarity})\n"
    )

  # One bullet per month of averaged sentiment.
  sections.append("\n**Sentiment Analysis Trends:**\n")
  for _, month in monthly_data.iterrows():
    month_label = month['Date'].strftime('%Y-%m-%d')
    month_score = month['Converted_Score']
    sections.append(f"* **Date:** {month_label}, **Sentiment Score:** {month_score}\n")

  # Question and task description for the model.
  sections.append(
      f"\n**User Query:** {user_query}\n\n**Task:**\nProvide a comprehensive response to the user's query, addressing the following aspects:\n1. **Business Overview **(please mention the business name inthe overview) What are the strengths and frequently reported weaknesses  based on the reviews?\n2. **Suggest Improvements:** What specific improvements can be suggested to enhance the services?\n3. **Customer Sentiment:** How does the sentiment score trend in last 12 months ? What review sources are causing this\n4. **Actionable Insights:** What actionable insights can be derived from the reviews for business decision-making?\n"
  )

  prompt = "".join(sections)
  return prompt, monthly_data



# Integrate GenAI into Workflow

In [None]:
# SECURITY: the Gemini API key must never be written into the notebook.
# It is taken from the GOOGLE_API_KEY environment variable when present and
# otherwise requested interactively, so it is not saved with the file.
# NOTE(review): a key was previously committed in this cell — treat it as leaked and rotate it.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

def generate_model_response(biz,user_query,model_name='gemini-pro'):
  """Build the context prompt for a business and ask the generative model to answer it.

  Args:
      biz: Business name, matched against the stored review metadata.
      user_query: Question to answer; also used to retrieve the most relevant reviews.
      model_name: Generative model to call. Defaults to the value that was previously
          hard-coded. NOTE(review): confirm this model id is still served by the API.

  Returns:
      Tuple of (prompt text, monthly sentiment DataFrame, model response text).
  """
  prompt,monthly_data = create_prompt(biz,user_query)

  model = genai.GenerativeModel(model_name)
  # `.text` is read directly from the returned response object.
  response = model.generate_content(prompt).text
  return prompt,monthly_data,response

In [None]:
# Businesses available in the sample data:
#   - Haven Craft Kitchen & Bar
#   - Orange Hill

biz = "Haven Craft Kitchen & Bar"
user_query = f"Describe {biz} customer complaints performance in last 12 months"

# Produces the prompt, the monthly sentiment table and the model's answer in one call.
prompt, monthly_data, response = generate_model_response(biz, user_query)

  monthly_data = df.groupby(pd.Grouper(key='Date', freq='M')).agg({'Converted_Score': 'mean'})
  monthly_data['Converted_Score'] = monthly_data['Converted_Score'].fillna(method='ffill').fillna(0)


# Engineered Prompt

In [None]:
# Show the full prompt that was sent to the model (retrieved reviews + sentiment trend + task).
print(prompt)

**Prompt:**

Given the following context about the business  "Haven Craft Kitchen & Bar"

* **Review:** Haven Craft Wins Prestigious Culinary Award

Haven Craft Kitchen & Bar has been awarded the 'Innovative Cuisine Award' for its outstanding seasonal offerings and consistent improvement. The restaurant continues to impress critics and diners alike. This story has sparked a wave of reactions in the local community, as many expressed their opinions on social media. The establishment's management has issued a statement addressing the issue, vowing to make necessary changes. This news comes at a time when the local restaurant scene is buzzing with competition and innovation. (Date: 2024-05-13) (review_source: News) (business_name: Haven Craft Kitchen & Bar)(match_cosine_score: 0.737227142)
* **Review:** Haven Craft has shown remarkable improvement. Each visit is better than the last, with consistent food quality and service. (Date: 2024-11-22) (review_source: Google Reviews) (business_nam

In [None]:
# Show the model's answer text; the PDF cell below renders this same `response`.
print(response)

**1. Business Overview: Haven Craft Kitchen & Bar**

Haven Craft Kitchen & Bar has garnered positive feedback for its consistent improvement, innovative cuisine, seasonal offerings, and fresh, locally sourced ingredients. Its strengths include:

- Consistently high-quality food and service
- Inventive menu and use of seasonal ingredients
- Positive customer experiences

Despite these strengths, some weaknesses emerge based on the provided reviews:

- Limited menu options for some diners
- Occasional issues with service
- Perceived high prices

**2. Suggest Improvements:**

To enhance its services, Haven Craft Kitchen & Bar could consider:

- Expanding menu options to cater to a wider range of tastes and preferences.
- Enhancing staff training to ensure consistent service quality.
- Exploring flexible pricing options or offering specials to address affordability concerns.

**3. Customer Sentiment:**

The overall customer sentiment towards Haven Craft Kitchen & Bar has been positive in t

# Generate a formatted PDF Report with Graph

In [None]:
from fpdf import FPDF
import matplotlib.pyplot as plt
import seaborn as sns
import tempfile
import os


def _latin1_safe(text):
    """Return `text` with every character outside Latin-1 replaced by '?'.

    The built-in Arial font used below is a core PDF font restricted to Latin-1.
    Model output regularly contains curly quotes, dashes or bullets outside that
    range, which would otherwise make PDF generation fail with an encoding error.
    """
    return text.encode('latin-1', 'replace').decode('latin-1')


title = "Performance Overview of " + biz + " in last 12 months"


# Step 1: PDF layout with a title header and a page-number footer
class PDF(FPDF):
    """Report page layout; the header text is read from the notebook-level `title`."""

    def header(self):
        self.set_font('Arial', 'B', 12)  # Use Arial for header
        self.cell(0, 10, _latin1_safe(title), ln=True, align='C')

    def footer(self):
        self.set_y(-15)  # 15 units above the bottom edge
        self.set_font('Arial', 'I', 8)  # Use Arial for footer
        self.cell(0, 10, f'Page {self.page_no()}', align='C')


# Step 2: Plot the monthly score on an explicit figure/axes pair
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=monthly_data, x='Date', y='Converted_Score', marker='o', linewidth=2, color='dodgerblue', ax=ax)
ax.set_title('Business Perception Score Over Months', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Business Perception Score', fontsize=12)
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()

# Step 3: Reserve a temporary PNG path. delete=False keeps the file after the handle
# closes so the image can be written and then read back by path.
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
    temp_file_name = temp_file.name

pdf_output_path = 'generated_output_with_unicode.pdf'

try:
    fig.savefig(temp_file_name, format='PNG', bbox_inches='tight')
    plt.close(fig)

    # Step 4: Generate the PDF
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Add the plot to the PDF
    pdf.image(temp_file_name, x=10, y=None, w=190, h=100)  # Adjust size and position as needed
    pdf.ln(10)

    # Step 5: Add the response text, mapping simple markdown markers to font styles
    for line in response.split('\n'):
        # Trailing/leading whitespace must not stop a heading from being recognised.
        heading = line.strip()
        if heading.startswith('**') and heading.endswith('**'):
            pdf.set_font('Arial', 'B', 12)  # Bold for headings
            pdf.cell(0, 10, _latin1_safe(heading.strip('*')), ln=True)
        elif heading.startswith('###'):
            pdf.set_font('Arial', 'B', 14)  # Larger bold for subheadings
            pdf.cell(0, 10, _latin1_safe(heading.strip('#').strip()), ln=True)
        else:
            pdf.set_font('Arial', '', 12)  # Normal font for body text
            pdf.multi_cell(0, 10, _latin1_safe(line))

    # Step 6: Save the PDF
    pdf.output(pdf_output_path)
    print(f"PDF saved to {pdf_output_path}")
finally:
    # Clean up temporary file even when plotting or PDF generation fails
    os.remove(temp_file_name)


PDF saved to generated_output_with_unicode.pdf
