In [None]:
import os

from chromadb import PersistentClient
from chromadb.utils import embedding_functions

In [None]:
def get_chroma_client():
    """Return a `PersistentClient` opened on the notebook's storage path."""
    client = PersistentClient(path="redacted")
    return client

In [None]:
# Model name passed to the sentence-transformer embedding function.
embedding_function_name = "all-mpnet-base-v2"
# Embedding function supplied to the Chroma collection when it is opened.
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=embedding_function_name)

In [5]:
# Open the "KotorReviews" collection (creating it if it does not exist yet),
# using the embedding function configured above.
collection = get_chroma_client().get_or_create_collection("KotorReviews", embedding_function=embedding_function)

In [16]:
def get_entries_by_distance(collection, keyword: str, distance_bound: float = 2, n_results: int = 10):
    """Query `collection` for `keyword` and keep only hits within `distance_bound`.

    Args:
        collection: Object exposing a Chroma-style
            ``query(query_texts=..., n_results=...)`` method that returns a
            mapping with per-query lists under "ids", "documents",
            "metadatas" and "distances".
        keyword: Text to search for.
        distance_bound: Largest distance (inclusive) a hit may have to be kept.
        n_results: How many nearest neighbours to request before filtering.
            Defaults to 10, which is what Chroma uses when the argument is
            omitted, so existing callers get the same candidate set. Raise it
            when more than ten reviews may fall inside `distance_bound`.

    Returns:
        A dict with flat, index-aligned lists under "ids", "documents",
        "metadatas" and "distances". All lists are empty when nothing matches.
        A document or metadata entry is ``None`` when the query result does
        not carry that field.
    """
    results = collection.query(query_texts=[keyword], n_results=n_results)
    filtered_results = {
        "ids": [],
        "documents": [],
        "metadatas": [],
        "distances": [],
    }

    distance_rows = results.get("distances")
    id_rows = results.get("ids")
    if not distance_rows or not id_rows:
        return filtered_results

    # Exactly one query text was sent, so only the first row of each field is used.
    ids = id_rows[0]
    distances = distance_rows[0]

    def first_row_or_blanks(key):
        # "documents"/"metadatas" can be missing or None in the result; pad
        # with None so every output list stays aligned with "ids".
        rows = results.get(key)
        if rows and rows[0]:
            return rows[0]
        return [None] * len(ids)

    documents = first_row_or_blanks("documents")
    metadatas = first_row_or_blanks("metadatas")

    for entry_id, document, metadata, distance in zip(ids, documents, metadatas, distances):
        if distance <= distance_bound:
            filtered_results["ids"].append(entry_id)
            filtered_results["documents"].append(document)
            filtered_results["metadatas"].append(metadata)
            filtered_results["distances"].append(distance)
    return filtered_results

In [7]:
def get_entries_low(collection, max_rating: float = 3):
    """Fetch every entry whose "review_rating" metadata is at most `max_rating`.

    Args:
        collection: Object exposing a Chroma-style ``get(where=...)`` method.
        max_rating: Inclusive upper bound on "review_rating". Defaults to 3,
            the threshold this notebook uses for "low" reviews.

    Returns:
        Whatever ``collection.get`` returns for that filter.
    """
    return collection.get(where={"review_rating": {"$lte": max_rating}})

def get_entries_high(collection, min_rating: float = 4):
    """Fetch every entry whose "review_rating" metadata is at least `min_rating`.

    Args:
        collection: Object exposing a Chroma-style ``get(where=...)`` method.
        min_rating: Inclusive lower bound on "review_rating". Defaults to 4,
            the threshold this notebook uses for "high" reviews.

    Returns:
        Whatever ``collection.get`` returns for that filter.
    """
    return collection.get(where={"review_rating": {"$gte": min_rating}})

In [8]:
def get_summarize_reviews_prompt(keyword, reviews_text):
    """Build the user message for the keyword-focused summary.

    The message states the keyword, wraps `reviews_text` between BEGIN/END
    markers and ends with the analysis request. It starts and ends with a
    newline.
    """
    prompt_lines = [
        "",
        f"User Interest: {keyword}",
        "",
        "Reviews Text:",
        "---BEGIN REVIEWS---",
        f"{reviews_text}",
        "---END REVIEWS---",
        "",
        "Analyze these reviews.",
        "",
    ]
    return "\n".join(prompt_lines)

def structure_reviews(reviews_text):
    """Wrap `reviews_text` between the BEGIN/END REVIEWS markers.

    The returned text starts and ends with a newline.
    """
    wrapped_lines = [
        "",
        "Reviews Text:",
        "---BEGIN REVIEWS---",
        f"{reviews_text}",
        "---END REVIEWS---",
        "",
    ]
    return "\n".join(wrapped_lines)

# Instruction text for the keyword-focused summary; critique_reviews sends it
# as the first part of the request, ahead of the reviews.
# NOTE(review): this is a plain string (not an f-string), so the `{keyword}`
# markers under "Output Format" stay literal unless the caller substitutes them.
system_summarize_prompt: str = """
You are a highly focused Review Analysis Engine. Your task is to extract positive and negative insights from customer reviews, strictly filtered by the user's provided interest keyword.

Instructions:
1. Analyze the user-provided 'Reviews Text'.
2. Identify and isolate *only* the statements directly relevant to user keyword. Ignore all other content.
3. From the relevant statements, extract distinct positive points (pros, praise, satisfaction).
4. From the relevant statements, extract distinct negative points (cons, criticism, dissatisfaction).
5. Generate a summary based *only* on these extracted points. The summary should be in sentence form only, without any lists or bullet points.

Output Format:
- If relevant insights are found, present them in sentence form, clearly distinguishing between positive and negative aspects related to '{keyword}'. For example: "Summary: Positively, customers frequently praised the {keyword} for its [positive aspect]. On the other hand, some users expressed concern regarding [negative aspect] of the {keyword}."
- If *no* relevant insights concerning the keyword are found, output *only*:
    No specific insights related to provided keyword were found in the provided reviews.
"""

# Instruction text for summarising the high-rated reviews; generate_positive_summary
# sends it as the first part of the request, ahead of the wrapped reviews.
# NOTE(review): the instructions mention "the user's keyword", but no keyword
# is passed along with this prompt.
system_positive_prompt="""
Analyze the following positive customer reviews to pinpoint concrete strengths and areas of excellence. Identify the specific features, aspects, or experiences that customers explicitly praise and find valuable. Focus on extracting actionable insights into what is working well and driving positive sentiment. Summarize these key positive takeaways concisely in sentence form.

Instructions:
1. Scrutinize the 'Reviews Text' to identify all statements expressing positive sentiment or approval related to the user's keyword. Disregard neutral or negative comments.
2. For each positive statement, determine the precise element being praised (e.g., specific feature, service quality, ease of use).
3. Group similar positive points to reveal recurring strengths and popular aspects.
4. Highlight any particularly strong endorsements or frequently mentioned benefits that clearly indicate customer satisfaction.
5. Formulate a concise summary using complete sentences that explicitly detail the identified strengths and positive aspects. Avoid lists or bullet points.

Output Format:
- Present the key positive findings in sentence form, clearly articulating what customers specifically appreciate and value.
"""

# Instruction text for summarising the low-rated reviews; generate_negative_summary
# sends it as the first part of the request, ahead of the wrapped reviews.
# NOTE(review): the instructions mention "the user's keyword", but no keyword
# is passed along with this prompt.
system_negative_prompt="""
Critically examine the following negative customer reviews to expose significant weaknesses and areas needing urgent improvement. Identify the precise issues, shortcomings, and frustrations that customers explicitly mention as unsatisfactory or problematic. Focus on extracting actionable insights into what is failing to meet expectations and generating negative sentiment. Summarize these critical negative takeaways concisely in sentence form, emphasizing what customers found genuinely poor or unacceptable.

Instructions:
1. Thoroughly review the 'Reviews Text' to identify all statements expressing negative sentiment, complaints, or criticism related to the user's keyword. Disregard neutral or positive comments.
2. For each negative statement, determine the exact problem or deficiency being highlighted (e.g., faulty feature, poor service, difficult process).
3. Group similar negative points to reveal recurring weaknesses and widespread issues.
4. Emphasize any severe complaints, frequently mentioned problems, or aspects that customers strongly disliked or found unusable.
5. Formulate a concise summary using complete sentences that directly address the identified weaknesses and unsatisfactory aspects, clearly stating what customers found bad or unacceptable. Avoid lists or bullet points.

Output Format:
- Present the key negative findings in sentence form, clearly articulating what customers specifically disliked, found problematic, or deemed unacceptable.
"""

In [None]:
from google import genai
from google.genai import types

# Prefer the GEMINI_API_KEY environment variable so the key does not have to be
# stored in the notebook. The literal fallback keeps the previous value when
# the variable is not set; do not paste a real key here.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "redacted")

# Model used for every summary request in this notebook.
_GEMINI_MODEL = "gemini-2.0-flash"


def _generate_text(instructions: str, payload: str, model: str = _GEMINI_MODEL):
    """Send `instructions` followed by `payload` to Gemini and return the reply text.

    Both texts are sent as two parts of a single "user" turn, which is the
    request shape all three public summary functions share.
    """
    client = genai.Client(
        api_key=GEMINI_API_KEY,
    )
    response = client.models.generate_content(
        model=model,
        contents=[
            types.Content(
                parts=[
                    {"text": instructions},
                    {"text": payload}
                ],
                role="user"
            )
        ]
    )
    return response.text


def critique_reviews(text, keyword: str):
    """Summarise what the reviews say about `keyword`.

    Args:
        text: The reviews, either as one string or as an iterable of review
            strings. An iterable is joined with newlines (skipping ``None``
            entries) so the prompt contains review text, not a Python list repr.
        keyword: The aspect the summary should focus on.

    Returns:
        The text of the model's reply.
    """
    if not isinstance(text, str):
        text = "\n".join(str(review) for review in text if review is not None)
    # The prompt template is a plain string, so fill its `{keyword}` markers
    # here; otherwise the model receives the literal placeholder.
    instructions = system_summarize_prompt.replace("{keyword}", keyword)
    return _generate_text(instructions, get_summarize_reviews_prompt(keyword, text))


def generate_positive_summary(reviews: str):
    """Summarise the strengths mentioned in `reviews` and return the model's reply text."""
    return _generate_text(system_positive_prompt, structure_reviews(reviews))


def generate_negative_summary(reviews: str):
    """Summarise the weaknesses mentioned in `reviews` and return the model's reply text."""
    return _generate_text(system_negative_prompt, structure_reviews(reviews))

In [11]:
def get_summary(collection, keyword: str, distance_bound: float = 1.1):
    """Summarise the reviews that are semantically close to `keyword`.

    Args:
        collection: Review collection to search.
        keyword: Aspect of interest, used both for retrieval and in the prompt.
        distance_bound: Largest distance (inclusive) a review may have to be
            included.

    Returns:
        The model's summary, or a fixed message when no review falls within
        `distance_bound`.
    """
    relevant_reviews = get_entries_by_distance(collection, keyword, distance_bound)
    # The result dict always carries its four keys, so it is never falsy;
    # test the retrieved documents themselves to detect "nothing found".
    documents = [doc for doc in (relevant_reviews.get("documents") or []) if doc]
    if not documents:
        return "There are no reviews for this keyword"
    # Join into plain text, as the other summary helpers do, so the prompt
    # does not contain a Python list repr.
    return critique_reviews("\n".join(documents), keyword)

def get_good_summary(collection):
    """Summarise the strengths mentioned in the high-rated reviews.

    Returns:
        The model's summary, or a fixed message when the collection holds no
        high-rated review text (no request is sent in that case).
    """
    # "documents" may be None or contain None entries; "\n".join would fail on both.
    reviews = [doc for doc in (get_entries_high(collection).get("documents") or []) if doc]
    if not reviews:
        return "There are no high-rated reviews to summarize"
    return generate_positive_summary("\n".join(reviews))

def get_bad_summary(collection):
    """Summarise the weaknesses mentioned in the low-rated reviews.

    Returns:
        The model's summary, or a fixed message when the collection holds no
        low-rated review text (no request is sent in that case).
    """
    # "documents" may be None or contain None entries; "\n".join would fail on both.
    reviews = [doc for doc in (get_entries_low(collection).get("documents") or []) if doc]
    if not reviews:
        return "There are no low-rated reviews to summarize"
    return generate_negative_summary("\n".join(reviews))

In [None]:
# Generate the three summaries that the evaluation cell compares with the
# reference texts. Each call sends a request to the Gemini API.
bad_summary = get_bad_summary(collection)
good_summary = get_good_summary(collection)
view_summary = get_summary(collection, "view")

In [19]:
# Reference summaries that the generated summaries are scored against
# (keyword "view", low-rated reviews, high-rated reviews).
expert_view = "Positively, reviews are saying that the view is standalone feature and is described as best, mad and super. All the reviews are saying that its not to be missed."
expert_bad = "Many customers express strong dissatisfaction with the €8 entrance fee, which they find unjustified given the poor condition of the site. Visitors frequently complain about extensive litter, overflowing bins, foul smells (notably of urine), and general neglect throughout the path and fortress. Safety is a major concern, with reports of slippery, worn steps, unstable structures, and lack of protective barriers, posing serious risks, especially for children and the elderly. The fortress itself is described as dilapidated, nearly empty, and lacking any signage, historical information, or services. Tourists feel misled, with some noting a free, more pleasant alternative route exists. Overall, the site is widely criticized for being overpriced, unsafe, and poorly maintained, with the impressive view being the only redeeming feature."
expert_good = "Customers overwhelmingly praise the breathtaking panoramic views from the top of the fortress, often describing them as unforgettable, spectacular, or worth every step of the challenging climb. Many emphasize that the sense of accomplishment upon reaching the summit enhances the experience, especially when paired with dramatic vistas of Kotor, the bay, the old town, and surrounding mountains. The fortress's historical significance and well-preserved medieval architecture are also consistently highlighted, contributing to a deeper sense of cultural immersion. Despite the physical difficulty, the hike is perceived as a rewarding adventure, with many noting that even those not in peak physical condition can manage it with breaks. Travelers appreciate the flexibility of multiple access routes, including free scenic alternatives that offer a quieter, more natural path. Early morning or evening visits are favored for cooler temperatures and optimal lighting, especially at sunset. Additional conveniences like water and drinks sold by local vendors along the route are valued, especially during hot weather. Overall, the experience is seen as a must-do activity in Kotor, delivering a memorable combination of stunning natural beauty, historical depth, and personal achievement."


In [21]:
from rouge_score import rouge_scorer
from evaluate import load

# Pair every reference summary with the generated summary for the same task.
reference_summaries = [expert_view, expert_bad, expert_good]
predicted_summaries = [view_summary, bad_summary, good_summary]

# BERTScore over all three pairs at once.
bert_score = load("bertscore")
results = bert_score.compute(
    predictions=predicted_summaries,
    references=reference_summaries,
    lang="en",
)

# ROUGE-1 / ROUGE-2 / ROUGE-L for each (reference, prediction) pair, in order.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
score = [
    scorer.score(reference, prediction)
    for reference, prediction in zip(reference_summaries, predicted_summaries)
]

results, score

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


({'precision': [0.9015474319458008, 0.8989661335945129, 0.9093489050865173],
  'recall': [0.8961949348449707, 0.8831160068511963, 0.8750645518302917],
  'f1': [0.8988631963729858, 0.8909705877304077, 0.8918773531913757],
  'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.50.3)'},
 [{'rouge1': Score(precision=0.37037037037037035, recall=0.3448275862068966, fmeasure=0.35714285714285715),
   'rouge2': Score(precision=0.07692307692307693, recall=0.07142857142857142, fmeasure=0.07407407407407408),
   'rougeL': Score(precision=0.3333333333333333, recall=0.3103448275862069, fmeasure=0.32142857142857145)},
  {'rouge1': Score(precision=0.5568181818181818, recall=0.4049586776859504, fmeasure=0.4688995215311005),
   'rouge2': Score(precision=0.13793103448275862, recall=0.1, fmeasure=0.11594202898550725),
   'rougeL': Score(precision=0.3409090909090909, recall=0.24793388429752067, fmeasure=0.28708133971291866)},
  {'rouge1': Score(precision=0.7228915662650602, recall=0.322580645161