In [None]:
pip install together

In [None]:
pip install langchain_together

In [None]:
from langchain_together import TogetherEmbeddings
import pandas as pd
import numpy as np

In [None]:
df = pd.read_excel('reviews_data.xlsx')

In [None]:
df

In [None]:
import os
import getpass
if not os.getenv("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = "45cc35a9dc8621b295401d3b841e56b82afef4d426a90e3c2f72fde63927ab92"

# Initialize the TogetherEmbeddings model
embeddings = TogetherEmbeddings(
    model="togethercomputer/m2-bert-80M-8k-retrieval"  # Example model; check docs for Llama-based alternatives
)

In [None]:
pip install pinecone

# Create embeddings for all reviews - this will take close to 1.5 - 2 hours

In [None]:
reviews = df["Review"].tolist()

# Process embeddings in batches
embedding_list = []
for i in range(0, len(reviews), 128):
    batch = reviews[i : i + 128]  # Get batch
    batch_embeddings = embeddings.embed_documents(batch)  # Generate embeddings for batch
    embedding_list.extend(batch_embeddings)  # Store results
    print(f"Processed {i + len(batch)} / {len(reviews)} reviews")

In [10]:
metadata_list = df.apply(lambda row: {
    "customer_id": int(row["customer_id"]),
    "review_date": row["review_date_numeric"],
    "Rating": int(row["Rating"]),
    "review_id":row['review_id']
}, axis=1).tolist()

In [11]:
metadata_list[0]

{'customer_id': 5380, 'review_date': 20241104, 'Rating': 6, 'review_id': 11722}

In [12]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(
    api_key='pcsk_3GJ2wg_R6FhBarLn8wJNR1rqTvUv8FNkwkXSa4V2cujUrVGW3uadaw6YXLtQAWVhBMTMp9'
)

# Check whether any indexes already exist

In [14]:
pc.list_indexes()
if "reviews-index" in pc.list_indexes().names():
    pc.delete_index("reviews-index")



In [16]:
pc.list_indexes()

[
    {
        "name": "hotel-reviews",
        "metric": "cosine",
        "host": "hotel-reviews-izeoe32.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 768,
        "deletion_protection": "enabled",
        "tags": null
    }
]

# create the index

In [15]:
pc.create_index(
    name='hotel-reviews',
    dimension=768,
    metric='cosine',
    deletion_protection='enabled',
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2025-01', 'X-Cloud-Trace-Context': '334ec1c22b72047551f62737197d5b28', 'Date': 'Sat, 15 Mar 2025 02:46:23 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [21]:
#index = pc.Index(host="hotel-reviews-yug9kjl.svc.aped-4627-b74a.pinecone.io")
index = pc.Index(host="https://hotel-reviews-izeoe32.svc.aped-4627-b74a.pinecone.io")


In [23]:
vectors = [
    (str(i), embedding, metadata_list[i])
    for i, embedding in enumerate(embedding_list)
]

# Insert embeddings + metadata into the index

In [24]:
batch_size = 100  # Adjust this number as needed.

for i in range(0, len(embedding_list), batch_size):
    batch_vectors = [
        (str(i + j), embedding_list[i + j], metadata_list[i + j])
        for j in range(min(batch_size, len(embedding_list) - i))
    ]
    index.upsert(vectors=batch_vectors)
    print(f"Upserted batch from {i} to {i + len(batch_vectors)}")

Upserted batch from 0 to 100
Upserted batch from 100 to 200
Upserted batch from 200 to 300
Upserted batch from 300 to 400
Upserted batch from 400 to 500
Upserted batch from 500 to 600
Upserted batch from 600 to 700
Upserted batch from 700 to 800
Upserted batch from 800 to 900
Upserted batch from 900 to 1000
Upserted batch from 1000 to 1100
Upserted batch from 1100 to 1200
Upserted batch from 1200 to 1300
Upserted batch from 1300 to 1400
Upserted batch from 1400 to 1500
Upserted batch from 1500 to 1600
Upserted batch from 1600 to 1700
Upserted batch from 1700 to 1800
Upserted batch from 1800 to 1900
Upserted batch from 1900 to 2000
Upserted batch from 2000 to 2100
Upserted batch from 2100 to 2200
Upserted batch from 2200 to 2300
Upserted batch from 2300 to 2400
Upserted batch from 2400 to 2500
Upserted batch from 2500 to 2600
Upserted batch from 2600 to 2700
Upserted batch from 2700 to 2800
Upserted batch from 2800 to 2900
Upserted batch from 2900 to 3000
Upserted batch from 3000 to 310

# experiment with below parts

In [25]:
query_embedding = embeddings.embed_query("What are some of the reviews that mention restaurant, food, lunch, breakfast, dinner")

In [26]:
results = index.query(
    vector=query_embedding,
    top_k=5,
    namespace="",
    include_metadata=True,
    filter={
        "Rating": {"$lte": 9},
        "review_date": {"$gte": 20240101, "$lte": 20240108}
    }
)

In [27]:
matches = results["matches"]

# Extract review_ids from the matches (convert them to int if necessary)
matched_ids = [int(match["metadata"]["review_id"]) for match in matches]

In [28]:
matched_ids

[5410, 8647, 11531, 2910, 2593]

In [29]:
req_df = df[df["review_id"].isin(matched_ids)]

In [30]:
req_df['Review']

486      Shower taps need descaling horrid smell from ...
1622     The people working on the terrace bar are pre...
2018     When we arrived to check in there was a misun...
5794     Having to pay for one item I used in the mini...
8249     Loved everything only negative was STK We wer...
Name: Review, dtype: object

In [31]:
concatenated_reviews = " ".join(req_df["Review"].tolist())

In [32]:
concatenated_reviews

' Shower taps need descaling horrid smell from drains the new booth seating is so unfriendly the older decor made more sense New menu card room dining not advisable limited choice of average food Tea cakes were cold from fridge so cream in the cakes was There were no more small tasty affordable snacks like the crab cakes of tandoori salmon or tatty etc Pity You lost 15 customers who went to eat elsewhere daily for 3 days   location  The people working on the terrace bar are pretty slow preparing beverages so you need to be patient awaiting your order especially when you can see quite a lot of people up there We were forced to cancel our order after 40 minutes waiting as we had another appointment planned after   The location of the hotel is just perfect 150m to Diagonal Metro station Rambla is just 300 m away So for our family tour with some sightseeing and enjoying good restaurants this was a perfect choice The hotel is very comfortable and quiet You can have rest on the terrace bar w

In [33]:
from together import Together
import os
client = Together()
response = client.chat.completions.create(
    model="meta-llama/Llama-Vision-Free",
    messages = [{"role": "user", "content": f"""Briefly Summarize the overall sentiment of customers about food and restaurant based on these reviews - {
    concatenated_reviews}. Dont mention the name of the hotel"""}]
)
print(response.choices[0].message.content)


Here is a summary of the overall sentiment of customers about food and restaurants based on these reviews:

**Food and Restaurant Sentiment:**

* Most customers were disappointed with the food, citing limited choice, average quality, and high prices.
* Specific complaints included cold tea cakes, unfriendly booth seating, and a lack of affordable snacks.
* There were some positive comments about the location and views, but these were largely overshadowed by negative reviews of the food and service.
* The only positive review of a specific restaurant was for the singer at the Chico Latino bar, but even this was minor compared to the overall negative sentiment.

**Common Issues:**

* High prices and limited choice
* Poor service, including long wait times and unfriendly staff
* Unappealing atmosphere, including loud music and uncomfortable seating
* Lack of affordable snacks and drinks

**Standout Positives:**

* None were mentioned, but the location and views of the hotel were cited as 