In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
import requests
from transformers import pipeline
from openai import OpenAI
from prettytable import PrettyTable

# Reproducibility: seed the Python, NumPy and TensorFlow RNGs in one call so
# that weight initialisation and batch shuffling are repeatable after
# "Restart Kernel -> Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Parameters
vocab_size = 10000  # Vocabulary size: passed as num_words to imdb.load_data and as the Embedding input dimension
maxlen = 300        # Maximum length of review: every sequence is padded/truncated to this many entries
embedding_size = 50 # Dimension of the embedding vector

# Load the dataset (downloads imdb.npz on first use).
# Reviews arrive as lists of integer word indices; labels are the binary targets.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure uniform input size.
# With the Keras defaults, padding and truncation both happen at the start of
# the sequence, so the end of a long review is what gets kept.
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Build the model
# Bag-of-embeddings classifier: embed each word index, average the embeddings
# over the sequence, then a small dense head with a sigmoid output for the
# binary label.
#
# The input shape is declared with an explicit Input instead of the Embedding
# `input_length` argument: that argument is deprecated in Keras 3 (it is
# ignored with a warning), which leaves the model unbuilt and makes
# model.summary() print no output shapes or parameter counts.
model = Sequential([
    tf.keras.Input(shape=(maxlen,), dtype="int32"),
    Embedding(vocab_size, embedding_size),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()



In [None]:
# Train the model
# Validation metrics are computed on a slice of the *training* data rather than
# on (x_test, y_test): monitoring the test set during training means any choice
# made from these curves (e.g. the number of epochs) is tuned on the test set,
# and the later model.evaluate(x_test, y_test) is no longer a held-out estimate.
# validation_split takes the last 20% of the training arrays, before shuffling.
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=512,
    validation_split=0.2,
    verbose=2,  # one summary line per epoch
)

Epoch 1/10
49/49 - 6s - 117ms/step - accuracy: 0.5864 - loss: 0.6853 - val_accuracy: 0.6067 - val_loss: 0.6706
Epoch 2/10
49/49 - 4s - 72ms/step - accuracy: 0.7147 - loss: 0.6287 - val_accuracy: 0.7668 - val_loss: 0.5810
Epoch 3/10
49/49 - 4s - 86ms/step - accuracy: 0.7927 - loss: 0.5212 - val_accuracy: 0.7825 - val_loss: 0.4836
Epoch 4/10
49/49 - 3s - 58ms/step - accuracy: 0.8332 - loss: 0.4258 - val_accuracy: 0.8441 - val_loss: 0.4031
Epoch 5/10
49/49 - 5s - 110ms/step - accuracy: 0.8617 - loss: 0.3608 - val_accuracy: 0.8563 - val_loss: 0.3617
Epoch 6/10
49/49 - 3s - 55ms/step - accuracy: 0.8776 - loss: 0.3204 - val_accuracy: 0.8652 - val_loss: 0.3338
Epoch 7/10
49/49 - 5s - 105ms/step - accuracy: 0.8851 - loss: 0.2946 - val_accuracy: 0.8723 - val_loss: 0.3185
Epoch 8/10
49/49 - 5s - 106ms/step - accuracy: 0.8962 - loss: 0.2715 - val_accuracy: 0.8768 - val_loss: 0.3050
Epoch 9/10
49/49 - 3s - 66ms/step - accuracy: 0.9019 - loss: 0.2545 - val_accuracy: 0.8787 - val_loss: 0.2988
Epoch 

In [None]:
# Evaluate the model
# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the 'accuracy' metric requested in model.compile().
results = model.evaluate(
    x=x_test,
    y=y_test,
)
print(f"Test Accuracy: {results[1] * 100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8756 - loss: 0.2977
Test Accuracy: 87.32%


In [None]:
# Initialize the sentiment analysis model
# The checkpoint and revision are pinned explicitly. They are exactly the ones
# the bare pipeline("sentiment-analysis") call resolved to by default, so the
# predictions are unchanged, but the notebook no longer depends on whatever the
# library's default happens to be (and the "No model was supplied" warning
# goes away).
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

def get_places(query, api_key, limit=10, timeout=10):
    """Search the Google Places Text Search endpoint for hotels in ``query``.

    Args:
        query: Free-text location; it is interpolated into the search string
            ``"hotels in {query}"``.
        api_key: API key sent as the ``key`` request parameter.
        limit: Maximum number of results to return (default 10, the value
            that used to be hard-coded).
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list with at most ``limit`` entries from the ``results`` array of
        the JSON response; an empty list when the response has no ``results``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
    params = {
        'query': f"hotels in {query}",
        'fields': 'formatted_address,name,place_id,rating,user_ratings_total',
        'key': api_key
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    results = response.json().get('results', [])
    return results[:limit]

def get_reviews(place_id, api_key, timeout=10):
    """Fetch the reviews of one place from the Google Place Details endpoint.

    Args:
        place_id: Identifier of the place, sent as the ``place_id`` parameter.
        api_key: API key sent as the ``key`` request parameter.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``reviews`` list found under ``result`` in the JSON response, or
        an empty list when either key is absent.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        'place_id': place_id,
        'fields': 'review',
        'key': api_key
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    reviews = response.json().get('result', {}).get('reviews', [])
    return reviews

def analyze_reviews(reviews):
    """Classify the sentiment of each review with the module-level ``classifier``.

    Args:
        reviews: Iterable of dicts; the review body is read from the ``'text'``
            key. A missing or empty ``'text'`` is classified as an empty
            string so the output stays aligned with the input.

    Returns:
        A list with one classifier result per review, in input order. Each
        entry is the first element of what ``classifier`` returns for that
        text (the rest of this notebook reads its ``'label'`` and ``'score'``).
    """
    sentiments = []
    for review in reviews:
        text = review.get('text') or ''
        # Let the tokenizer truncate to the model's maximum input length.
        # Slicing the string to 512 *characters* (the previous approach) is not
        # a token limit: it throws away most of a long review, and text whose
        # characters each map to one or more tokens can still overflow the
        # model once the special tokens are added.
        result = classifier(text, truncation=True)
        sentiments.append(result[0])
    return sentiments

def calculate_sentiment_stats(sentiments):
    """Summarise classifier results as label percentages and a mean score.

    Args:
        sentiments: Sequence of dicts, each carrying a ``'label'`` and a
            ``'score'`` entry.

    Returns:
        A tuple ``(positive_percent, negative_percent, avg_confidence)``.
        Both percentages are relative to the total number of entries, so an
        entry with any other label only contributes to the denominator.
        ``avg_confidence`` is the mean of all ``'score'`` values. For empty
        input the result is ``(0, 0, 0)``.
    """
    total = len(sentiments)
    labels = [entry['label'] for entry in sentiments]

    # Nothing to average: bail out before dividing by zero.
    if total == 0:
        return 0, 0, 0

    n_positive = labels.count('POSITIVE')
    n_negative = labels.count('NEGATIVE')
    score_sum = sum(entry['score'] for entry in sentiments)

    return (
        (n_positive / total) * 100,
        (n_negative / total) * 100,
        score_sum / total,
    )

def extract_common_complaints(reviews, sentiments):
    """Return up to three complaint categories most often hit by negative reviews.

    Args:
        reviews: Sequence of dicts whose ``'text'`` entry holds the review body.
        sentiments: Classifier results paired positionally with ``reviews``;
            only pairs whose ``'label'`` is ``'NEGATIVE'`` are considered.

    Returns:
        A list of at most three category names, ordered by how many negative
        reviews mention at least one of the category's keywords (highest
        first). Categories with no hits are left out; ties keep the order in
        which the categories are declared below.
    """
    # Keyword lists per category. Matching is a case-insensitive substring
    # test, so e.g. 'clean' also matches inside 'unclean'.
    keyword_groups = {
        'clean': ['dirty', 'clean', 'unclean', 'hygiene', 'filthy'],
        'service': ['service', 'staff', 'rude', 'unhelpful', 'friendly'],
        'noise': ['noise', 'loud', 'quiet', 'disturbance'],
        'food': ['food', 'breakfast', 'dinner', 'restaurant', 'meal'],
        'comfort': ['bed', 'comfortable', 'uncomfortable', 'pillow', 'mattress']
    }

    # Collect the text of every review the classifier labelled NEGATIVE.
    negative_texts = []
    for review, sentiment in zip(reviews, sentiments):
        if sentiment['label'] == 'NEGATIVE':
            negative_texts.append(review['text'])
    lowered_texts = [text.lower() for text in negative_texts]

    # Count, per category, the negative reviews mentioning any of its keywords.
    tallies = []
    for category, keywords in keyword_groups.items():
        hits = 0
        for text in lowered_texts:
            if any(keyword in text for keyword in keywords):
                hits += 1
        if hits > 0:
            tallies.append((category, hits))

    # Stable sort on the negated count: highest first, ties in declaration order.
    tallies.sort(key=lambda pair: -pair[1])
    return [category for category, _ in tallies[:3]]


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu
