# Extract the reviews from Google Maps.

In [None]:
import os
import requests
import json

# Set the GOOGLE_PLACES_API_KEY environment variable
os.environ["GOOGLE_PLACES_API_KEY"] = "AIzaSyD_NUIC2KPfD-8euMjdKgpBsLB05MFzSgE"

def fetch_reviews(place_id, api_key):
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        'place_id': place_id,
        "X-Goog-FieldMask": "places.reviews",
        'key': api_key,
    }
    print(f"Making API request to URL: {url} with query parameters: {params}")
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise an exception if the status code indicates an error occurred
        data = response.json()
        return data.get('result', {}).get('reviews', [])
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        print("Please check your API key and usage limits.")
    except requests.exceptions.RequestException as err:
        print(f"Error occurred: {err}")
    return []

place_id = "ChIJrTH0vwYnQg0RwFJPB0NzKeE"  # Example place ID
api_key = os.environ["GOOGLE_PLACES_API_KEY"]
reviews = fetch_reviews(place_id, api_key)

if reviews:
    print("Reviews fetched successfully:")
    for review in reviews:
        print(f"Author: {review.get('author_name')}, Rating: {review.get('rating')}, Review: {review.get('text')}")
else:
    print("Error: Unable to fetch reviews from the API")


## Using the better one and the second one to compare what they have in common.

In [2]:
import json
import os

# Load the data from the "TalentReviews.json" file
with open(os.path.join(os.getcwd(), "TalentReviews.json")) as f:
    data = json.load(f)

# Print the reviews for the place
print("Reviews for Talent Garden Madrid:")

for review in data.get("result", {}).get("reviews", []):
    print(review.get("text"))

Reviews for Talent Garden Madrid:
Cold and very noisy. Phone booths are open spaces between 4 walls, but no ceiling. Useless
Decided to visit on my week trip to Madrid. At first I was a little skeptical: thought it would be crowded, noisy, and rude staff. BUT I was wrong. This is such a pleasant place as I was welcomed the moment I walked through the door. Marta and Alexa (I think) were so gracious and helped me get situated for the day. Allowed me to sit and purchase a day pass since it wasn't working online. Marta checked on me through the day and informed me of the office space, private rooms for calls, cafe etc. The guy running the cafe was also very pleasant.

The internet was super fast - which is very important. The table/sear selection were comfortable, and the outlets are easily accessible.

I will definitely return on my visits and send people their way.
Thank you for a great experience!
Nice offices walking distance to centre of Madrid.

Clean, Spacious, Modern, plenty of ro

In [3]:
import nltk

# Download the VADER sentiment analysis model from NLTK
nltk.download('vader_lexicon')

# Define the sentiment analysis function
def analyze_sentiment(text):
    from nltk.sentiment import SentimentIntensityAnalyzer
    analyzer = SentimentIntensityAnalyzer()
    sentiment = analyzer.polarity_scores(text)
    if sentiment['compound'] > 0.05:
        return 'positive'
    elif sentiment['compound'] < -0.05:
        return 'negative'
    else:
        return 'neutral'

# Load the reviews from the "TalentReviews.json" file
with open(os.path.join(os.getcwd(), "TalentReviews.json")) as f:
    data = json.load(f)

# Classify the reviews as positive, negative, or neutral
for review in data.get("result", {}).get("reviews", []):
    sentiment = analyze_sentiment(review.get("text"))
    print(f"Review sentiment: {sentiment}")

Review sentiment: negative
Review sentiment: positive
Review sentiment: positive
Review sentiment: positive
Review sentiment: positive


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/gabriel/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## The sentiment for the first one is consider positive in almost all the reviews.

In [4]:
import collections
import nltk
nltk.download('wordnet')
import nltk.corpus

# Download the NLTK English tokenizer and stop words
nltk.download('punkt')
nltk.download('stopwords')

# Define the preprocessing function
def preprocess(text):
    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer, PorterStemmer
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    words = nltk.word_tokenize(text.lower())
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words and word.isalpha()]
    return ' '.join(stemmer.stem(word) for word in words)

# Load the reviews from the "TalentReviews.json" file
with open(os.path.join(os.getcwd(), "TalentReviews.json")) as f:
    data = json.load(f)

# Extract the text of the reviews
reviews = [preprocess(review.get("text")) for review in data.get("result", {}).get("reviews", [])]

# Count the frequency of each word in the preprocessed reviews
word_counts = collections.Counter(word for review in reviews for word in review.split())

# Print the most common words in the reviews
print("Most common words in the reviews:")
for word, count in word_counts.most_common(10):
    print(f"{word}: {count}")

[nltk_data] Downloading package wordnet to /home/gabriel/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/gabriel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/gabriel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Most common words in the reviews:
day: 4
space: 3
nice: 3
noisi: 2
visit: 2
madrid: 2
staff: 2
pleasant: 2
walk: 2
marta: 2


In [1]:
import os
import requests
import json

# Set the GOOGLE_PLACES_API_KEY environment variable
os.environ["GOOGLE_PLACES_API_KEY"] = "AIzaSyD_NUIC2KPfD-8euMjdKgpBsLB05MFzSgE"

def fetch_reviews(place_id, api_key):
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    headers = {
        "X-Goog-FieldMask": "places.reviews"
    }
    params = {
        'place_id': place_id,
        'key': api_key,
    }
    print(f"Making API request to URL: {url} with query parameters: {params} and headers: {headers}")
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()  # Raise an exception if the status code indicates an error occurred
        data = response.json()
        return data
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        print("Please check your API key and usage limits.")
    except requests.exceptions.RequestException as err:
        print(f"Error occurred: {err}")
    return None

place_id = "ChIJCVUBCA4mQg0RbIccVWP9JA8"
api_key = os.environ["GOOGLE_PLACES_API_KEY"]
data = fetch_reviews(place_id, api_key)

if data:
    # Save the data as a JSON file
    with open(os.path.join(os.getcwd(), "FreelandReviews.json"), "w") as f:
        json.dump(data, f, indent=4)
    print("Data saved as FreelandReviews.json")
else:
    print("Error: Unable to fetch reviews from the API")

Making API request to URL: https://maps.googleapis.com/maps/api/place/details/json with query parameters: {'place_id': 'ChIJCVUBCA4mQg0RbIccVWP9JA8', 'key': 'AIzaSyD_NUIC2KPfD-8euMjdKgpBsLB05MFzSgE'} and headers: {'X-Goog-FieldMask': 'places.reviews'}
Data saved as FreelandReviews.json


In [6]:
import os
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk
import spacy

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

nlp = spacy.load('en_core_web_sm')
stop_words = set(stopwords.words('english'))
lemma = WordNetLemmatizer()

def preprocess(text):
    doc = nlp(text)
    tokens = [token.lemma_.lower().strip() if token.lemma_ != "-PRON-" else token.lower_ for token in doc]
    cleaned_tokens = [token for token in tokens if token and token not in stop_words and token.isalpha()]
    return cleaned_tokens

# Load the reviews from the "TalentReviews.json" file
with open(os.path.join(os.getcwd(), "TalentReviews.json")) as f:
    data = json.load(f)

# Extract the text of the reviews
reviews = [review.get("text") for review in data.get("result", {}).get("reviews", [])]

# Skip preprocessing if the data is already preprocessed
if all(isinstance(review, str) for review in reviews):
    cleaned_reviews = reviews
else:
    cleaned_reviews = [preprocess(review) for review in reviews]

# Create a CountVectorizer
vectorizer = CountVectorizer()

# Transform the cleaned_reviews into a matrix of token counts
review_matrix = vectorizer.fit_transform(cleaned_reviews)

# Get the list of feature names after fitting the vectorizer
feature_names = vectorizer.get_feature_names_out()

# Create a LatentDirichletAllocation model
lda = LatentDirichletAllocation(n_components=5, random_state=42)

# Fit the model to the review_matrix
lda.fit(review_matrix)

# Display the topics
for i, topic in enumerate(lda.components_):
    print(f"Topic {i}:")
    print(" ".join([feature_names[i] for i in topic.argsort()[-10:][::-1]]))
    print("\n")

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/gabriel/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/gabriel/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/gabriel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/gabriel/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Topic 0:
are the by to definitely staff this as spaces very


Topic 1:
nice by to clean some centre modern near room expansion


Topic 2:
nice light close atocha day to by bars plenty expansion


Topic 3:
the and was to for me day on of very


Topic 4:
noisy but useless phone ceiling open booths cold no walls




In [7]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

def preprocess(review):
    # Convert the review into lowercase
    review = review.lower()
    
    # Tokenize the review into individual words
    words = word_tokenize(review)
    
    # Remove punctuation and stopwords
    words = [word for word in words if word.isalpha()]
    words = [word for word in words if word not in stopwords.words('english')]
    
    # Lemmatize the words
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    
    return words

In [8]:
import os
import json
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from gensim import corpora, models

# Load the reviews from the JSON file
with open(os.path.join(os.getcwd(), "TalentReviews.json")) as f:
    data = json.load(f)

# Extract the text of the reviews
reviews = [review.get("text") for review in data.get("result", {}).get("reviews", [])]
# Preprocess the reviews
preprocessed_reviews = [preprocess(review) for review in reviews]

# Create the dictionary
dictionary = corpora.Dictionary(preprocessed_reviews)

# Create the corpus
corpus = [dictionary.doc2bow(review) for review in preprocessed_reviews]

# Create the LDA model
lda_model = models.LdaModel(corpus, num_topics=10, id2word=dictionary, passes=10)

# Print the top 10 words for each topic
for topic in lda_model.print_topics():
    print(topic)

(0, '0.010*"nice" + 0.010*"day" + 0.010*"atocha" + 0.010*"light" + 0.010*"close" + 0.010*"space" + 0.010*"booth" + 0.010*"useless" + 0.010*"wall" + 0.010*"ceiling"')
(1, '0.046*"noisy" + 0.046*"ceiling" + 0.046*"booth" + 0.046*"cold" + 0.046*"phone" + 0.046*"wall" + 0.046*"open" + 0.046*"useless" + 0.046*"space" + 0.046*"light"')
(2, '0.010*"day" + 0.010*"visit" + 0.010*"cafe" + 0.010*"room" + 0.010*"space" + 0.010*"pleasant" + 0.010*"marta" + 0.010*"noisy" + 0.010*"walked" + 0.010*"close"')
(3, '0.010*"day" + 0.010*"light" + 0.010*"atocha" + 0.010*"nice" + 0.010*"space" + 0.010*"noisy" + 0.010*"close" + 0.010*"booth" + 0.010*"useless" + 0.010*"open"')
(4, '0.010*"day" + 0.010*"cafe" + 0.010*"visit" + 0.010*"pleasant" + 0.010*"marta" + 0.010*"way" + 0.010*"purchase" + 0.010*"pas" + 0.010*"thank" + 0.010*"first"')
(5, '0.010*"nice" + 0.010*"day" + 0.010*"close" + 0.010*"atocha" + 0.010*"light" + 0.010*"space" + 0.010*"booth" + 0.010*"madrid" + 0.010*"open" + 0.010*"noisy"')
(6, '0.083*"

In [9]:
import pandas as pd

# Extract word weights from the LDA model
word_weights = {dictionary[word_id]: weight for word_id, weight in lda_model.get_topic_terms(i, topn=len(dictionary))}

# Convert word weights dictionary to a DataFrame
df_talent = pd.DataFrame(word_weights.items(), columns=['word', 'weight'])

# Display the DataFrame
print(df_talent)

         word    weight
0         day  0.010425
1        cafe  0.010282
2       visit  0.010251
3    pleasant  0.010225
4       marta  0.010141
..        ...       ...
95    walking  0.009881
96   spacious  0.009881
97     plenty  0.009881
98  coworking  0.009881
99       spot  0.009881

[100 rows x 2 columns]


In [10]:
import matplotlib.pyplot as plt

# Function to plot word weights
def plot_word_weights(lda_model, feature_names, n_top_words=10):
    # Combine probabilities of each word across all topics
    word_weights = {}
    for i, topic in enumerate(lda_model.components_):
        for j in topic.argsort()[:-n_top_words - 1:-1]:
            if feature_names[j] in word_weights:
                word_weights[feature_names[j]] += topic[j]
            else:
                word_weights[feature_names[j]] = topic[j]
    
    # Sort words by their weights
    sorted_word_weights = sorted(word_weights.items(), key=lambda x: x[1], reverse=True)
    words, weights = zip(*sorted_word_weights)
    
    # Plot word weights
    plt.figure(figsize=(10, 8))
    plt.barh(range(len(words)), weights, align='center', color='skyblue')
    plt.yticks(range(len(words)), words, fontsize=10)
    plt.gca().invert_yaxis()
    plt.xlabel('Word Weight')
    plt.title('Talent Garden Madrid Word Weights')
    plt.show()

# Get feature names
feature_names = list(dictionary.values())
# Plot word weights
plot_word_weights(lda_model, feature_names)


AttributeError: 'LdaModel' object has no attribute 'components_'

## The weight for each word that was put in the review of the Talent Garden Madrid.

In [11]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

def preprocess(review):
    # Convert the review into lowercase
    review = review.lower()
    
    # Tokenize the review into individual words
    words = word_tokenize(review)
    
    # Remove punctuation and stopwords
    words = [word for word in words if word.isalpha()]
    words = [word for word in words if word not in stopwords.words('english')]
    
    # Lemmatize the words
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    
    return words

In [12]:
import os
import json
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from gensim import corpora, models

# Load the reviews from the JSON file
with open(os.path.join(os.getcwd(), "FreelandReviews.json")) as f:
    data = json.load(f)

# Extract the text of the reviews
reviews = [review.get("text") for review in data.get("result", {}).get("reviews", [])]
# Preprocess the reviews
preprocessed_reviews = [preprocess(review) for review in reviews]

# Create the dictionary
dictionary = corpora.Dictionary(preprocessed_reviews)

# Create the corpus
corpus = [dictionary.doc2bow(review) for review in preprocessed_reviews]

# Create the LDA model
lda_model = models.LdaModel(corpus, num_topics=10, id2word=dictionary, passes=10)

# Print the top 10 words for each topic
for topic in lda_model.print_topics():
    print(topic)

ValueError: cannot compute LDA over an empty collection (no terms)

In [None]:
import pandas as pd

# Extract word weights from the LDA model
word_weights = {dictionary[word_id]: weight for word_id, weight in lda_model.get_topic_terms(i, topn=len(dictionary))}

# Convert word weights dictionary to a DataFrame
df_freeland = pd.DataFrame(word_weights.items(), columns=['word', 'weight'])

# Display the DataFrame
print(df_freeland)


In [None]:
import matplotlib.pyplot as plt

# Function to plot word weights for Gensim LdaModel
def plot_word_weights_lda(lda_model, dictionary, n_top_words=10):
    # Combine probabilities of each word across all topics
    word_weights = {}
    for i in range(lda_model.num_topics):
        topic_words = lda_model.get_topic_terms(i, topn=n_top_words)
        for word_id, weight in topic_words:
            word = dictionary[word_id]
            if word in word_weights:
                word_weights[word] += weight
            else:
                word_weights[word] = weight
    
    # Sort words by their weights
    sorted_word_weights = sorted(word_weights.items(), key=lambda x: x[1], reverse=True)
    words, weights = zip(*sorted_word_weights)
    
    # Plot word weights
    plt.figure(figsize=(10, 8))
    plt.barh(range(len(words)), weights, align='center', color='skyblue')
    plt.yticks(range(len(words)), words, fontsize=8)
    plt.gca().invert_yaxis()
    plt.xlabel('Word Weight')
    plt.title('')
    plt.show()

# Plot word weights
plot_word_weights_lda(lda_model, dictionary)


## The weight for each word that was put in the review of the Freeland.

In [None]:
import pandas as pd

# Merge the two DataFrames on the 'word' column
merged_df = df_freeland.merge(df_talent, on='word', how='inner')

# Rename the columns
merged_df.columns = ['word', 'weight_freeland', 'weight_talent']

# Create a new column for the differences between the weights
merged_df['weight_difference'] = abs(merged_df['weight_freeland'] - merged_df['weight_talent'])

# Sort the DataFrame by the 'weight_difference' column in descending order
merged_df = merged_df.sort_values(by='weight_difference', ascending=True)

# Display the DataFrame
print(merged_df)


### The common words that was used in the reviews for each place.

In [None]:
# Save DataFrame to CSV file
merged_df.to_csv('merged_data.csv', index=False)


### Now we can see that the words: accessible, spacious, different, people and confortable have more impact for those two best working spaces in Madrid.