# Assignment Title: Text Analysis of Restaurant Reviews
# Author: Ravi Teja Kondeti
# ASU ID: 1234434879
# File Creation Date: January 25, 2025


In [1]:
# Library and Data Import
import pandas as pd              # For data manipulation and analysis
import spacy                     # For text processing (NLP)
from collections import Counter  # For counting word frequencies

# Load the dataset with the first 1000 rows only
data = pd.read_csv('restaurant_reviews_az.csv', nrows=1000)

# Summary of the input data
print(data.info())
print(data.describe(include='all'))
print(data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   review_id    1000 non-null   object
 1   user_id      1000 non-null   object
 2   business_id  1000 non-null   object
 3   stars        1000 non-null   int64 
 4   useful       1000 non-null   int64 
 5   funny        1000 non-null   int64 
 6   cool         1000 non-null   int64 
 7   text         1000 non-null   object
 8   date         1000 non-null   object
 9   Sentiment    1000 non-null   int64 
dtypes: int64(5), object(5)
memory usage: 78.3+ KB
None
       review_id user_id             business_id        stars       useful  \
count       1000    1000                    1000  1000.000000  1000.000000   
unique       981     897                     146          NaN          NaN   
top       #NAME?  #NAME?  UCMSWPqzXjd7QHq7v8PJjQ          NaN          NaN   
freq          20      10          

In [8]:
# Select the 1-star and 5-star reviews
# Filter reviews with 1 and 5 stars
one_star_reviews = data[data['stars'] == 1]
five_star_reviews = data[data['stars'] == 5]

# Print the count of 1-star and 5-star reviews to verify selection
print(f"Number of 1-star reviews: {len(one_star_reviews)}")
print(f"Number of 5-star reviews: {len(five_star_reviews)}")

Number of 1-star reviews: 132
Number of 5-star reviews: 562


In [9]:
# Apply necessary text processing techniques
# Load the Spacy English model for natural language processing
# This model provides features like tokenization, lemmatization, and POS tagging
nlp = spacy.load('en_core_web_sm')

def process_text(text):
    doc = nlp(text)
    return {
        'tokens': [token.text for token in doc],
        'lemmas': [token.lemma_ for token in doc],
        'pos': [token.pos_ for token in doc],
        'entities': [(ent.text, ent.label_) for ent in doc.ents],
        'dependencies': [token.dep_ for token in doc]
    }

# Process reviews
one_star_processed = one_star_reviews['text'].apply(process_text)
five_star_processed = five_star_reviews['text'].apply(process_text)

print("Sample processed text:")
print(one_star_processed.head())

Sample processed text:
5     {'tokens': ['I', 'stay', 'at', 'the', 'Main', ...
13    {'tokens': ['I', 'do', 'n't', 'know', 'what', ...
18    {'tokens': ['Very', 'bad', 'service', 'call', ...
20    {'tokens': ['This', 'place', 'is', 'not', 'wor...
22    {'tokens': ['I', 'was', 'so', 'looking', 'forw...
Name: text, dtype: object


In [5]:
# Top 20 frequently used nouns
# Define a function to extract and count POS (e.g., nouns, adjectives)
def get_top_nouns(processed_reviews):
    nouns = Counter([token for review in processed_reviews for token, pos in zip(review['tokens'], review['pos']) if pos == 'NOUN'])
    return nouns.most_common(20)

print("Top 20 nouns in 1-star reviews:")
print(get_top_nouns(one_star_processed))

print("Top 20 nouns in 5-star reviews:")
print(get_top_nouns(five_star_processed))

Top 20 nouns in 1-star reviews:
[('food', 82), ('order', 61), ('time', 48), ('service', 46), ('place', 41), ('minutes', 41), ('people', 25), ('restaurant', 22), ('location', 21), ('customer', 20), ('pizza', 17), ('drive', 17), ('chicken', 16), ('one', 15), ('manager', 14), ('wings', 13), ('sauce', 13), ('cheese', 13), ('phone', 13), ('card', 13)]
Top 20 nouns in 5-star reviews:
[('food', 291), ('place', 214), ('service', 152), ('time', 139), ('restaurant', 79), ('pizza', 79), ('staff', 63), ('chicken', 55), ('order', 52), ('menu', 50), ('breakfast', 42), ('experience', 42), ('side', 42), ('meal', 40), ('flavor', 40), ('town', 39), ('ramen', 38), ('sauce', 38), ('lunch', 37), ('dinner', 36)]


In [6]:
# Top 20 frequently used verbs
def get_top_verbs(processed_reviews):
    verbs = Counter([token for review in processed_reviews for token, pos in zip(review['tokens'], review['pos']) if pos == 'VERB'])
    return verbs.most_common(20)

print("Top 20 verbs in 1-star reviews:")
print(get_top_verbs(one_star_processed))

print("Top 20 verbs in 5-star reviews:")
print(get_top_verbs(five_star_processed))

Top 20 verbs in 1-star reviews:
[('get', 48), ('had', 46), ('go', 40), ('said', 35), ('told', 31), ('got', 27), ('ordered', 27), ('have', 26), ('asked', 22), ('called', 20), ('give', 18), ('order', 17), ('know', 16), ('going', 16), ('came', 16), ('left', 15), ('want', 15), ('used', 14), ('wait', 14), ('tried', 14)]
Top 20 verbs in 5-star reviews:
[('had', 228), ('have', 135), ('go', 95), ('get', 95), ('recommend', 74), ('love', 68), ('got', 68), ('try', 67), ('ordered', 62), ('made', 56), ('come', 50), ('take', 43), ('eat', 41), ('came', 41), ('make', 37), ('wait', 36), ('has', 36), ('coming', 35), ('order', 35), ('going', 30)]


In [7]:
# Top 20 named entities
def get_top_entities(processed_reviews):
    entities = Counter([entity for review in processed_reviews for entity, _ in review['entities']])
    return entities.most_common(20)

print("Top 20 named entities in 1-star reviews:")
print(get_top_entities(one_star_processed))

print("Top 20 named entities in 5-star reviews:")
print(get_top_entities(five_star_processed))

Top 20 named entities in 1-star reviews:
[('two', 13), ('Taco Bell', 9), ('2', 9), ('Tucson', 8), ('3', 8), ('first', 7), ('20', 7), ('today', 6), ('French', 5), ('10', 5), ('Mexican', 5), ('Waffle House', 5), ('Saturday', 5), ('McDonalds', 5), ('15 minutes', 5), ('one', 4), ('1', 4), ('zero', 4), ('20 minutes', 4), ('10 minutes', 4)]
Top 20 named entities in 5-star reviews:
[('Tucson', 115), ('first', 31), ('Mexican', 27), ('one', 26), ('two', 22), ('French', 20), ('5', 17), ('Arizona', 16), ('2', 15), ('Love', 12), ('3', 12), ('First', 12), ('Persian', 12), ('today', 10), ('Saturday', 9), ('Sunday', 8), ('taco', 8), ('half', 8), ('Yelp', 8), ('Chinese', 8)]


# Observation
1-star reviews often contain negative adjectives and nouns such as "bad," "service," and "wait," focusing on issues like delays or rude staff.
5-star reviews frequently include positive adjectives like "amazing," "delicious," and "friendly," reflecting satisfaction with food quality and customer service.
Key to a good restaurant experience appears to be high-quality food, attentive service, and a welcoming ambiance.


# Acknowledgments
I used ChatGPT as a reference tool for structuring this assignment. No other GenAI tools were used, and no collaboration occurred.


In [10]:
!pip install nbconvert



In [11]:
# Export the current notebook to HTML
!jupyter nbconvert --to html "/content/ LA1_Ravi Teja_Kondeti.ipynb"

# Download the HTML file
from google.colab import files
files.download("/content/ LA1_Ravi Teja_Kondeti.html")

This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr

FileNotFoundError: Cannot find file: /content/ LA1_Ravi Teja_Kondeti.html