# Dependencies

In [14]:
# Main imports for analysis and visualization 
import pandas as pd
import plotly.express as px

# Read the TripAdvisor reviews file and inspect the dataset

In [15]:
# Load the reviews CSV (path is relative to the working directory) and preview the first rows
df = pd.read_csv(filepath_or_buffer='tripadvisor_hotel_reviews.csv')
df.head(n=5)

Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5


In [5]:
# Summarize the columns: dtypes, non-null counts, and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20491 entries, 0 to 20490
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  20491 non-null  object
 1   Rating  20491 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 320.3+ KB


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric column(s)
df.describe()

Unnamed: 0,Rating
count,20491.0
mean,3.952223
std,1.23303
min,1.0
25%,3.0
50%,4.0
75%,5.0
max,5.0


In [7]:
# (rows, columns) of the loaded frame
df.shape

(20491, 2)

# Explore the dataset for any trends, patterns, and relationships

In [8]:
# Histogram showing how the reviews are spread across the rating values
fig = px.histogram(
    data_frame=df,
    x="Rating",
    nbins=5,
    labels={"Rating": "Hotel Ratings"},
    title="Distribution of Ratings",
)
fig.show()

# Word frequency and sentiment analysis

In [16]:
# Load spaCy's 'en_core_web_lg' English pipeline, used below for word sentiment and word frequency analysis
# NOTE(review): 'en_core_web_lg' presumably has to be installed separately from spaCy itself — confirm the environment setup
import spacy
nlp = spacy.load('en_core_web_lg')
type(nlp)  # show what kind of object was loaded

spacy.lang.en.English

In [17]:
# Function to preprocess text
def preprocess_text(text: str) -> str:
    """Lemmatize a review and drop tokens that carry no content.

    Runs ``text`` through the module-level spaCy pipeline ``nlp`` and keeps
    the lemma of every token that is not a stop word, punctuation, or
    whitespace.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The kept lemmas joined by single spaces; an empty string when no
        token survives the filters.
    """
    doc = nlp(text)
    # spaCy emits runs of extra whitespace as tokens of their own; they are
    # neither stop words nor punctuation, so exclude them explicitly to keep
    # blank "words" out of the lemma string.
    tokens = [
        token.lemma_
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return " ".join(tokens)


# Store each review's space-joined lemmas in a new 'lemmas' column
df['lemmas'] = df['Review'].map(preprocess_text)

In [18]:
# Display the frame to compare the original Review text with the new 'lemmas' column
df

Unnamed: 0,Review,Rating,lemmas
0,nice hotel expensive parking got good deal sta...,4,nice hotel expensive parking get good deal sta...
1,ok nothing special charge diamond member hilto...,2,ok special charge diamond member hilton decide...
2,nice rooms not 4* experience hotel monaco seat...,3,nice room 4 experience hotel monaco seattle go...
3,"unique, great stay, wonderful time hotel monac...",5,unique great stay wonderful time hotel monaco ...
4,"great stay great stay, went seahawk game aweso...",5,great stay great stay go seahawk game awesome ...
...,...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",5,well keep secret 3rd time stay charm 5 star be...
20487,great location price view hotel great quick pl...,4,great location price view hotel great quick pl...
20488,"ok just looks nice modern outside, desk staff ...",2,ok look nice modern outside desk staff particu...
20489,hotel theft ruined vacation hotel opened sept ...,1,hotel theft ruin vacation hotel open sept 17 2...
