In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
# Load the dataset from the CSV file
df = pd.read_csv('McDonald_s_Reviews.csv', encoding='latin-1')

# Display the first few rows of the dataset
df.head()


Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1 star
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5 stars
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1 star


In [11]:
df.columns

Index(['reviewer_id', 'store_name', 'category', 'store_address', 'latitude ',
       'longitude', 'rating_count', 'review_time', 'review', 'rating'],
      dtype='object')

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33396 entries, 0 to 33395
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   reviewer_id    33396 non-null  int64  
 1   store_name     33396 non-null  object 
 2   category       33396 non-null  object 
 3   store_address  33396 non-null  object 
 4   latitude       32736 non-null  float64
 5   longitude      32736 non-null  float64
 6   rating_count   33396 non-null  object 
 7   review_time    33396 non-null  object 
 8   review         33396 non-null  object 
 9   rating         33396 non-null  object 
dtypes: float64(2), int64(1), object(7)
memory usage: 2.5+ MB


In [35]:
df['rating'] = df['rating'].astype(int)

ValueError: invalid literal for int() with base 10: '1 star'

In [27]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

sentiment_scores = []
for review in df['review']:
    sentiment = sia.polarity_scores(review)['compound']
    sentiment_scores.append(sentiment)

df['sentiment_score'] = sentiment_scores

df.head()

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,sentiment_score
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,0.5215
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,0.8687
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1 star,-0.3535
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5 stars,0.0
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1 star,-0.802


In [28]:
# Define custom thresholds for sentiment categories
thresholds = {
    'Very Negative': -1.0,
    'Negative': -0.5,
    'Neutral': 0.0,
    'Positive': 0.5,
    'Very Positive': 1.0
}

In [29]:
# Assign sentiment labels based on the sentiment scores and thresholds
sentiment_labels = []
for score in df['sentiment_score']:
    for label, threshold in thresholds.items():
        if score <= threshold:
            sentiment_labels.append(label)
            break
    else:
        sentiment_labels.append('Unknown')
df['sentiment_label'] = sentiment_labels


In [31]:
df.head(5)

Unnamed: 0,reviewer_id,store_name,category,store_address,latitude,longitude,rating_count,review_time,review,rating,sentiment_score,sentiment_label
0,1,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,3 months ago,Why does it look like someone spit on my food?...,1 star,0.5215,Very Positive
1,2,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,It'd McDonalds. It is what it is as far as the...,4 stars,0.8687,Very Positive
2,3,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,5 days ago,Made a mobile order got to the speaker and che...,1 star,-0.3535,Neutral
3,4,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,a month ago,My mc. Crispy chicken sandwich was ï¿½ï¿½ï¿½ï¿...,5 stars,0.0,Neutral
4,5,McDonald's,Fast food restaurant,"13749 US-183 Hwy, Austin, TX 78750, United States",30.460718,-97.792874,1240,2 months ago,"I repeat my order 3 times in the drive thru, a...",1 star,-0.802,Negative


In [33]:
# Define mapping between sentiment scores and sentiment labels
score_label_mapping = {
    -1.0: 'Very Negative',
    -0.5: 'Negative',
    0.0: 'Neutral',
    0.5: 'Positive',
    1.0: 'Very Positive'
}

# Assign sentiment labels based on ratings and sentiment scores
sentiment_labels = []
for index, row in df.iterrows():
    sentiment_score = row['sentiment_score']
    rating = row['rating']
    if sentiment_score in score_label_mapping:
        sentiment_labels.append(score_label_mapping[sentiment_score])
    else:
        if rating <= 2:
            sentiment_labels.append('Negative')
        elif rating >= 4:
            sentiment_labels.append('Positive')
        else:
            sentiment_labels.append('Neutral')
df['sentiment_label'] = sentiment_labels

# Display the updated dataset with sentiment labels
print(df[['review', 'rating', 'sentiment_score', 'sentiment_label']])






TypeError: '<=' not supported between instances of 'str' and 'int'