### Loading our dataset

In [19]:
import pandas as pd

# Load the dataset from a CSV file
df = pd.read_csv('Walmart Canada Reviews _ Read Customer Service Reviews of www.walmart.ca.csv')

# Preview the dataset
df.head()


Unnamed: 0,Title_URL,Ratings(1-5),Review Heading,Review Body,Date
0,https://ca.trustpilot.com/users/5f3e4636d5dfc2...,1,DELIVER YOURSELVES NOT WITH DOORDASH,Walmart should perhaps learn how to deliver th...,2024-10-07
1,https://ca.trustpilot.com/users/655ba4c683806a...,1,Do not order online the app is garbage,I ordered originally 11 items of where 2 where...,2024-09-09
2,https://ca.trustpilot.com/users/67040a868a97fc...,1,Don't order heavy items from Walmart,Don't order heavy items from Walmart on-line!\...,2024-09-26
3,https://ca.trustpilot.com/users/60091fc06d10d9...,1,"money taken, order never shipped",I ordered from their website and made sure I w...,2024-10-03
4,https://ca.trustpilot.com/users/66b7bee93f448c...,1,Customer associates attention at Walmart,I went shopping today at Walmart located at 16...,2024-10-01


### Data Cleaning amd Preprocessing

In [20]:
# Check for null values in the dataset
print(df.isnull().sum())


Title_URL          0
Ratings(1-5)       0
Review Heading     0
Review Body       13
Date               0
dtype: int64


In [21]:
# Drop rows where both 'Review Heading' and 'Review Body' are null
df = df.dropna(subset=['Review Heading', 'Review Body'])

# Check again for nulls
print(df.isnull().sum())


Title_URL         0
Ratings(1-5)      0
Review Heading    0
Review Body       0
Date              0
dtype: int64


In [22]:
# Check for duplicates
duplicate_count = df.duplicated().sum()
print(f'Number of duplicate rows: {duplicate_count}')

# Remove duplicates
df = df.drop_duplicates()

# Verify removal of duplicates
print(f'Number of duplicate rows after removal: {df.duplicated().sum()}')


Number of duplicate rows: 443
Number of duplicate rows after removal: 0


In [23]:
df.head()

Unnamed: 0,Title_URL,Ratings(1-5),Review Heading,Review Body,Date
0,https://ca.trustpilot.com/users/5f3e4636d5dfc2...,1,DELIVER YOURSELVES NOT WITH DOORDASH,Walmart should perhaps learn how to deliver th...,2024-10-07
1,https://ca.trustpilot.com/users/655ba4c683806a...,1,Do not order online the app is garbage,I ordered originally 11 items of where 2 where...,2024-09-09
2,https://ca.trustpilot.com/users/67040a868a97fc...,1,Don't order heavy items from Walmart,Don't order heavy items from Walmart on-line!\...,2024-09-26
3,https://ca.trustpilot.com/users/60091fc06d10d9...,1,"money taken, order never shipped",I ordered from their website and made sure I w...,2024-10-03
4,https://ca.trustpilot.com/users/66b7bee93f448c...,1,Customer associates attention at Walmart,I went shopping today at Walmart located at 16...,2024-10-01


In [24]:
import re

# Function to clean text
def clean_text(text):
    # Convert text to lowercase
    text = text.lower()
    # Remove punctuation and special characters
    text = re.sub(r'[^\w\s]', '', text)
    # Remove extra spaces and newlines
    text = text.strip()
    return text

# Apply the cleaning function to both 'Review Heading' and 'Review Body'
df['Cleaned Review Heading'] = df['Review Heading'].apply(clean_text)
df['Cleaned Review Body'] = df['Review Body'].apply(clean_text)

# Preview the cleaned data
print(df[['Cleaned Review Heading', 'Cleaned Review Body']].head())


                     Cleaned Review Heading  \
0      deliver yourselves not with doordash   
1    do not order online the app is garbage   
2       dont order heavy items from walmart   
3           money taken order never shipped   
4  customer associates attention at walmart   

                                 Cleaned Review Body  
0  walmart should perhaps learn how to deliver th...  
1  i ordered originally 11 items of where 2 where...  
2  dont order heavy items from walmart online\n\n...  
3  i ordered from their website and made sure i w...  
4  i went shopping today at walmart located at 16...  


In [25]:
df.describe()

Unnamed: 0,Ratings(1-5)
count,1958.0
mean,1.252298
std,0.855821
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,5.0


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1958 entries, 0 to 1970
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Title_URL               1958 non-null   object
 1   Ratings(1-5)            1958 non-null   int64 
 2   Review Heading          1958 non-null   object
 3   Review Body             1958 non-null   object
 4   Date                    1958 non-null   object
 5   Cleaned Review Heading  1958 non-null   object
 6   Cleaned Review Body     1958 non-null   object
dtypes: int64(1), object(6)
memory usage: 122.4+ KB


In [27]:
# Convert object to datetime
df['Date'] = pd.to_datetime(df['Date'])


In [29]:
df.head()

Unnamed: 0,Title_URL,Ratings(1-5),Review Heading,Review Body,Date,Cleaned Review Heading,Cleaned Review Body
0,https://ca.trustpilot.com/users/5f3e4636d5dfc2...,1,DELIVER YOURSELVES NOT WITH DOORDASH,Walmart should perhaps learn how to deliver th...,2024-10-07,deliver yourselves not with doordash,walmart should perhaps learn how to deliver th...
1,https://ca.trustpilot.com/users/655ba4c683806a...,1,Do not order online the app is garbage,I ordered originally 11 items of where 2 where...,2024-09-09,do not order online the app is garbage,i ordered originally 11 items of where 2 where...
2,https://ca.trustpilot.com/users/67040a868a97fc...,1,Don't order heavy items from Walmart,Don't order heavy items from Walmart on-line!\...,2024-09-26,dont order heavy items from walmart,dont order heavy items from walmart online\n\n...
3,https://ca.trustpilot.com/users/60091fc06d10d9...,1,"money taken, order never shipped",I ordered from their website and made sure I w...,2024-10-03,money taken order never shipped,i ordered from their website and made sure i w...
4,https://ca.trustpilot.com/users/66b7bee93f448c...,1,Customer associates attention at Walmart,I went shopping today at Walmart located at 16...,2024-10-01,customer associates attention at walmart,i went shopping today at walmart located at 16...


In [31]:
df = df.drop(['Review Heading', 'Review Body'], axis = 1)

In [32]:
df.head()

Unnamed: 0,Title_URL,Ratings(1-5),Date,Cleaned Review Heading,Cleaned Review Body
0,https://ca.trustpilot.com/users/5f3e4636d5dfc2...,1,2024-10-07,deliver yourselves not with doordash,walmart should perhaps learn how to deliver th...
1,https://ca.trustpilot.com/users/655ba4c683806a...,1,2024-09-09,do not order online the app is garbage,i ordered originally 11 items of where 2 where...
2,https://ca.trustpilot.com/users/67040a868a97fc...,1,2024-09-26,dont order heavy items from walmart,dont order heavy items from walmart online\n\n...
3,https://ca.trustpilot.com/users/60091fc06d10d9...,1,2024-10-03,money taken order never shipped,i ordered from their website and made sure i w...
4,https://ca.trustpilot.com/users/66b7bee93f448c...,1,2024-10-01,customer associates attention at walmart,i went shopping today at walmart located at 16...


### Sentiment Analysis

In [33]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Function to calculate all VADER sentiment scores
def get_vader_scores(text):
    scores = analyzer.polarity_scores(text)
    return scores['pos'], scores['neu'], scores['neg'], scores['compound']

# Apply VADER sentiment analysis to 'Cleaned Review Heading' and 'Cleaned Review Body'
df['Heading Pos'], df['Heading Neu'], df['Heading Neg'], df['Heading Compound'] = zip(*df['Cleaned Review Heading'].apply(get_vader_scores))
df['Body Pos'], df['Body Neu'], df['Body Neg'], df['Body Compound'] = zip(*df['Cleaned Review Body'].apply(get_vader_scores))


In [34]:
# Save the updated dataset with sentiment scores
df.to_csv('updated_reviews_with_sentiment.csv', index=False)


In [35]:
df.head()

Unnamed: 0,Title_URL,Ratings(1-5),Date,Cleaned Review Heading,Cleaned Review Body,Heading Pos,Heading Neu,Heading Neg,Heading Compound,Body Pos,Body Neu,Body Neg,Body Compound
0,https://ca.trustpilot.com/users/5f3e4636d5dfc2...,1,2024-10-07,deliver yourselves not with doordash,walmart should perhaps learn how to deliver th...,0.0,1.0,0.0,0.0,0.036,0.964,0.0,0.4215
1,https://ca.trustpilot.com/users/655ba4c683806a...,1,2024-09-09,do not order online the app is garbage,i ordered originally 11 items of where 2 where...,0.0,1.0,0.0,0.0,0.038,0.92,0.041,-0.101
2,https://ca.trustpilot.com/users/67040a868a97fc...,1,2024-09-26,dont order heavy items from walmart,dont order heavy items from walmart online\n\n...,0.0,1.0,0.0,0.0,0.018,0.935,0.047,-0.6513
3,https://ca.trustpilot.com/users/60091fc06d10d9...,1,2024-10-03,money taken order never shipped,i ordered from their website and made sure i w...,0.0,1.0,0.0,0.0,0.023,0.893,0.084,-0.9007
4,https://ca.trustpilot.com/users/66b7bee93f448c...,1,2024-10-01,customer associates attention at walmart,i went shopping today at walmart located at 16...,0.0,1.0,0.0,0.0,0.084,0.733,0.182,-0.8966
