# Importing necessities

In [10]:
import os
import re
import unicodedata
from pathlib import Path

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tqdm import tqdm
from transformers import pipeline

# Confirmation banner so the reader can see the import cell finished.
print("Modules imported !")

Modiles imported !


In [4]:
# Directory that holds the scraped review CSVs. The default is the location the
# notebook was originally run from; set the PLAYSTORE_DATA_DIR environment
# variable to point at the files on another machine instead of editing paths.
DATA_DIR = Path(os.environ.get('PLAYSTORE_DATA_DIR', 'D:/playstore_scraper/notebooks'))

# Raw Play Store reviews, one DataFrame per app.
zomato_data = pd.read_csv(DATA_DIR / 'zomato_data.csv')
swiggy_data = pd.read_csv(DATA_DIR / 'swiggy_data.csv')

print("Complete")

Complete


# Data Cleaning (Stopwords Removal) 

In [5]:
def _ensure_nltk_resource(probe, *packages):
    """Download NLTK data packages only when `probe` cannot find them.

    `probe` is a zero-argument callable that touches the resource; NLTK raises
    LookupError when the underlying data has not been downloaded yet. In that
    case every name in `packages` is requested via `nltk.download`.
    """
    try:
        probe()
    except LookupError:
        for package in packages:
            nltk.download(package, quiet=True)


# A fresh environment has no NLTK data, so fetch what this notebook needs.
_ensure_nltk_resource(lambda: stopwords.words('english'), 'stopwords')
# NOTE(review): the tokenizer data package is named 'punkt' or 'punkt_tab'
# depending on the installed NLTK release, so both are requested -- confirm
# against the NLTK version in use.
_ensure_nltk_resource(lambda: word_tokenize('probe'), 'punkt', 'punkt_tab')

eng_stopwords = set(stopwords.words('english'))

# Hand-maintained Hindi stopwords (NLTK ships no Hindi list that is used here).
# NOTE(review): this set contains the negation 'नहीं'; removing negations can
# change the meaning of a review -- confirm that is intended.
hindi_stopwords = {
    'के', 'का', 'एक', 'में', 'की', 'है', 'यह', 'और', 'से', 'हैं', 'को', 'पर',
    'इस', 'होता', 'कि', 'जो', 'कर', 'मे', 'गया', 'करने', 'किया', 'लिये', 'अपने',
    'ने', 'बनी', 'नहीं', 'तो', 'ही', 'या', 'एवं', 'दिया', 'हो', 'इसका', 'था',
    'द्वारा', 'हुआ', 'तक', 'साथ', 'करना', 'वाले', 'बाद', 'लिए', 'आप', 'कुछ',
    'सकते', 'किसी', 'ये', 'इसके', 'सबसे', 'इसमें', 'थे', 'दो', 'होने', 'वह',
    'वे', 'करते', 'बहुत', 'कहा', 'वर', 'कई', 'कम', 'करें', 'अभी', 'अगर',
    'जब', 'होती', 'भी', 'मैं', 'हम', 'तुम', 'उसका', 'यहाँ', 'वहाँ', 'क्या',
    'दूसरे', 'इसे', 'उन्हें', 'जैसे', 'सकता', 'इसी', 'अपनी', 'उनके', 'हुई',
    'वर्ग', 'जिसका', 'जिसकी', 'जिसके', 'निकाल', 'अंदर', 'जितना',
}

# Single lookup set used when filtering tokens in either language.
combined_stopwords = eng_stopwords | hindi_stopwords

def _is_word_token(token):
    """Return True when `token` is a word or number in any script.

    `str.isalnum()` on its own is False for Devanagari words that contain
    vowel signs or a virama, because those characters are Unicode combining
    marks (general category M*), not letters. Combining marks are therefore
    accepted as long as the token also holds at least one alphanumeric
    character; punctuation, emoji and other symbols still disqualify it.
    """
    has_alnum = False
    for char in token:
        if char.isalnum():
            has_alnum = True
        elif not unicodedata.category(char).startswith('M'):
            return False
    return has_alnum


def remove_stpwords(text, stopword_set=None, tokenizer=None):
    """Lowercase `text` and drop stopwords, punctuation and symbol tokens.

    Parameters
    ----------
    text : object
        The review text. Missing values and the empty string yield ''.
        Non-string values are converted with `str()` before processing.
    stopword_set : collection of str, optional
        Tokens to discard. Defaults to the module-level `combined_stopwords`.
    tokenizer : callable, optional
        Function mapping a string to a list of tokens. Defaults to NLTK's
        `word_tokenize`.

    Returns
    -------
    str
        The surviving tokens joined by single spaces.
    """
    if pd.isna(text) or text == '':
        return ''

    # Resolve defaults at call time so the module-level objects are only
    # required when the caller does not supply their own.
    if stopword_set is None:
        stopword_set = combined_stopwords
    if tokenizer is None:
        tokenizer = word_tokenize

    # str() guards against cells that pandas parsed as numbers.
    tokens = tokenizer(str(text).lower())

    return ' '.join(
        token for token in tokens
        if token not in stopword_set and _is_word_token(token)
    )

# Store a stopword-free copy of every review next to the original text.
for reviews_df in (zomato_data, swiggy_data):
    reviews_df['review_cleaned'] = reviews_df['review'].apply(remove_stpwords)

# Preview the first ten reviews of each app before and after cleaning
# (Zomato first, then Swiggy).
for reviews_df in (zomato_data, swiggy_data):
    print('Before : ')
    print(reviews_df['review'].head(10))
    print('After : ')
    print(reviews_df['review_cleaned'].head(10))



Before : 
0    they are thieves. I placed an order today with...
1                                nice app good service
2                                                   👍👍
3                                            excellent
4    Zomato is a good app and food gets delivered q...
5                                              great 👍
6    my order was not delivered by delivery partner...
7                                             Nice app
8                                            great app
9                                                    👍
Name: review, dtype: object
After : 
0    thieves placed order today account younger son...
1                                nice app good service
2                                                     
3                                            excellent
4          zomato good app food gets delivered quickly
5                                                great
6    order delivered delivery partner lost money or...
7                 

In [6]:
# Write each cleaned review table to a CSV in the working directory,
# leaving out the DataFrame index.
cleaned_outputs = (
    (zomato_data, 'final_zomato.csv'),
    (swiggy_data, 'final_swiggy.csv'),
)
for frame, filename in cleaned_outputs:
    frame.to_csv(filename, index=False)

print('Done !!')

Done !!


In [9]:
def _version_sort_key(versions):
    """Map version strings to keys that sort in numeric version order.

    Sorting the raw strings is lexicographic, which puts '9.9.9' after
    '13.2.0' and '13.10.0' before '13.2.0'. Each run of digits is zero-padded
    to a fixed width so that plain string comparison matches numeric order.
    Missing values stay missing, and values without any digits become None,
    so both are placed last by `sort_values`.
    """
    def pad(version):
        parts = re.findall(r'\d+', str(version))
        if not parts:
            return None
        return '.'.join(part.zfill(10) for part in parts)

    return versions.map(pad, na_action='ignore')


# Order each app's reviews from the oldest app version to the newest and
# renumber the rows. mergesort is stable, so reviews that share a version keep
# their existing relative order.
zomato_data = (
    zomato_data
    .sort_values(by='app_version', key=_version_sort_key, ascending=True, kind='mergesort')
    .reset_index(drop=True)
)

swiggy_data = (
    swiggy_data
    .sort_values(by='app_version', key=_version_sort_key, ascending=True, kind='mergesort')
    .reset_index(drop=True)
)

print('Zomato Data: ')
print(zomato_data.head(10))

print('Swiggy Data: ')
print(swiggy_data.head(10))

Zomato Data: 
         date       user_name  \
0  2025-09-25     Nihal Mehta   
1  2025-09-21    Bhumit Kyada   
2  2025-09-14     Bharat Kale   
3  2025-09-27   A Google user   
4  2025-09-13      Gourab Das   
5  2025-09-18  Bhavana S Akki   
6  2025-09-28        Himanshu   
7  2025-09-01    mr shoaib007   
8  2025-09-20    Uday Chauhan   
9  2025-09-11              HB   

                                              review  score app_version  \
0                                       I love youll      5      10.1.1   
1  food was costly and quality was too bad Sw!ggy...      1      12.3.7   
2  I've cancelled my order within 10mnts.there is...      1      13.1.2   
3                                        No response      5      13.2.0   
4        platform fee 12.50 rs!! I rather use Swiggy      1      13.2.1   
5                I am very much happy with this app❤      5      13.2.4   
6  very fast delivery, great food and best thing ...      5      13.2.7   
7                     

# Sentiment Analysis

In [12]:
# Hugging Face model used to classify the reviews.
SENTIMENT_MODEL_NAME = "tabularisai/multilingual-sentiment-analysis"

# Text-classification pipeline shared by the sentiment cells below.
# NOTE(review): device=0 asks for the first accelerator, yet the recorded run
# logged "Device set to use cpu" -- confirm which device is actually intended.
sentiment_pipeline = pipeline("sentiment-analysis", model=SENTIMENT_MODEL_NAME, device=0)

def get_reviews(reviews, batch_size=50, classifier=None, show_progress=True):
    """Classify the sentiment of every text in `reviews`, batch by batch.

    Parameters
    ----------
    reviews : iterable of str
        The texts to score, e.g. a pandas Series or a plain list.
    batch_size : int, default 50
        Number of texts handed to the classifier per call.
    classifier : callable, optional
        Called as `classifier(batch, truncation=True, max_length=512)` and
        expected to return one result per text. Defaults to the module-level
        `sentiment_pipeline`.
    show_progress : bool, default True
        Wrap the batch loop in a tqdm progress bar.

    Returns
    -------
    list of dict
        One `{'label': ..., 'score': ...}` entry per input text, in input
        order. When a batch raises, the error is printed and every text in
        that batch receives a placeholder 'Neutral' label with score 0.0.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if classifier is None:
        classifier = sentiment_pipeline

    # Materialise once so Series, lists and other iterables slice the same way.
    texts = list(reviews)
    batch_starts = range(0, len(texts), batch_size)
    if show_progress:
        batch_starts = tqdm(batch_starts)

    sentiments = []
    for i in batch_starts:
        batch = texts[i:i + batch_size]

        try:
            results = classifier(batch, truncation=True, max_length=512)
            sentiments.extend(results)
        except Exception as e:
            # Best effort: keep the output aligned with the input rows rather
            # than aborting the whole run because of one bad batch.
            print(f"Error in batch {i}: {e}")
            # 'Neutral' matches the casing of the labels the model emits, and
            # a fresh dict per row avoids all failed rows sharing one object.
            sentiments.extend({'label': 'Neutral', 'score': 0.0} for _ in batch)

    return sentiments

# Score every cleaned Zomato review with the multilingual sentiment model.
# NOTE(review): the model sees the stopword-stripped text. In the cleaning
# preview above, "my order was not delivered ..." became "order delivered ...",
# so negations are lost before scoring -- confirm this input is intended.
print("Analyzing multilingual sentiments...")
sentiment_results = get_reviews(zomato_data['review_cleaned'])

# Split each result dict into its label and score, then attach both as columns.
zomato_labels = []
zomato_scores = []
for result in sentiment_results:
    zomato_labels.append(result['label'])
    zomato_scores.append(result['score'])
zomato_data['sentiment_label'] = zomato_labels
zomato_data['sentiment_score'] = zomato_scores

print("\nResults:")
print(zomato_data[['review', 'sentiment_label', 'sentiment_score']].head(10))

config.json:   0%|          | 0.00/851 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cpu


Analyzing multilingual sentiments...


100%|██████████| 510/510 [05:26<00:00,  1.56it/s]


Results:
                                              review sentiment_label  \
0                                       I love youll        Positive   
1  food was costly and quality was too bad Sw!ggy...   Very Negative   
2  I've cancelled my order within 10mnts.there is...        Negative   
3                                        No response         Neutral   
4        platform fee 12.50 rs!! I rather use Swiggy        Negative   
5                I am very much happy with this app❤        Positive   
6  very fast delivery, great food and best thing ...        Positive   
7                     worst app and the service also   Very Negative   
8  I'm now irritating with this app time taken is...        Negative   
9                     good work for lazy peoples..😁😁        Positive   

   sentiment_score  
0         0.431561  
1         0.558284  
2         0.650167  
3         0.838287  
4         0.643328  
5         0.899562  
6         0.582522  
7         0.731763  
8       




In [13]:
# Score every cleaned Swiggy review with the multilingual sentiment model.
# NOTE(review): the model sees the stopword-stripped text, so negations such
# as "not" are removed before scoring -- confirm this input is intended.
print("Analyzing multilingual sentiments...")
sentiment_results = get_reviews(swiggy_data['review_cleaned'])

# Split each result dict into its label and score, then attach both as columns.
swiggy_labels = []
swiggy_scores = []
for result in sentiment_results:
    swiggy_labels.append(result['label'])
    swiggy_scores.append(result['score'])
swiggy_data['sentiment_label'] = swiggy_labels
swiggy_data['sentiment_score'] = swiggy_scores

print("\nResults:")
print(swiggy_data[['review', 'sentiment_label', 'sentiment_score']].head(10))

Analyzing multilingual sentiments...


100%|██████████| 358/358 [03:40<00:00,  1.62it/s]


Results:
                                              review sentiment_label  \
0                                           good job        Positive   
1  app is superb uber eat is charging 10 rs for d...         Neutral   
2                               My First Love Swiggy   Very Positive   
3  I never seen such a branded app/ service behav...        Negative   
4  service getting worse as days go by. worst cus...        Negative   
5  please don't download this app and don't Pay o...         Neutral   
6  New policy of no refund on missing items is a ...        Negative   
7              Payment fails when we add debit card.        Negative   
8  bahut hi ghatiya app hai WORUST Aap mai to Zin...   Very Negative   
9  DON'T USE THIS APP worst app ever, they take f...   Very Negative   

   sentiment_score  
0         0.922594  
1         0.553512  
2         0.322402  
3         0.537218  
4         0.635275  
5         0.530797  
6         0.462618  
7         0.475759  
8       




In [15]:
# Write each table, now including the sentiment columns, to a CSV in the
# working directory, leaving out the DataFrame index.
sentiment_outputs = (
    (zomato_data, 'sentiment_zomato.csv'),
    (swiggy_data, 'sentiment_swiggy.csv'),
)
for frame, filename in sentiment_outputs:
    frame.to_csv(filename, index=False)

print('complete')

complete
