# Importing Dependencies

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.manifold import TSNE
import warnings
warnings.filterwarnings('ignore')

# Download Dataset

In [None]:
# Disabled one-time download step: the code below is wrapped in a string
# literal, so it is not executed. Unwrap it to fetch the dataset via kagglehub.
'''
import kagglehub

# Download latest version
path = kagglehub.dataset_download("mannacharya/blinkit-vs-zepto-vs-instamart-reviews")

print("Path to dataset files:", path)
'''


In [None]:
# List the kagglehub cache folder for this dataset owner. The location is built
# from the current user's home directory rather than one hard-coded Windows
# profile, so the cell also runs on other machines and operating systems.
# NOTE(review): assumes kagglehub's default cache location (~/.cache/kagglehub);
# adjust if a custom cache directory is configured.
os.listdir(os.path.join(os.path.expanduser('~'), '.cache', 'kagglehub', 'datasets', 'mannacharya'))

In [None]:
# Load all reviews into a DataFrame. 'reviews.csv' is resolved relative to the
# current working directory.
# NOTE(review): this path is independent of the kagglehub cache folder — confirm
# the CSV has been copied next to the notebook.
df = pd.read_csv('reviews.csv')

# Quick Look at the Data

In [None]:
# Preview the first rows of the loaded reviews.
df.head()

In [None]:
# List the distinct values found in the 'platform' column.
df['platform'].unique()

In [None]:
# Keep only the Blinkit rows; reset_index() stores the old row labels in an 'index' column.
df_blinkit = df[df['platform'].eq('blinkit')].reset_index()

In [None]:
# Discard the saved old row labels and the review date; neither is used afterwards.
df_blinkit = df_blinkit.drop(columns=['index', 'date'])

In [None]:
# Display the Blinkit subset.
df_blinkit

In [None]:
# Zepto subset: filter by platform, renumber the rows, then drop the saved
# old row labels ('index') and the 'date' column.
df_zepto = (
    df[df['platform'].eq('zepto')]
    .reset_index()
    .drop(columns=['index', 'date'])
)

In [None]:
# Jiomart subset: filter by platform, renumber the rows, then drop the saved
# old row labels ('index') and the 'date' column.
df_jiomart = (
    df[df['platform'].eq('jiomart')]
    .reset_index()
    .drop(columns=['index', 'date'])
)

In [None]:
# Display the Jiomart subset.
df_jiomart

In [None]:
# Display the Zepto subset.
df_zepto

# Calculate Positive (Rated > 3) & Negative (Rated <= 3) Reviews

In [None]:
# Zepto: number of reviews rated above 3 (positive) and rated 3 or below (negative).
positive_reviews_zepto = int(df_zepto['rating'].gt(3).sum())
negative_reviews_zepto = int(df_zepto['rating'].le(3).sum())

In [None]:
# Blinkit: number of reviews rated above 3 (positive) and rated 3 or below (negative).
positive_reviews_blinkit = int(df_blinkit['rating'].gt(3).sum())
negative_reviews_blinkit = int(df_blinkit['rating'].le(3).sum())

In [None]:
# Jiomart: number of reviews rated above 3 (positive) and rated 3 or below (negative).
positive_reviews_jiomart = int(df_jiomart['rating'].gt(3).sum())
negative_reviews_jiomart = int(df_jiomart['rating'].le(3).sum())

# EDA

In [None]:
# Grouped bar chart: positive (>3) and negative (<=3) rating counts per brand.
plt.figure(figsize=(10,8))

brands = ['Blinkit', 'Zepto', 'Jiomart']
positive_counts = [positive_reviews_blinkit, positive_reviews_zepto, positive_reviews_jiomart]
negative_counts = [negative_reviews_blinkit, negative_reviews_zepto, negative_reviews_jiomart]

# One slot per brand; the negative bar is drawn one bar-width to the right of
# the positive bar so the two sit side by side.
bar_width = 0.1
x = range(len(brands))

bars_positive = plt.bar(x, positive_counts, width=bar_width,
                        label='Positive (>3)', color='#32CD31')
bars_negative = plt.bar([pos + bar_width for pos in x], negative_counts, width=bar_width,
                        label='Negative (<=3)', color='#FF2117')

# Print each bar's count centred just above its top edge.
for bar in (*bars_positive, *bars_negative):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_y() + bar.get_height(),
             str(bar.get_height()), ha='center', va='bottom', fontsize=10)

# Titles, axis labels, and ticks centred between each pair of bars.
plt.title('Positive and Negative Ratings for 3 Brands')
plt.xlabel('Brand')
plt.ylabel('Count of Ratings')
plt.xticks([pos + bar_width / 2 for pos in x], brands)
plt.legend(title='Rating Category')

plt.show()


In [None]:
# Show the (rows, columns) size of the full review DataFrame.
df.shape

In [None]:
# Count of reviews per platform, split by star rating.
# The DataFrame is passed as `data=` because seaborn releases before 0.12 do not
# accept it as the first positional argument. The stray palette-name lookup that
# used to open this cell had no effect and was removed.
sns.countplot(data=df, x='platform', hue='rating', palette='coolwarm')
plt.title('Rating plot of brands')
plt.xlabel('Brands')
plt.ylabel('Number of reviews')
plt.legend(title='Rating')
plt.show()

# Word Cloud for each platform

In [None]:
def generate_wordCloud(text, title):
    """Render a word cloud for ``text`` and show it under ``title``.

    English stop words from NLTK are excluded from the cloud. If the NLTK
    ``stopwords`` corpus is not installed, it is downloaded once and the
    lookup is retried instead of failing with ``LookupError``.

    Args:
        text: The text whose words are drawn in the cloud.
        title: Title displayed above the image.
    """
    try:
        english_stopwords = stopwords.words('english')
    except LookupError:
        # Corpus missing on this machine: fetch it, then retry the lookup.
        nltk.download('stopwords')
        english_stopwords = stopwords.words('english')

    cloud = WordCloud(width=400, height=400,
                      background_color='white',
                      stopwords=set(english_stopwords)).generate(text)
    plt.imshow(cloud, interpolation='bilinear')
    plt.title(title)
    plt.axis('off')
    plt.show()
    

# Draw one word cloud per platform from all of that platform's reviews.
platforms = df['platform'].unique()
for platform in platforms:
    # Cast to str so non-string review values can still be joined.
    platform_reviews = df.loc[df['platform'] == platform, 'review'].astype(str)
    text = ' '.join(platform_reviews)
    generate_wordCloud(text, f'Word Cloud for {platform.capitalize()}')
    


#  A sentiment analysis pipeline using Hugging Face Transformers library

In [None]:
# Build the sentiment classifier once; it is reused for every review.
# The checkpoint is named explicitly so results do not depend on whichever
# default model the installed transformers release picks for this task.
# NOTE(review): this is believed to be the checkpoint the task defaulted to —
# confirm, or swap in the model you intend to use.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    device=-1,  # -1 runs on CPU (no GPU)
)

In [None]:
def classify_sentiment(text):
    """Return the sentiment label predicted for one review.

    Args:
        text: Review text. Non-string values are converted with ``str``
            before classification, mirroring the ``astype(str)`` used when
            building the word clouds.

    Returns:
        The ``'label'`` field of the first prediction returned by
        ``sentiment_pipeline``.
    """
    # truncation=True clips reviews that exceed the model's maximum input
    # length; without it such reviews make the pipeline raise.
    result = sentiment_pipeline(str(text), truncation=True)
    return result[0]['label']

In [None]:
# Label every review by running it through classify_sentiment, one row at a time.
df['sentiment'] = df['review'].map(classify_sentiment)

# Updated data on the basis of Sentiment analysis

In [None]:
# Display the full DataFrame, which now includes the 'sentiment' column.
df

# Grouping Data to get count for Positive & Negative Sentiments for each platform with EDA

In [None]:
# Number of reviews for each (platform, sentiment) pair, as a flat table with a 'count' column.
sentiment_counts = (
    df.groupby(['platform', 'sentiment'])
    .size()
    .rename('count')
    .reset_index()
)

In [None]:
# Display the per-platform sentiment counts.
sentiment_counts

In [None]:
# Grouped bars: review count per platform, one bar per sentiment label.
plt.figure(figsize=(8, 4))
sns.barplot(
    data=sentiment_counts,
    x='platform',
    y='count',
    hue='sentiment',
    palette='bright6',
)
plt.gca().set(
    title='Sentiment analysis of E-commerces',
    xlabel='Brand',
    ylabel='Number of reviews',
)
plt.gca().legend(title='Sentiment')
plt.show()

# Initializing Sentence Transformer model for t-SNE

In [None]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model; it is used later to encode the reviews.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
# Fit a label encoder on the platform names, then store the integer codes in a new column.
myEnc = LabelEncoder().fit(df['platform'])
df['Encoded_Platform'] = myEnc.transform(df['platform'])

In [None]:
# Embed every review with the sentence-transformer model.
# The reviews are handed over as a plain list of strings: encode() indexes its
# input by position, which a pandas Series only supports with a default integer
# index, and non-string values (e.g. missing reviews) cannot be tokenized.
embeddings = model.encode(df['review'].astype(str).tolist(), show_progress_bar=True)

In [None]:
# Project the review embeddings down to two dimensions with t-SNE.
# random_state is fixed so repeated runs use the same seed.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version before upgrading.
tsne = TSNE(n_components=2, random_state=42, perplexity=5, n_iter=300)
tsne_results = tsne.fit_transform(embeddings)

In [None]:
# Store the two t-SNE coordinates as columns; transposing yields one row per coordinate.
df['tsne-2d-one'], df['tsne-2d-two'] = tsne_results.T

In [None]:
# Scatter the 2-D t-SNE coordinates, one colour per platform.
plt.figure(figsize=(10,8))
sns.scatterplot(
    data=df,
    x="tsne-2d-one",
    y="tsne-2d-two",
    hue="platform",
    # One hue per distinct platform value.
    palette=sns.color_palette("hsv", len(platforms)),
    alpha=0.7,
)
plt.gca().set_title('t-SNE Visualization of Reviews')
plt.gca().legend(title='Platform')
plt.show()