**DRIVER CODE**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import nltk
plt.style.use('ggplot')

color = sb.color_palette()
%matplotlib inline
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.express as px

from collections import Counter

import string

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the scraped Flipkart reviews.
df = pd.read_csv('flipkart_30-06-2024-20-58.csv')

# Later cells key every review by df['serial'], so make sure that column exists.
if 'Unnamed: 0' in df.columns:
    # presumably the 0-based index column saved with the CSV — expose it as a
    # 1-based 'serial' (built without in-place mutation).
    df = df.rename(columns={'Unnamed: 0': 'serial'})
    df['serial'] = df['serial'] + 1
elif 'serial' not in df.columns:
    # No saved index column at all: number the rows 1..n instead of letting
    # the scoring loops fail on row['serial'].
    df.insert(0, 'serial', np.arange(1, len(df) + 1))

# Print or further process the DataFrame
print(df)

In [None]:
# Summary statistics for the object (text) columns, transposed to one row per column.
df.describe(include=["object"]).T

In [None]:
# Column names, dtypes and non-null counts of the loaded frame.
df.info()

**DATA CLEANING**

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
 #Define a function to fill null values with 1.0 or 2.0
def fill_nulls(value, rng=None):
    """Return ``value`` unchanged, or a random 1.0 / 2.0 when it is null.

    Parameters
    ----------
    value : scalar
        A single cell value; nullness is decided by ``pd.isnull``.
    rng : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator (for example
        ``np.random.default_rng(42)``) to make the imputation reproducible.
        When omitted, NumPy's global random state is used, as before.

    Returns
    -------
    scalar
        ``value`` itself when it is not null, otherwise 1.0 or 2.0.
    """
    if not pd.isnull(value):
        return value
    # Only draw a random number when a replacement is actually needed.
    chooser = np.random if rng is None else rng
    return chooser.choice([1.0, 2.0])

# Replace every missing rating in 'stars' with the random 1.0 / 2.0 value from fill_nulls
df['stars'] = df['stars'].apply(fill_nulls)

In [None]:
# Per-review text statistics, one row per entry of df['review'], built in a
# single constructor call.
file = pd.DataFrame({
    # number of whitespace-separated tokens
    'Word Count': [len(text.split()) for text in df['review']],
    # number of upper-case characters
    'Uppercase Char Count': [sum(map(str.isupper, text)) for text in df['review']],
    # number of characters found in string.punctuation
    'Special Char Count': [sum(ch in string.punctuation for ch in text)
                           for text in df['review']],
})
file

In [None]:
# Inspect the last rows of the frame.
df.tail()

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

# Number of reviews per star rating, ordered by rating value.
count = df['stars'].value_counts().sort_index()

ax = count.plot(kind="bar", figsize=(10, 8), color="blue", width=0.50)
plt.title('STAR RATING DISTRIBUTION', fontsize=20)
plt.xlabel('STAR RATING', fontsize=20)
plt.ylabel('NUMBER OF REVIEWS', fontsize=20)
plt.xticks(fontsize=20, rotation=0)
plt.grid(True)
# Put each count centred just above its bar. Scaling the bar's left edge
# (x * 1.005) anchored the text at the edge and shifted it by an amount that
# depended on the bar's position.
for p in ax.patches:
    ax.annotate(str(p.get_height()),
                (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center', va='bottom')
plt.show()

In [None]:
#Creating Stopwords list
%matplotlib inline
%config InlineBackend.figure_format="retina"

import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Keep the stop-word set under its own name. Rebinding `stopwords` would replace
# the NLTK corpus reader imported in this cell with a plain set, so any later
# `stopwords.words(...)` call would fail unless the import is repeated first.
cloud_stopwords = set(stopwords.words('english'))
cloud_stopwords.update(["br", "href"])

# One long string containing every review, as expected by WordCloud.generate.
text = " ".join(df['review'])
wordcloud = WordCloud(stopwords=cloud_stopwords).generate(text)

plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Label each review: +1 when the rating is 3 stars or more, otherwise -1.
df['sentiment'] = df['stars'].ge(3).map({True: 1, False: -1})

In [None]:
# Preview the first rows, now including the 'sentiment' column.
df.head()

In [None]:
# Split the reviews by sentiment label (+1 / -1).
positive = df.loc[df['sentiment'].eq(1)]
negative = df.loc[df['sentiment'].eq(-1)]

In [None]:
# First ten reviews labelled positive.
positive[:10]

In [None]:
# First ten reviews labelled negative.
negative[:10]

In [None]:
def getMostCommonWords(reviews, n_most_common, stopwords=None):
    """Return the most frequent words across a collection of reviews.

    Each review is lower-cased and split on whitespace; punctuation characters
    (``string.punctuation``) are stripped from every token; stop words and
    tokens left empty by the stripping are dropped.

    Parameters
    ----------
    reviews : iterable of str
        Review texts, e.g. a DataFrame column such as ``df['review']``.
        Entries that are not strings (for example NaN for a missing review)
        are skipped.
    n_most_common : int
        How many of the top words to return.
    stopwords : iterable of str, optional
        Words to exclude. They are compared against tokens *after*
        punctuation has been stripped.

    Returns
    -------
    list of (str, int)
        ``(word, frequency)`` tuples, most frequent first.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    # A set gives constant-time membership tests; scanning a list for every
    # word is linear in the number of stop words.
    excluded = set(stopwords) if stopwords else set()

    counts = Counter()
    for review in reviews:
        if not isinstance(review, str):
            continue  # missing / non-text review
        for token in review.lower().split():
            word = token.translate(strip_punctuation)
            if word and word not in excluded:
                counts[word] += 1

    return counts.most_common(n_most_common)

In [None]:
# Importing `stopwords` again guarantees the name refers to the NLTK corpus
# reader here, regardless of any earlier rebinding of that name.
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Ten most frequent non-stop-words in the positive reviews.
getMostCommonWords(positive['review'], 10, stopwords.words('english'))

In [None]:
# Importing `stopwords` again guarantees the name refers to the NLTK corpus
# reader here, regardless of any earlier rebinding of that name.
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Ten most frequent non-stop-words in the negative reviews.
getMostCommonWords(negative['review'], 10, stopwords.words('english'))

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Stop words for the positive cloud, kept under their own name so the NLTK
# `stopwords` corpus reader imported in this cell is not shadowed by a set.
positive_stopwords = set(stopwords.words('english'))
# "perfomance" (misspelt) is kept in case reviews contain the typo; the correctly
# spelt "performance" is added because the misspelling alone never filtered it.
positive_stopwords.update([
    "br", "phone", "href", "good", "great", "camera", "iphone", "nice",
    "battery", "best", "awesome", "perfomance", "performance", "buy",
])
## good and great removed because they were included in negative sentiment
pos = " ".join(positive['review'])
wordcloud = WordCloud(stopwords=positive_stopwords).generate(pos)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud

# Stop words for the negative cloud, kept under their own name so the NLTK
# `stopwords` corpus reader imported in this cell is not shadowed by a set.
negative_stopwords = set(stopwords.words('english'))
# "perfomance" (misspelt) is kept in case reviews contain the typo; the correctly
# spelt "performance" is added because the misspelling alone never filtered it.
negative_stopwords.update([
    "like", "phone", "product", "good", "great", "camera", "iphone", "nice",
    "battery", "best", "awesome", "perfomance", "performance", "buy",
    "better", "also",
])
## good and great removed because they were included in negative sentiment
# Named `neg` because this is the negative-review text; the original stored it in `pos`.
neg = " ".join(negative['review'])
wordcloud = WordCloud(stopwords=negative_stopwords).generate(neg)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Swap the numeric sentiment codes for readable labels in one pass.
df['sentiment'] = df['sentiment'].replace({-1: 'negative', 1: 'positive'})

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

# Number of reviews per sentiment label.
count = df['sentiment'].value_counts()

ax = count.plot(kind="bar", figsize=(10, 8), color="blue", width=0.50)
plt.title('PRODUCT SENTIMENT DISTRIBUTION', fontsize=20)
plt.xlabel('PRODUCT SENTIMENT', fontsize=20)
plt.ylabel('NUMBER OF REVIEWS', fontsize=20)
plt.xticks(fontsize=20, rotation=0)
plt.grid(True)
# Put each count centred just above its bar. Scaling the bar's left edge
# (x * 1.005) anchored the text at the edge and shifted it by an amount that
# depended on the bar's position.
for p in ax.patches:
    ax.annotate(str(p.get_height()),
                (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center', va='bottom')
plt.show()

**VADER SENTIMENT SCORING**

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# One shared VADER analyzer, reused by the scoring loops in the following cells.
sia = SentimentIntensityAnalyzer()

In [None]:
# Score every review with VADER; results are keyed by the review's serial number.
res = {}
for myid, text in tqdm(zip(df['serial'], df['review']), total=len(df)):
    res[myid] = sia.polarity_scores(text)

In [None]:
# One row per review: the VADER scores (keyed by serial) joined back onto the
# review data. The join key is named explicitly; without `on`, merge joins on
# every column name the two frames happen to share.
vaders = (
    pd.DataFrame(res).T
    .reset_index()
    .rename(columns={'index': 'serial'})
    .merge(df, how='left', on='serial')
)

In [None]:
# Preview the VADER scores merged with the review data.
vaders.head()

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

# Mean VADER compound score for each star rating.
color = sb.color_palette("bright", 5)
ax = sb.barplot(x='stars', y='compound', data=vaders, palette=color)
ax.set_title('COMPOUND SCORE BY FLIPKART PRODUCT REVIEW')
plt.show()

In [None]:
%matplotlib inline
%config InlineBackend.figure_format="retina"

# Three side-by-side panels: VADER pos / neu / neg score against star rating.
color = sb.color_palette("Spectral", 5)
fig, axs = plt.subplots(1, 3, figsize=(12, 3))
panels = [('pos', 'Positive'), ('neu', 'Neutral'), ('neg', 'Negative')]
for panel_ax, (score_col, panel_title) in zip(axs, panels):
    sb.barplot(data=vaders, x='stars', y=score_col, ax=panel_ax, palette=color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

**ROBERTA PRE-TRAINED MODEL**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax


In [None]:
# Checkpoint name shared by the tokenizer and the classification model.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [None]:
def polarity_scores_roberta(example):
    """Score one text with the RoBERTa sentiment model.

    Uses the module-level ``tokenizer`` and ``model``.

    Parameters
    ----------
    example : str
        The text to classify. Inputs longer than 512 tokens are truncated
        instead of making the model raise.

    Returns
    -------
    dict
        Softmax probabilities under the keys ``'roberta_neg'``,
        ``'roberta_neu'`` and ``'roberta_pos'`` (model output indices 0, 1, 2).
    """
    import torch  # local import: only needed for the no_grad context below

    # RoBERTa-base accepts at most 512 tokens, so cap the encoded length.
    encoded_text = tokenizer(example, return_tensors='pt',
                             truncation=True, max_length=512)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded_text)
    # output[0] holds the logits; [0] selects the single item in the batch.
    scores = softmax(output[0][0].numpy())
    return {
        'roberta_neg': scores[0],
        'roberta_neu': scores[1],
        'roberta_pos': scores[2],
    }

In [None]:
# Score every review with both VADER and RoBERTa, keyed by serial number.
# Reviews on which scoring raises RuntimeError are reported and left out.
res = {}
for i, row in tqdm(df.iterrows(), total=len(df)):
    myid = row['serial']
    text = row['review']
    try:
        # Prefix the VADER keys so they cannot collide with the RoBERTa keys.
        vader_scores = {f"vader_{name}": score
                        for name, score in sia.polarity_scores(text).items()}
        res[myid] = {**vader_scores, **polarity_scores_roberta(text)}
    except RuntimeError:
        print(f'Broke for id {myid}')

In [None]:
# One row per scored review: the VADER and RoBERTa scores (keyed by serial)
# joined back onto the review data. The join key is named explicitly; without
# `on`, merge joins on every column name the two frames happen to share.
results_df = (
    pd.DataFrame(res).T
    .reset_index()
    .rename(columns={'index': 'serial'})
    .merge(df, how='left', on='serial')
)

In [None]:
# Pairwise scatter plots of all VADER and RoBERTa scores, coloured by star rating.
score_columns = ['vader_neg', 'vader_neu', 'vader_pos',
                 'roberta_neg', 'roberta_neu', 'roberta_pos']
sb.pairplot(data=results_df, vars=score_columns, hue='stars', palette='tab10')
plt.show()

**TRANSFORMERS PIPELINE**

In [None]:
from transformers import pipeline

# Name the checkpoint explicitly. With no `model` argument the library picks its
# current default for the task and warns about it, so results could change
# between library versions.
# NOTE(review): this is the checkpoint the default has resolved to — confirm.
sent_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

In [None]:
# Show the pipeline's prediction for the first five reviews.
for sample_review in df['review'].iloc[:5]:
    print(sample_review)
    print(sent_pipeline(sample_review))
    print()

**FINAL REVIEW SATISFACTION METER**

In [None]:
# Share of positive / negative reviews as a percentage of all reviews.
# Rows are counted with len(): DataFrame.size is rows * columns, so a ratio of
# .size values is only right while both frames have the same number of columns.
pos_count = len(positive)
neg_count = len(negative)
total_count = len(df)
# Guard against an empty frame instead of dividing by zero.
pos_perc_values = (pos_count / total_count) * 100 if total_count else 0.0
neg_perc_values = (neg_count / total_count) * 100 if total_count else 0.0

**BASE VISUALISATION**

In [None]:
%%capture output11
%matplotlib inline
%config InlineBackend.figure_format="retina"

import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mplcursors
from ipywidgets import interactive,interactive_output,HTML

# Load the product summary CSV; the per-star counts, the total ratings/reviews
# text and the average rating are read from its first row further down.
csv_path = 'flipkart_base_07-07-2024-13-19.csv'
flipkart_data = pd.read_csv(csv_path)

# Function to fetch the image
def fetch_image(url, retries=3, delay=5, timeout=10):
    """Download the product thumbnail referenced on the page at ``url``.

    Parameters
    ----------
    url : str
        Page to scan for the thumbnail ``<img>`` tag.
    retries : int
        Maximum number of attempts at fetching the page.
    delay : int or float
        Seconds to wait after an HTTP 429 response; doubled after each one.
    timeout : int or float
        Seconds to wait for each HTTP response, so a stalled connection
        cannot hang the notebook.

    Returns
    -------
    PIL.Image.Image or None
        The downloaded image, or ``None`` when the page loaded but does not
        contain the expected ``<img>`` tag.

    Raises
    ------
    requests.HTTPError
        Via ``raise_for_status`` for an error status other than 429.
    requests.Timeout
        When a request exceeds ``timeout``.
    Exception
        When no image was obtained after ``retries`` attempts.
    """
    import time  # local import: the back-off needs it and no visible cell imports `time`

    # NOTE(review): the tag is matched by this one hard-coded src, so only a
    # page embedding exactly this thumbnail yields an image.
    src = "https://rukminim2.flixcart.com/image/312/312/xif0q/mobile/k/l/l/-original-imagtc5fz9spysyk.jpeg?q=70"
    for _attempt in range(retries):
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            image_tag = soup.find('img', {'src': src})
            if not image_tag:
                print("Image with specified src not found")
                return None
            image_url = image_tag.get('src')
            image_response = requests.get(image_url, timeout=timeout)
            if image_response.status_code == 200:
                return Image.open(BytesIO(image_response.content))
            # Image download failed: report it and use up another attempt.
            print(f"Failed to fetch image. Status code: {image_response.status_code}")
        elif response.status_code == 429:
            print(f"Rate limited. Retrying in {delay} seconds...")
            time.sleep(delay)
            delay *= 2
        else:
            response.raise_for_status()
    raise Exception(f"Failed to fetch image after {retries} retries.")

# URL of the webpage to extract the image from
url = "https://www.flipkart.com/apple-iphone-15-blue-128-gb/product-reviews/itmbf14ef54f645d?pid=MOBGTAGPAQNVFZZY&lid=LSTMOBGTAGPAQNVFZZYO7HQ2L&marketplace=FLIPKART&page=1"
mobile_image = fetch_image(url)
# fetch_image returns None when the expected <img> tag is missing; fail with a
# clear message instead of an AttributeError on None.resize.
if mobile_image is None:
    raise RuntimeError("fetch_image returned no image: the expected <img> tag was not found on the page")
mobile_image = mobile_image.resize((200, 300))

# Product title shown above the chart
product_name = "Apple Iphone 15 Blue (128 GB)"

# First summary row: the first five columns are taken as the per-star counts
# and converted to integers.
ratings = flipkart_data.iloc[0, :5].astype(int)
total_reviews = flipkart_data.at[0, 'Total Number of Ratings and Reviews']
average_rating = flipkart_data.at[0, 'Total Rating']

# The totals text is split on single spaces: token 0 is used as the ratings
# count and token 2 as the reviews count.
review_tokens = total_reviews.split(' ')
total_ratings, total_reviews_only = review_tokens[0], review_tokens[2]

# One bar colour per rating row
colors = ['#f44336', '#ff9800', '#cddc39', '#8bc34a', '#4caf50']

# Canvas; the product image and the rating bars each get their own axes on it
fig, ax = plt.subplots(figsize=(15, 8))

# Left: the product image, without axis decoration
ax_img = fig.add_axes([0.1, 0.1, 0.35, 0.8], anchor='W')
ax_img.axis('off')
ax_img.imshow(mobile_image)

# Right: horizontal bars, one per rating row  ([left, bottom, width, height])
ax_inset = fig.add_axes([0.5, 0.1, 0.45, 0.8])
bar_positions = np.arange(len(ratings))
ax_inset.barh(bar_positions, ratings.values, color=colors, edgecolor='black')

# Star label to the left of each bar, formatted count to its right
for row_pos, (star_label, n_ratings) in enumerate(zip(ratings.index, ratings.values)):
    ax_inset.text(-500, row_pos, f'{star_label} ★', va='center', ha='right', fontsize=12)
    ax_inset.text(n_ratings + 500, row_pos, f'{n_ratings:,}', va='center', ha='left', fontsize=12)

# Hide every spine and both axes so only bars and labels remain
for side in ('top', 'right', 'left', 'bottom'):
    ax_inset.spines[side].set_visible(False)
ax_inset.yaxis.set_visible(False)
ax_inset.xaxis.set_visible(False)

# Headline: average rating, then total ratings / reviews
fig.text(0.6, 0.9, f'{average_rating} ★', fontsize=24, fontweight='bold')
fig.text(0.6, 0.87, f'{total_ratings} Ratings & {total_reviews_only} Reviews', fontsize=14)

# Product name as the figure heading
fig.text(0.5, 0.95, product_name, fontsize=18, fontweight='bold')

from IPython.display import display

# Create clickable URL widget
url_widget = HTML(
    value=f'<a href="{url}" target="_blank">Click here to open URL</a>',
    layout={'width': 'auto'}
)
# matplotlib cannot draw a widget: fig.text() converts its argument to a string,
# so passing the widget only wrote the widget's repr onto the figure. Emit the
# link as cell output instead.
display(url_widget)

ax.axis('off')
plt.ion()
plt.show()


In [None]:
%%capture output12
%matplotlib inline
%config InlineBackend.figure_format="retina"


import matplotlib.pyplot as plt
import numpy as np
from matplotlib.animation import FuncAnimation

# Global matplotlib defaults; the gauge figure created further down passes its own figsize.
plt.rcParams["figure.figsize"] = [7.50, 3.50]
# Let matplotlib adjust the layout automatically.
plt.rcParams["figure.autolayout"] = True
# Executable name matplotlib uses when an animation is written through ffmpeg.
plt.rcParams['animation.ffmpeg_path'] = 'ffmpeg'

# List of color codes, one per gauge band
colors = [
    '#008000',  # Green
    '#90EE90',  # Light Green
    '#FFFF00',  # Yellow
    '#FFA500',  # Orange
    '#FF0000'   # Red
]

# Values shown at the band boundaries
values = [100, 80, 60, 40, 20, 0]
pos_perc = pos_perc_values
num_bars = len(colors)

# Start angle of each band, spread over the half circle [0, pi)
angles = np.linspace(0, np.pi, num_bars, endpoint=False)
# Band boundaries and band centres are derived from num_bars. The hand-typed
# radian tables they replace were truncated to 8 decimals, carried extra
# entries beyond pi, and had to be edited by hand whenever the band count changed.
intv = np.linspace(0, np.pi, num_bars + 1)
intv_mid = (intv[:-1] + intv[1:]) / 2

# Pointer travel: from 0 % to the positive-review percentage
initial_pos = 0  # Start at 0%
target_pos = pos_perc  # End at target percentage
# 0 % maps to angle pi and 100 % to angle 0
angle_for_target_pos = np.interp(target_pos, [0, 100], [np.pi, 0])  # Target angle

# Canvas with a single polar axes for the gauge
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='polar')

# One coloured band per entry of `colors`, drawn between radius 2 and 2.5
bars = ax.bar(
    x=angles,
    width=np.pi/num_bars,
    bottom=2,
    height=0.5,
    color=colors,
    edgecolor='white',
    align='edge',
)

# Boundary values on the outer rim; values above 50 are left-aligned, the rest right-aligned
for tick_angle, tick_value in zip(intv, values):
    label_align = 'left' if tick_value > 50 else 'right'
    ax.annotate(tick_value, xy=(tick_angle, 2.5), ha=label_align)

# One emoji per band
emojis = ["😃", "😊", "😐", "😕", "😡"]

# Emoji in the middle of each band
for band_angle, face in zip(intv_mid, emojis):
    ax.text(band_angle, 2.25, face, fontsize=40, ha='center', va='center', color='black')

# Pointer: a wedge arrow from the centre label to the inner edge of the ring
pointer_annotation = ax.annotate(
    f"{initial_pos:.2f}%",
    xytext=(0, 0),
    xy=(0, 2.0),
    arrowprops=dict(arrowstyle="wedge,tail_width=0.45", color="black", shrinkA=0),
    bbox=dict(boxstyle="circle", facecolor="black", linewidth=2.0),
    fontsize=25,
    color="white",
    ha="center",
)

# Animation function
def update(frame):
    """Move the gauge pointer for one animation frame.

    ``frame`` runs over 0..100 and is mapped linearly onto 0..``pos_perc``;
    the resulting percentage is converted to an angle (0 % -> pi, 100 % -> 0)
    and written to ``pointer_annotation`` together with its text label.

    Returns a 1-tuple containing the modified annotation.
    """
    shown_percent = np.interp(frame, [0, 100], [0, pos_perc])
    needle_angle = np.interp(shown_percent, [0, 100], [np.pi, 0])
    pointer_annotation.xy = (needle_angle, 2.0)
    pointer_annotation.set_text(f"{shown_percent:.2f}%")
    return (pointer_annotation,)

# Hide the polar grid and frame, then add the heading
ax.set_axis_off()
ax.set_title("CUSTOMER SENTIMENT", loc="center", pad=20, fontsize=35, fontweight="bold")

# 201 frames sweeping 0..100, 50 ms apart
sweep_frames = np.linspace(0, 100, num=201)
anim = FuncAnimation(fig, update, frames=sweep_frames, interval=50, blit=True)

# Close the current figure so it is not also emitted as a static image
plt.close()


In [None]:
%%capture output13
# NOTE: this rebinds `HTML` to IPython's display class; an earlier cell imported
# `HTML` from ipywidgets under the same name.
from IPython.display import HTML

# Render the animation as a JavaScript/HTML player; the %%capture magic at the
# top of this cell stores the output in `output13`.
HTML(anim.to_jshtml())

In [None]:
# Import display function from IPython.display
from IPython.display import display,HTML

# Display the captured outputs. show() renders them itself and returns None, so
# it is called directly: wrapping it in display() also displayed that None.
output11.show()
output13.show()
