<a href="https://colab.research.google.com/github/Mrugank16/Data-Analysis-and-ML-projects/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('vader_lexicon')

import spacy
import pandas as pd

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [16]:
!pip install sumy

Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl (97 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/97.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m92.2/97.3 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.3/97.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt<0.7,>=0.6.1 (from sumy)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting breadability>=0.1.20 (from sumy)
  Downloading breadability-0.1.20.tar.gz (32 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pycountry>=18.2.23 (from sumy)
  Downloading pycountry-22.3.5.tar.gz (10.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdon

In [6]:
data={
    'review':[
        "This football jersey is amazing! The fabric is comfortable, and the design looks even better in person.",
         "The football jersey is decent, but I was expecting a slightly better fit.",
        "I'm beyond impressed with the craftsmanship of this football jersey. It's my new favorite game-day attire.",
        "Avoid this jersey. The colors faded quickly, and the fabric feels cheap.",
        "It's a standard football jersey. It does the job, but it's not exceptional.",
        "This jersey is a must-have for any football fan. The attention to detail is impressive, and it's very breathable.",
        "This jersey is a letdown. The quality is poor, and it doesn't look like the picture at all.",
        "I couldn't be happier with this purchase. The jersey is high-quality and represents my team perfectly.",
        "This football jersey exceeded my expectations. The colors are vibrant, and it's clear that it's made with high-quality materials.",
        "The football jersey I received is top-notch. It's breathable and lightweight, perfect for showing team spirit."
    ]
}

df = pd.DataFrame(data)
df.to_csv('dataset.csv',index=False)

#Loading and preprocessing the data

In [8]:
data = pd.read_csv('dataset.csv')
data

Unnamed: 0,review
0,This football jersey is amazing! The fabric is...
1,"The football jersey is decent, but I was expec..."
2,I'm beyond impressed with the craftsmanship of...
3,"Avoid this jersey. The colors faded quickly, a..."
4,It's a standard football jersey. It does the j...
5,This jersey is a must-have for any football fa...
6,"This jersey is a letdown. The quality is poor,..."
7,I couldn't be happier with this purchase. The ...
8,This football jersey exceeded my expectations....
9,The football jersey I received is top-notch. I...


In [9]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [10]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [11]:
def preprocess_text(text):
  words = word_tokenize(text)
  words = [lemmatizer.lemmatize(word) for word in words if word.isalnum()]
  words = [word.lower() for word in words if word.lower() not in stop_words]
  return ' '.join(words)

In [13]:
data['preprocessed_text'] = data['review'].apply(preprocess_text)
data

Unnamed: 0,review,preprocessed_text
0,This football jersey is amazing! The fabric is...,football jersey amazing fabric comfortable des...
1,"The football jersey is decent, but I was expec...",football jersey decent wa expecting slightly b...
2,I'm beyond impressed with the craftsmanship of...,beyond impressed craftsmanship football jersey...
3,"Avoid this jersey. The colors faded quickly, a...",avoid jersey color faded quickly fabric feel c...
4,It's a standard football jersey. It does the j...,standard football jersey doe job exceptional
5,This jersey is a must-have for any football fa...,jersey football fan attention detail impressiv...
6,"This jersey is a letdown. The quality is poor,...",jersey letdown quality poor doe look like picture
7,I couldn't be happier with this purchase. The ...,could happier purchase jersey represents team ...
8,This football jersey exceeded my expectations....,football jersey exceeded expectation color vib...
9,The football jersey I received is top-notch. I...,football jersey received breathable lightweigh...


# Aspect Extraction

In [14]:
nlp = spacy.load('en_core_web_sm')

def extract_aspects(text):
  doc = nlp(text)
  aspects = [chunk.text for chunk in doc.noun_chunks]
  return aspects

data['aspects'] = data['preprocessed_text'].apply(extract_aspects)
data

Unnamed: 0,review,preprocessed_text,aspects
0,This football jersey is amazing! The fabric is...,football jersey amazing fabric comfortable des...,[even better person]
1,"The football jersey is decent, but I was expec...",football jersey decent wa expecting slightly b...,[football jersey decent]
2,I'm beyond impressed with the craftsmanship of...,beyond impressed craftsmanship football jersey...,[impressed craftsmanship football jersey new f...
3,"Avoid this jersey. The colors faded quickly, a...",avoid jersey color faded quickly fabric feel c...,[jersey color]
4,It's a standard football jersey. It does the j...,standard football jersey doe job exceptional,[standard football jersey doe]
5,This jersey is a must-have for any football fa...,jersey football fan attention detail impressiv...,[jersey football fan attention detail impressi...
6,"This jersey is a letdown. The quality is poor,...",jersey letdown quality poor doe look like picture,"[jersey letdown quality poor doe, picture]"
7,I couldn't be happier with this purchase. The ...,could happier purchase jersey represents team ...,"[happier purchase jersey, team]"
8,This football jersey exceeded my expectations....,football jersey exceeded expectation color vib...,"[football jersey, expectation color vibrant, m..."
9,The football jersey I received is top-notch. I...,football jersey received breathable lightweigh...,"[football jersey, breathable lightweight perfe..."


# Opinion Identification

In [15]:
from numpy import printoptions
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()

def identify_opinions(aspects):
  opinions={}

  for aspect in aspects:
    sentiment = sid.polarity_scores(aspect)['compound']

    if(sentiment>0.2):
      opinions[aspect] = 'positive'
    elif(sentiment < -0.2):
      opinions[aspect] = 'negative'
    else:
      opinions[aspect] = 'neutral'
  return opinions

data['opinions']= data['aspects'].apply(identify_opinions)
data['opinions']

0                   {'even better person': 'positive'}
1                {'football jersey decent': 'neutral'}
2    {'impressed craftsmanship football jersey new ...
3                          {'jersey color': 'neutral'}
4          {'standard football jersey doe': 'neutral'}
5    {'jersey football fan attention detail impress...
6    {'jersey letdown quality poor doe': 'negative'...
7    {'happier purchase jersey': 'positive', 'team'...
8    {'football jersey': 'neutral', 'expectation co...
9    {'football jersey': 'neutral', 'breathable lig...
Name: opinions, dtype: object

# Text Summarization

In [18]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def generate_summary(text):
  parser = PlaintextParser.from_string(text , Tokenizer("english"))
  summarizer = LsaSummarizer()
  summary = summarizer(parser.document , sentences_count=1)
  return ' '.join([str(sentence) for sentence in summary])

data['summary'] = data['review'].apply(generate_summary)
data['summary']

0                     This football jersey is amazing!
1    The football jersey is decent, but I was expec...
2    I'm beyond impressed with the craftsmanship of...
3    The colors faded quickly, and the fabric feels...
4                     It's a standard football jersey.
5    The attention to detail is impressive, and it'...
6    The quality is poor, and it doesn't look like ...
7    The jersey is high-quality and represents my t...
8       This football jersey exceeded my expectations.
9    It's breathable and lightweight, perfect for s...
Name: summary, dtype: object