<a href="https://colab.research.google.com/github/KavyaM22/Sentimental-Analysis/blob/main/Sentimental%20Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
# re: regular-expression module. It checks whether text matches a given pattern
# and can substitute or split a string wherever the pattern occurs.
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
import nltk
# Download necessary NLTK data.
#   stopwords          - stop-word lists used by remove_stopwords
#   punkt / punkt_tab  - tokenizer models used by word_tokenize; newer NLTK
#                        releases load 'punkt_tab', older ones load 'punkt',
#                        so both are fetched to work on either
#   wordnet / omw-1.4  - lexical data used by WordNetLemmatizer
for nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(nltk_resource)

# Load the dataset
file_path = '/content/Restaurant reviews (1).csv'  # Adjust path if necessary
data = pd.read_csv(file_path)

# Handle missing values: fill NaN *before* casting to str. Casting first turns
# NaN into the literal text 'nan', which fillna('') can no longer detect.
data['Review'] = data['Review'].fillna('').astype(str)

# Remove duplicate reviews (the first occurrence of each review text is kept)
data = data.drop_duplicates(subset=['Review'])

# Function to clean text
def clean_text(text):
    """Strip HTML tags, URLs and all non-letter, non-whitespace characters.

    The substitutions run in the order listed; each match is replaced by the
    empty string, so surrounding whitespace is left as it was.
    """
    substitutions = (
        (r'<.*?>', 0),                                # HTML tags
        (r'http\S+|www\S+|https\S+', re.MULTILINE),   # URLs
        (r'[^a-zA-Z\s]', 0),                          # non-alphabetic characters
    )
    for pattern, flags in substitutions:
        text = re.sub(pattern, '', text, flags=flags)
    return text

# Clean every review, then normalize it to lowercase
data['Cleaned_Review'] = data['Review'].apply(clean_text).str.lower()

# English stop words, kept in a set for fast membership checks
stop_words = set(stopwords.words('english'))

def remove_stopwords(text):
    """Return ``text`` without the whitespace-separated words found in ``stop_words``.

    The surviving words are re-joined with single spaces.
    """
    kept_words = filter(lambda token: token not in stop_words, text.split())
    return ' '.join(kept_words)

# Drop stop words from the cleaned, lowercased text
data['Cleaned_Review'] = data['Cleaned_Review'].map(remove_stopwords)

# Tokenization: split each cleaned review into a list of word tokens
data['Tokenized_Review'] = data['Cleaned_Review'].map(word_tokenize)

# Lemmatization: one shared lemmatizer instance, used by lemmatize_text
lemmatizer = WordNetLemmatizer()

def lemmatize_text(tokens):
    """Return a list with the lemmatized form of each token, in the original order."""
    return list(map(lemmatizer.lemmatize, tokens))

# Lemmatize every token list
data['Lemmatized_Review'] = data['Tokenized_Review'].map(lemmatize_text)

# Join the lemmatized tokens back into a single space-separated string
data['Processed_Review'] = data['Lemmatized_Review'].map(' '.join)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer # VADER for sentiment analysis
import pandas as pd # pandas for data manipulation

# Fetch the VADER lexicon first, then build the analyzer so the lexicon is
# available when it is constructed. `sia` is the shared module-level analyzer
# that analyze_sentiment() calls.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
def analyze_sentiment(text, threshold=0.05):
    """Label ``text`` as 'Positive', 'Negative' or 'Neutral' from its VADER score.

    Parameters
    ----------
    text : str
        Text passed to the module-level ``sia`` analyzer.
    threshold : float, optional
        Non-negative cut-off applied symmetrically to the compound score.
        Defaults to 0.05, the value that was previously hard-coded here.

    Returns
    -------
    str
        'Positive' if compound >= threshold, 'Negative' if compound <= -threshold,
        otherwise 'Neutral'.
    """
    # The compound score is a single value that represents the overall sentiment
    # of the text, ranging from -1 to +1.
    compound_score = sia.polarity_scores(text)['compound']
    if compound_score >= threshold:
        return 'Positive'
    if compound_score <= -threshold:
        return 'Negative'
    return 'Neutral'

In [None]:
# Score the original review text rather than Processed_Review: VADER's rules rely
# on negation words ("not", "no"), punctuation and capitalization, all of which the
# cleaning / lowercasing / stop-word steps remove (e.g. "not good" would otherwise
# be scored as just "good").
data['sentiment'] = data['Review'].apply(analyze_sentiment)

In [None]:
# Show how many reviews received each sentiment label
print(data['sentiment'].value_counts())

sentiment
Positive    7318
Negative    1366
Neutral      681
Name: count, dtype: int64


In [None]:
# Get the count of each sentiment type (value_counts sorts most frequent first)
sentiment_counts = data['sentiment'].value_counts()

# Calculate the overall sentiment: the most frequent label. Anything other than
# 'Positive' / 'Negative' maps to 'Neutral', and so does the case of no reviews
# at all, where indexing position 0 would raise an IndexError.
if sentiment_counts.empty:
    overall_sentiment = 'Neutral'
else:
    most_common_label = sentiment_counts.index[0]
    overall_sentiment = (
        most_common_label if most_common_label in ('Positive', 'Negative') else 'Neutral'
    )

print(f'Overall Sentiment: {overall_sentiment}')

Overall Sentiment: Positive


In [None]:
# Per-restaurant tally of the reviews labelled 'Positive'
positive_reviews = (
    data.loc[data['sentiment'] == 'Positive']
    .groupby('Restaurant')
    .size()
    .reset_index(name='Positive_Review_Count')
)

# Report the restaurant with the highest tally, if any positive reviews exist
if not positive_reviews.empty:
    most_positive_restaurant = (
        positive_reviews
        .sort_values(by='Positive_Review_Count', ascending=False)
        .iloc[0]
    )
    print(most_positive_restaurant)
else:
    print("No positive reviews found.")

Restaurant               AB's - Absolute Barbecues
Positive_Review_Count                          100
Name: 3, dtype: object
