In [3]:
import pandas as pd
import re
from nltk.tokenize import TreebankWordTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
import nltk
from textblob import TextBlob

# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

# Initialize NLP tools shared by the preprocessing step
tokenizer = TreebankWordTokenizer()
# Keep the stopwords in a set: the stopword filter tests every token of every
# review for membership, which is O(1) on a set but a linear scan on the list
# that stopwords.words() returns.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

# Load the dataset
file_path = '/content/Cleaned_Combined_Reviews (1).csv'
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, so a column cannot end up with mixed inferred
# types.
# NOTE(review): the default call emitted a warning at this line when the cell
# was run -- presumably a mixed-dtype warning; confirm it is gone.
dataset = pd.read_csv(file_path, low_memory=False)

# Inspect the dataset
print("Dataset columns:", dataset.columns)
print("Sample Data:")
print(dataset.head())

# Assuming the review text is in a column named 'Review_Text'
review_column = 'Review_Text'  # Replace with the correct column name if different

# Fail early with a clear message instead of a KeyError further down
if review_column not in dataset.columns:
    raise ValueError(f"Column '{review_column}' not found in the dataset.")

# Replace missing values with "" and coerce everything to str so that the
# string operations applied to each review never receive NaN or a number
dataset[review_column] = dataset[review_column].fillna("").astype(str)

# Text normalisation helper
def preprocess_text(text):
    """Return *text* reduced to space-separated, stemmed tokens.

    The text is lowercased, every character outside ``a-z0-9`` is replaced
    by a space, and the result is split with the module-level ``tokenizer``.
    Tokens found in ``stop_words`` are dropped; the remaining tokens are
    lemmatized, then stemmed, and joined with single spaces.
    """
    cleaned = re.sub('[^a-z0-9]', ' ', text.lower())
    normalized = []
    for token in tokenizer.tokenize(cleaned):
        if token in stop_words:
            continue
        # Lemmatize first, then stem the lemma
        normalized.append(stemmer.stem(lemmatizer.lemmatize(token)))
    return " ".join(normalized)

# Run the normalisation over every review and store it in a new column
raw_reviews = dataset[review_column]
dataset['Processed_Review'] = raw_reviews.apply(preprocess_text)

# Sentiment labelling helper
def analyze_sentiment(review):
    """Label *review* according to the sign of its TextBlob polarity.

    Returns "Positive" when the polarity is above zero, "Negative" when it
    is below zero, and "Neutral" otherwise.
    """
    score = TextBlob(review).sentiment.polarity
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    return "Neutral"

# Perform sentiment analysis on the ORIGINAL review text, not on
# 'Processed_Review'. The processed text has had stopwords removed (the NLTK
# English list includes negations such as "not" and "no") and has been
# stemmed, so many tokens are no longer real words (e.g. "terrible" ->
# "terribl"). TextBlob's default analyzer scores text from a word lexicon, so
# feeding it stemmed, negation-free text loses or distorts most of the signal.
dataset['Sentiment'] = dataset[review_column].apply(analyze_sentiment)

# Calculate sentiment percentages (share of reviews per label, in percent)
sentiment_counts = dataset['Sentiment'].value_counts(normalize=True) * 100
sentiment_summary = sentiment_counts.reset_index()
# Name the columns explicitly rather than relying on reset_index() defaults
sentiment_summary.columns = ['Sentiment', 'Percentage']

# Print sentiment summary to the console
print("Sentiment Analysis Report:")
print(sentiment_summary)

# Save sentiment percentages to a CSV file for further use.
# The path is held in one variable so the file written and the message
# printed cannot drift apart.
summary_file_path = '/content/Sentiment_Summary_Report.csv'
sentiment_summary.to_csv(summary_file_path, index=False)
print(f"Sentiment summary saved to: {summary_file_path}")

# Save the full per-review results for future use
output_file_path = '/content/Sentiment_Analysis_Results.csv'
dataset.to_csv(output_file_path, index=False)
print(f"Sentiment analysis results saved to: {output_file_path}")

# Display sample results
print("Sample sentiment analysis results:")
print(dataset[[review_column, 'Processed_Review', 'Sentiment']].head())


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  dataset = pd.read_csv(file_path)


Dataset columns: Index(['sl number', 'Title', 'Review_Text', 'Stars', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13',
       'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17',
       'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21',
       'Unnamed: 22'],
      dtype='object')
Sample Data:
   sl number                   Title  \
0        0.0           david kinzett   
1        1.0         Bradley Slining   
2        2.0            Mr Mark Shaw   
3        3.0  Benjamin F Johnson Jr.   
4        4.0             Donna Light   

                                         Review_Text  Stars  Unnamed: 4  \
0  I regularly use Amazon to order and deliver my...    5.0         NaN   
1  This is like the third package in 2 months "de...    2.0         NaN   
2  This review did not directly affect me but my ...    1.0         NaN   
3  As a Prime customer I am deeply disa