In [2]:
import pandas as pd
import spacy

# Load the small English spaCy pipeline; when the load fails with OSError
# (model not found), download it once and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading spaCy model 'en_core_web_sm'")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Location of the reviews CSV, relative to the current working directory.
file_path_spacy = "amazon_product_reviews.csv"

# Read the CSV, pinning the dtype of two columns up front.
dataframe_spacy = pd.read_csv(
    file_path_spacy,
    dtype={'name': str, 'reviews.didPurchase': object},
)

# Review text column exactly as loaded; it can still contain missing values.
reviews_data_spacy = dataframe_spacy['reviews.text']

# Full frame (all columns) limited to the rows that actually have review text.
clean_data_spacy = dataframe_spacy[dataframe_spacy['reviews.text'].notna()]

def analyze_sentiment_spacy(review):
    """Label a review as "Positive", "Negative" or "Neutral" using spaCy.

    The text is run through the module-level ``nlp`` pipeline and the sign of
    the resulting ``doc.sentiment`` score selects the label.

    Args:
        review: Review text to pass to ``nlp``.

    Returns:
        "Positive" when the score is above zero, "Negative" when it is below
        zero, otherwise "Neutral".

    Note:
        ``Doc.sentiment`` is a plain float. It stays at 0.0 unless the loaded
        pipeline contains a component that sets it, so with a stock model
        every review comes back "Neutral".
    """
    doc = nlp(review)

    # Doc.sentiment is already the numeric score; it has no ``.polarity``
    # attribute, so reading one raised AttributeError on every call.
    polarity = doc.sentiment

    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"


NameError: name 'reviews_dat' is not defined

In [None]:
import pandas as pd
from textblob import TextBlob

# Reviews CSV, resolved relative to the current working directory.
file_path_textblob = "amazon_product_reviews.csv"

# Load the reviews, forcing these two columns to the generic object dtype.
# NOTE(review): despite the "clean" in its name, the frame is kept exactly as
# read here - no rows with missing review text are dropped in this cell.
clean_data_textblob = pd.read_csv(
    file_path_textblob,
    dtype=dict.fromkeys(('name', 'reviews.didPurchase'), 'object'),
)

def analyze_sentiment_textblob(review):
    """Label a review as "Positive", "Negative" or "Neutral" using TextBlob.

    Args:
        review: The review to score. Non-string values are converted with
            ``str()`` before analysis.

    Returns:
        "Positive" / "Negative" / "Neutral" according to the sign of
        TextBlob's polarity, or "Invalid Review" when the input is missing
        (``None`` or a float NaN), blank, or cannot be analysed.
    """
    # Missing values must be rejected before str(): otherwise None and NaN
    # become the literal texts "None" / "nan" and get scored as real reviews.
    # (NaN is the only float that is not equal to itself.)
    if review is None or (isinstance(review, float) and review != review):
        return "Invalid Review"

    try:
        review_str = str(review)
        if not review_str.strip():
            return "Invalid Review"
        polarity = TextBlob(review_str).sentiment.polarity
    except Exception:
        # Best effort: anything that fails to convert or analyse is reported
        # as invalid, but KeyboardInterrupt / SystemExit still propagate.
        return "Invalid Review"

    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"

# Score every row of the loaded dataset and print each review with its label.
# NOTE(review): this prints one entry per row of the frame, so the cell output
# grows with the size of the CSV.
for index, row in clean_data_textblob.iterrows():
    review = row.loc['reviews.text']
    sentiment = analyze_sentiment_textblob(review)
    print(f"Review: {review}\nSentiment: {sentiment}\n")

# Quick check on a few hand-written inputs, including a non-string value.
reviews = [
    "I love this product!",
    "Not satisfied with the quality.",
    "It's okay.",
    123.45,
]

for review in reviews:
    sentiment = analyze_sentiment_textblob(review)
    print(f"Review: {review}\nSentiment: {sentiment}\n")


In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import os

# Function to create PDF report
def create_report(output_dir="."):
    """Write the sentiment-analysis summary to a PDF inside ``output_dir``.

    The file is named ``sentimentanalysis_report.pdf``. Text is laid out from
    the top margin downwards, wrapped to the printable width, and continued on
    a new page if it would run past the bottom margin.

    Args:
        output_dir: Directory to write the PDF into. It is created when it
            does not exist yet. Defaults to the current directory.

    Returns:
        None. The path of the saved file is printed.
    """
    # Function-scope import so this definition works without touching the
    # notebook's import cell; reportlab is already a dependency of this cell.
    from reportlab.lib.utils import simpleSplit

    # The canvas cannot create missing folders on save, so make sure the
    # target exists first ("" means the current directory and needs no mkdir).
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Create PDF
    pdf_file = os.path.join(output_dir, "sentimentanalysis_report.pdf")
    pdf = canvas.Canvas(pdf_file, pagesize=letter)

    font_name, font_size = "Helvetica", 12
    margin = 72        # left/right/top/bottom margin in points
    line_height = 16   # vertical distance between baselines in points
    page_width, page_height = letter
    text_width = page_width - 2 * margin
    # Start below the top edge: a fixed y of 800 lies above a letter page,
    # which is only 792 points tall.
    top_y = page_height - margin

    pdf.setFont(font_name, font_size)
    y = top_y

    def write_line(text=""):
        """Draw one line at the cursor (blank when empty) and move down."""
        nonlocal y
        if y < margin:
            pdf.showPage()
            # Font settings do not carry over to a new page.
            pdf.setFont(font_name, font_size)
            y = top_y
        if text:
            pdf.drawString(margin, y, text)
        y -= line_height

    def write_paragraph(text):
        """Draw text honouring its "\\n" breaks and wrapping long lines."""
        # drawString renders a single line only, so newlines are split here
        # and each piece is wrapped to the printable width.
        for piece in text.split("\n"):
            if not piece.strip():
                write_line()
                continue
            for wrapped in simpleSplit(piece, font_name, font_size, text_width):
                write_line(wrapped)

    # Description of the dataset
    dataset_description = (
        "The dataset used for sentiment analysis consists of Amazon product reviews. "
        "It includes various columns such as id, name, asins, brand, categories, keys, "
        "manufacturer, reviews.date, reviews.dateAdded, reviews.dateSeen, reviews.didPurchase, "
        "reviews.doRecommend, reviews.id, reviews.numHelpful, reviews.rating, reviews.sourceURLs, "
        "reviews.text, reviews.title, reviews.userCity, reviews.userProvince, and reviews.username."
    )

    # Details of preprocessing steps
    preprocessing_details = (
        "The preprocessing steps involved selecting the 'reviews.text' column from the dataset "
        "and removing missing values. Stopwords were removed from the text data to prepare it for analysis."
    )

    # Evaluation of results
    results_evaluation = (
        "The sentiment analysis model was evaluated using sample product reviews. "
        "The results indicated the sentiment as Positive, Negative, or Neutral based on the calculated polarity."
    )

    # Insights into the model's strengths and limitations
    model_insights = (
        "Strengths:\n- The model effectively predicts sentiment using spaCy's sentiment polarity.\n\n"
        "Limitations:\n- The model might not capture nuanced sentiments well.\n"
        "- It relies on the spaCy model, which might not be trained on specific domain vocabulary."
    )

    sections = [
        ("Description of the Dataset:", dataset_description),
        ("Details of Preprocessing Steps:", preprocessing_details),
        ("Evaluation of Results:", results_evaluation),
        ("Insights into the Model's Strengths and Limitations:", model_insights),
    ]

    # Add content to the PDF: title first, then each heading with its body,
    # separated by a blank line.
    write_line("Sentiment Analysis Report")
    write_line()
    for heading, body in sections:
        write_line(heading)
        write_paragraph(body)
        write_line()

    # Save the PDF
    pdf.save()
    print(f"PDF report saved as {pdf_file}")

# Generate the report, writing it into the "task21" directory.
create_report("task21")
