# In [None]:
import os

import matplotlib.pyplot as plt
from pymongo import MongoClient
from textblob import TextBlob

# MongoDB setup
# The connection string is read from the MONGODB_URI environment variable when
# it is set (and non-empty), so credentials can be supplied without editing
# this file.
# NOTE(review): the fallback below embeds a username and password in this
# file; rotate that credential and remove the fallback once every environment
# provides MONGODB_URI.
MONGODB_URI = os.environ.get("MONGODB_URI") or (
    "mongodb+srv://root:admin@cluster0.lfqdtch.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
)
client = MongoClient(
    MONGODB_URI,
    tls=True,
    # NOTE(review): this disables verification of the server's TLS
    # certificate. Kept as-is to avoid breaking existing runs, but it should
    # be removed once the environment has a working CA bundle.
    tlsAllowInvalidCertificates=True,
)
db = client['trustpilot_reviews']
# Collection holding the partsofcanada.com reviews.
collection = db['partsofcanada_reviews']

# Function to classify sentiment
def classify_sentiment(text) -> str:
    """Classify review text as "Positive", "Negative", or "Neutral".

    The label is derived from TextBlob's polarity score, which ranges from
    -1 (negative) to 1 (positive): above zero is "Positive", below zero is
    "Negative", and exactly zero is "Neutral".

    Args:
        text: The review text. Non-string values, empty or whitespace-only
            strings, and the placeholder "N/A" (ignoring surrounding
            whitespace) are all treated as carrying no sentiment.

    Returns:
        One of "Positive", "Negative", or "Neutral". "Neutral" is also the
        fallback when TextBlob raises while analysing the text.
    """
    # Non-string values (None, numbers, lists, ...) have no .strip(); reject
    # them up front instead of letting an AttributeError escape.
    if not isinstance(text, str):
        return "Neutral"

    # Blank text and the "N/A" placeholder have nothing to analyse.
    if text.strip() in ("", "N/A"):
        return "Neutral"

    try:
        polarity = TextBlob(text).sentiment.polarity
    except Exception as e:
        # Deliberate best effort: one unanalysable review must not abort the
        # whole run, so report it and fall back to Neutral.
        print(f"Error classifying sentiment for text: {text[:50]}... Error: {e}")
        return "Neutral"

    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"  # Polarity is exactly 0


# Analyze sentiment and store in MongoDB
print(f"Performing sentiment analysis on reviews from '{collection.name}' collection...")

# Number of processed reviews between progress messages.
# NOTE(review): 177 is carried over unchanged from the previous version of
# this script; pick a rounder value if the exact cadence does not matter.
PROGRESS_INTERVAL = 177

sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
total_reviews_processed = 0

# All database work happens inside try/finally so the client is closed even if
# classification or an update fails part-way through.
try:
    total_reviews_in_collection = collection.count_documents({})
    print(f"Found {total_reviews_in_collection} reviews to process.")

    # Every review is (re)classified. To resume an interrupted run instead,
    # replace the empty filter with {'sentiment': {'$exists': False}}.
    # The projection requests only the fields the loop reads; _id is returned
    # by default.
    reviews_cursor = collection.find(
        {},
        {'review_text': 1, 'sentiment': 1},
    ).batch_size(100)

    for review in reviews_cursor:
        total_reviews_processed += 1
        sentiment = classify_sentiment(review.get('review_text', ''))

        # Write only when the stored label is missing or different; the
        # database ends up in the same state with fewer round trips.
        if review.get('sentiment') != sentiment:
            collection.update_one(
                {'_id': review['_id']},
                {'$set': {'sentiment': sentiment}},
            )

        sentiment_counts[sentiment] += 1

        # Optional: Print progress
        if total_reviews_processed % PROGRESS_INTERVAL == 0:
            print(f"Processed {total_reviews_processed}/{total_reviews_in_collection} reviews.")
finally:
    # Nothing below needs the database, so release the connection before
    # plotting.
    client.close()
    print("MongoDB connection closed.")

print("\nSentiment analysis completed and stored in MongoDB.")
print("Sentiment distribution:")
for sentiment, count in sentiment_counts.items():
    print(f"{sentiment}: {count}")

# Optional: Visualize sentiment distribution
sentiments = list(sentiment_counts.keys())
counts = list(sentiment_counts.values())

plt.figure(figsize=(8, 6))
# Colours follow the insertion order of sentiment_counts:
# Positive, Negative, Neutral.
plt.bar(sentiments, counts, color=['green', 'red', 'gray'])
# The title is derived from the collection name (reads only the cached name
# attribute, so it is safe after the client has been closed).
plt.title(f'Sentiment Distribution of {collection.name.replace("_reviews", "").title()} Reviews')
plt.xlabel('Sentiment')
plt.ylabel('Number of Reviews')
plt.show()