In [3]:
!pip install langchain_google_genai

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading langchain_google_genai-2.0.6-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, langchain_google_genai
Successfully installed filetype-1.2.0 langchain_google_genai-2.0.6


In [12]:
import os
import json
from typing import List, Dict, Any
import numpy as np
from collections import Counter
import re

# Langchain and AI Libraries
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import pipeline

class OverallCommentAnalyzer:
    def __init__(self, google_api_key: str):
        """Initialize analyzer with Gemini and sentiment models"""
        self.gemini_model = ChatGoogleGenerativeAI(
            model="gemini-pro",
            google_api_key=google_api_key
        )

        self.sentiment_model = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english"
        )

        self.topic_model = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli"
        )

    def overall_sentiment_analysis(self, comments: List[str]) -> Dict[str, Any]:
        """Compute overall sentiment across comments"""
        sentiments = [self.sentiment_model(comment)[0] for comment in comments]

        return {
            "positive_ratio": sum(1 for s in sentiments if s['label'] == 'POSITIVE') / len(sentiments),
            "negative_ratio": sum(1 for s in sentiments if s['label'] == 'NEGATIVE') / len(sentiments),
            "avg_confidence": np.mean([s['score'] for s in sentiments])
        }

    def overall_topic_analysis(self, comments: List[str]) -> Dict[str, float]:
        """Classify overall topics across comments"""
        categories = ["technology", "politics", "personal", "professional", "entertainment"]

        # Perform zero-shot classification
        result = self.topic_model(
            comments[0] if comments else "",
            categories
        )

        return {
            label: float(score)
            for label, score in zip(result['labels'], result['scores'])
        }

    def additional_comment_analysis(self, comments: List[str]) -> Dict[str, Any]:
        """Perform additional detailed comment analysis"""
        # Analyze individual comment sentiments
        comment_sentiments = []
        for comment in comments:
            sentiment = self.sentiment_model(comment)[0]
            comment_sentiments.append({
                "text": comment,
                "label": sentiment['label'],
                "score": sentiment['score']
            })

        # Find most positive and negative comments
        most_positive = max(
            [c for c in comment_sentiments if c['label'] == 'POSITIVE'],
            key=lambda x: x['score'],
            default={"text": "No positive comments", "score": 0}
        )

        most_negative = max(
            [c for c in comment_sentiments if c['label'] == 'NEGATIVE'],
            key=lambda x: x['score'],
            default={"text": "No negative comments", "score": 0}
        )

        # Analyze most positive and negative words
        def extract_words(comments):
            words = []
            for comment in comments:
                # Clean and tokenize words
                clean_words = re.findall(r'\w+', comment.lower())
                words.extend([word for word in clean_words if len(word) > 2])
            return words

        all_words = extract_words(comments)
        word_counts = Counter(all_words)

        # Find most positive and negative words
        def word_sentiment(word):
            try:
                return self.sentiment_model([word])[0]
            except:
                return {"label": "NEUTRAL", "score": 0}

        word_sentiments = {
            word: word_sentiment(word) for word in set(all_words)
        }

        positive_words = sorted(
            [w for w, s in word_sentiments.items() if s['label'] == 'POSITIVE'],
            key=lambda w: (word_sentiments[w]['score'], word_counts[w]),
            reverse=True
        )

        negative_words = sorted(
            [w for w, s in word_sentiments.items() if s['label'] == 'NEGATIVE'],
            key=lambda w: (word_sentiments[w]['score'], word_counts[w]),
            reverse=True
        )

        # Gemini improvement suggestions
        prompt = PromptTemplate(
            input_variables=["comments"],
            template="""Provide specific, actionable improvement suggestions based on these comments:
            {comments}
            """
        )

        chain = LLMChain(llm=self.gemini_model, prompt=prompt)
        improvements = chain.run(comments="\n".join(comments))

        return {
            "most_positive_comment": most_positive,
            "most_negative_comment": most_negative,
            "top_3_positive_words": positive_words[:3],
            "top_3_negative_words": negative_words[:3],
            "suggested_improvements": improvements
        }

    def gemini_comprehensive_analysis(self, comments: List[str]) -> Dict[str, Any]:
        """Perform comprehensive analysis using Gemini"""
        prompt = PromptTemplate(
            input_variables=["comments"],
            template="""Analyze the following comments comprehensively:
            1. Identify overall themes
            2. Summarize key insights
            3. Provide strategic recommendations

            Comments: {comments}
            """
        )

        chain = LLMChain(llm=self.gemini_model, prompt=prompt)
        return {"overall_insights": chain.run(comments="\n".join(comments))}

    def analyze_comments(self, comments: List[str]) -> str:
        """Perform comprehensive overall analysis"""
        analysis = {
            "total_comments": len(comments),
            "sentiment_analysis": self.overall_sentiment_analysis(comments),
            "topic_distribution": self.overall_topic_analysis(comments),
            "gemini_insights": self.gemini_comprehensive_analysis(comments),
            **self.additional_comment_analysis(comments)
        }

        return json.dumps(analysis, indent=2)

def main():
    google_api_key = "AIzaSyDFCC3WxFXkar2cuZWBLNkFweuzIVB1hRE"
    analyzer = OverallCommentAnalyzer(google_api_key)

    sample_comments = [
        "This product is amazing! Really love the features.",
        "I'm disappointed with the customer service.",
        "Another boring political debate happening again.",
        "Technology is changing so fast these days!",
        "Great experience with the new update."
    ]

    overall_analysis = analyzer.analyze_comments(sample_comments)
    print(overall_analysis)

    with open("overall_comment_analysis.json", "w") as f:
        f.write(overall_analysis)

if __name__ == "__main__":
    main()

{
  "total_comments": 5,
  "sentiment_analysis": {
    "positive_ratio": 0.6,
    "negative_ratio": 0.4,
    "avg_confidence": 0.9997121810913085
  },
  "topic_distribution": {
    "professional": 0.6739587783813477,
    "entertainment": 0.24131295084953308,
    "personal": 0.047384753823280334,
    "technology": 0.03617731109261513,
    "politics": 0.0011661830358207226
  },
  "gemini_insights": {
    "overall_insights": "**1. Overall Themes**\n* **Product Feedback:** Positive feedback on product features, negative feedback on customer service.\n* **Current Events:** Commentary on political debates and technological advancements.\n\n**2. Key Insights**\n\n* **Product Features:** Users are highly satisfied with the product's functionality.\n* **Customer Service:** There is dissatisfaction with the level of customer support provided.\n* **Political Discontent:** Users express frustration with the repetitiveness and polarization of political discourse.\n* **Technological Progress:** User