In [7]:
import time
import tkinter as tk
from collections import Counter
from tkinter import messagebox
from tkinter import ttk, scrolledtext
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from webdriver_manager.chrome import ChromeDriverManager


class ReviewScraperUI:
    """Tkinter window that scrapes college reviews and shows them in a text pane.

    The user picks a source (Quora or Shiksha) and presses "Scrape Reviews".
    Scraping runs synchronously on the Tk thread using a headless Chrome
    driver; a small modal "Loading..." window is shown while it runs.
    Quora answers are labelled with a VADER sentiment; Shiksha articles are
    listed without sentiment.
    """

    # Maximum number of Quora answers returned by one scrape.
    MAX_QUORA_REVIEWS = 50
    # Texts with this many characters or fewer are ignored.
    MIN_REVIEW_LENGTH = 50
    # Seconds to sleep after each navigation so the page can render.
    PAGE_LOAD_SECONDS = 5

    # VADER analyzer shared by all instances; built lazily on first use so the
    # lexicon is loaded once instead of once per analysed text.
    _analyzer = None
    # Message of the most recent scraping failure, or None when the last
    # scrape finished without one.
    _last_error = None

    def __init__(self, master):
        """Build the widgets inside *master* (a Tk root or Toplevel)."""
        self.master = master
        master.title("Review Scraper")

        # Configuration
        self.quora_links = [
            "https://www.quora.com/What-is-your-review-of-Rungta-College-of-Engineering-and-Technology-Bhilai",
            "https://www.quora.com/Whats-your-review-about-the-Rungta-College-of-Engineering-and-Technology-Bhilai",
            "https://www.quora.com/What-do-you-like-and-dislike-about-Rungta-College-of-Engineering-Technology",
            "https://www.quora.com/How-good-is-Rungta-College-of-Engineering-and-Technology-in-Bhilai-India",
            "https://www.quora.com/What-are-the-benefits-of-studying-at-RSR-Rungta-College-of-Information-Technology-in-Bhilai"
        ]
        self.shiksha_link = "https://www.shiksha.com/college/rungta-college-of-engineering-and-technology-bhilai-52239/news-articles"

        # UI Elements
        self.label_source = ttk.Label(master, text="Select Source:")
        self.label_source.grid(row=0, column=0, padx=5, pady=5, sticky=tk.W)

        self.source_variable = tk.StringVar(value="Quora")  # Default selection
        self.radio_quora = ttk.Radiobutton(master, text="Quora", variable=self.source_variable, value="Quora")
        self.radio_quora.grid(row=1, column=0, padx=5, pady=2, sticky=tk.W)
        self.radio_shiksha = ttk.Radiobutton(master, text="Shiksha", variable=self.source_variable, value="Shiksha")
        self.radio_shiksha.grid(row=2, column=0, padx=5, pady=2, sticky=tk.W)

        self.scrape_button = ttk.Button(master, text="Scrape Reviews", command=self.scrape_reviews)
        self.scrape_button.grid(row=3, column=0, columnspan=2, pady=10)

        self.status_label = ttk.Label(master, text="Ready")
        self.status_label.grid(row=4, column=0, columnspan=2, pady=5)

        self.results_text = scrolledtext.ScrolledText(master, wrap=tk.WORD, width=80, height=20)
        self.results_text.grid(row=5, column=0, columnspan=2, padx=5, pady=5)

        self.clear_button = ttk.Button(master, text="Clear Results", command=self.clear_results)
        self.clear_button.grid(row=6, column=0, columnspan=2, pady=5)

        self.loading_window = None
        self._last_error = None

    # ------------------------------------------------------------------
    # Pure helpers (no Tk / Selenium access)
    # ------------------------------------------------------------------

    @staticmethod
    def _preview(text, limit=200):
        """Return *text* cut to *limit* characters, adding "..." only if cut."""
        if len(text) <= limit:
            return text
        return text[:limit] + "..."

    @staticmethod
    def _label_for_score(score, threshold=0.0):
        """Map a compound score to "Positive", "Negative" or "Neutral".

        Scores strictly above *threshold* are positive, strictly below
        ``-threshold`` negative, everything in between neutral.
        """
        if score > threshold:
            return "Positive"
        if score < -threshold:
            return "Negative"
        return "Neutral"

    @staticmethod
    def _absolute_urls(base_url, hrefs):
        """Resolve *hrefs* against *base_url*, dropping empties and duplicates.

        Order of first appearance is preserved. Absolute hrefs are kept as
        they are instead of being glued onto the site prefix.
        """
        resolved = (urljoin(base_url, href) for href in hrefs if href)
        return list(dict.fromkeys(resolved))

    @staticmethod
    def _sentiment_percentages(reviews):
        """Return the share (0-100) of each sentiment label among *reviews*.

        *reviews* is a list of dicts carrying a "Sentiment" key. The result
        always has the keys "Positive", "Negative" and "Neutral", in that
        order; every share is 0 when *reviews* is empty.
        """
        counts = Counter(review["Sentiment"] for review in reviews)
        total = len(reviews)
        return {
            label: (counts[label] / total) * 100 if total else 0
            for label in ("Positive", "Negative", "Neutral")
        }

    @classmethod
    def _get_analyzer(cls):
        """Return the shared VADER analyzer, creating it on first use."""
        if cls._analyzer is None:
            cls._analyzer = SentimentIntensityAnalyzer()
        return cls._analyzer

    def _report_scrape_error(self, source, error):
        """Remember a scraping failure and show it in the status label."""
        self._last_error = f"Error scraping {source}: {error}"
        self.update_status(self._last_error)

    # ------------------------------------------------------------------
    # Scraping
    # ------------------------------------------------------------------

    def setup_driver(self):
        """Create and return a headless Chrome WebDriver.

        The caller owns the driver and must call ``quit()`` on it.
        """
        options = Options()
        options.add_argument("--headless")  # Run in headless mode (no GUI)
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("start-maximized")
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)

    def scrape_quora_reviews(self):
        """Collect answers from every URL in ``self.quora_links``.

        Returns:
            A list of at most ``MAX_QUORA_REVIEWS`` dicts of the form
            ``{"Review": text, "Sentiment": label}``, without duplicate
            texts. If a page fails, the error is reported through the status
            label and whatever was collected so far is returned.
        """
        driver = self.setup_driver()
        all_reviews = []
        seen_reviews = set()
        try:
            for url in self.quora_links:
                self.update_status(f"Scraping Quora: {url}")
                driver.get(url)
                time.sleep(self.PAGE_LOAD_SECONDS)

                soup = BeautifulSoup(driver.page_source, "html.parser")
                for ans in soup.find_all("div", class_="q-text"):
                    review_text = ans.get_text(strip=True)
                    if len(review_text) <= self.MIN_REVIEW_LENGTH or review_text in seen_reviews:
                        continue
                    seen_reviews.add(review_text)
                    sentiment = self.analyze_sentiment_vader(review_text)
                    all_reviews.append({"Review": review_text, "Sentiment": sentiment})
                    # Stop as soon as the cap is reached so answers that would
                    # be discarded anyway are never analysed.
                    if len(all_reviews) >= self.MAX_QUORA_REVIEWS:
                        return all_reviews
        except Exception as e:
            self._report_scrape_error("Quora", e)
        finally:
            driver.quit()
        return all_reviews

    def scrape_shiksha_reviews(self):
        """Collect article bodies linked from ``self.shiksha_link``.

        Returns:
            A list of unique article texts (plain strings). If a page fails,
            the error is reported through the status label and whatever was
            collected so far is returned.
        """
        driver = self.setup_driver()
        all_reviews = []
        seen_reviews = set()

        try:
            driver.get(self.shiksha_link)
            time.sleep(self.PAGE_LOAD_SECONDS)  # Allow page to load

            soup = BeautifulSoup(driver.page_source, "html.parser")
            hrefs = [link.get("href") for link in soup.find_all("a", class_="news-article")]

            for url in self._absolute_urls(self.shiksha_link, hrefs):
                self.update_status(f"Scraping Shiksha: {url}")
                driver.get(url)
                time.sleep(self.PAGE_LOAD_SECONDS)

                soup = BeautifulSoup(driver.page_source, "html.parser")
                article_text = soup.find("div", class_="news-article-content-body")
                if not article_text:
                    continue

                review_text = article_text.get_text(strip=True)
                if len(review_text) > self.MIN_REVIEW_LENGTH and review_text not in seen_reviews:
                    all_reviews.append(review_text)
                    seen_reviews.add(review_text)

        except Exception as e:
            self._report_scrape_error("Shiksha", e)
        finally:
            driver.quit()
        return all_reviews

    def analyze_sentiment_vader(self, text, threshold=0.0):
        """Classify *text* with VADER's compound score.

        Args:
            text: The text to analyse.
            threshold: Non-negative width of the neutral band around zero.
                The default of 0.0 labels any non-zero score as positive or
                negative.

        Returns:
            "Positive", "Negative" or "Neutral".
        """
        score = self._get_analyzer().polarity_scores(text)["compound"]
        return self._label_for_score(score, threshold)

    def scrape_reviews(self):
        """Button handler: scrape the selected source and show the results."""
        source = self.source_variable.get()
        self.clear_results()  # Clear previous results

        handlers = {
            "Quora": (self.scrape_quora_reviews, self.display_quora_results),
            "Shiksha": (self.scrape_shiksha_reviews, self.display_shiksha_results),
        }
        handler = handlers.get(source)
        if handler is None:
            messagebox.showerror("Error", "Please select a source.")
            self.update_status("Ready")
            return
        scrape, display = handler

        self._last_error = None
        self.update_status("Scraping... Please wait.")
        self.show_loading_window()
        self.master.update()  # force ui update

        failure = None
        try:
            display(scrape())
        except Exception as e:
            failure = e
        finally:
            self.hide_loading_window()

        # The dialog is raised only after the loading window (and its input
        # grab) is gone, so it is never stacked under a grabbed window.
        if failure is not None:
            messagebox.showerror("Error", f"An unexpected error occurred: {failure}")
        # Keep a scraping error visible instead of overwriting it with "Ready".
        self.update_status(self._last_error or "Ready")

    # ------------------------------------------------------------------
    # Display
    # ------------------------------------------------------------------

    def display_quora_results(self, reviews):
        """Write a sentiment summary and the Quora reviews to the text pane.

        *reviews* is the list returned by ``scrape_quora_reviews``.
        """
        if not reviews:
            self.results_text.insert(tk.END, "No reviews found.\n")
            return

        lines = [
            f"Total Quora Reviews Analyzed: {len(reviews)}\n\n",
            "Sentiment Analysis Summary:\n",
        ]
        for sentiment, percentage in self._sentiment_percentages(reviews).items():
            lines.append(f"{sentiment}: {percentage:.2f}%\n")
        lines.append("\n")

        lines.append("Reviews:\n")
        for review in reviews:
            lines.append(f"Review: {self._preview(review['Review'])}\n")
            lines.append(f"Sentiment: {review['Sentiment']}\n\n")

        self.results_text.insert(tk.END, "".join(lines))

    def display_shiksha_results(self, reviews):
        """Write the numbered Shiksha article previews to the text pane.

        *reviews* is the list of strings returned by
        ``scrape_shiksha_reviews``.
        """
        if not reviews:
            self.results_text.insert(tk.END, "No reviews found.\n")
            return

        lines = [
            f"Total Shiksha Reviews Analyzed: {len(reviews)}\n\n",
            "Reviews:\n",
        ]
        for i, review in enumerate(reviews, 1):
            lines.append(f"{i}. Review: {self._preview(review)}\n\n")

        self.results_text.insert(tk.END, "".join(lines))

    # ------------------------------------------------------------------
    # Small UI utilities
    # ------------------------------------------------------------------

    def update_status(self, message):
        """Show *message* in the status label and repaint immediately."""
        self.status_label.config(text=message)
        # Scraping blocks the Tk event loop, so the repaint must be forced.
        self.master.update()

    def clear_results(self):
        """Remove all text from the results pane."""
        self.results_text.delete("1.0", tk.END)

    def show_loading_window(self):
        """Open the modal "Loading..." window over the main window."""
        self.hide_loading_window()  # never leave an older window behind

        window = tk.Toplevel(self.master)
        window.title("Loading...")
        label = ttk.Label(window, text="Scraping... Please wait.")
        label.pack(padx=20, pady=20)

        # Tie the window to the main one (stays on top, shares its icon).
        # Skipped when the main window is not on screen, because a transient
        # of an unmapped window is never mapped itself.
        if self.master.winfo_viewable():
            window.transient(self.master)
        # A grab on a window that is not yet viewable fails on some
        # platforms, so wait for it to be mapped first.
        window.wait_visibility()
        # Route all input to the loading window while scraping runs.
        window.grab_set()

        self.loading_window = window
        self.master.update()

    def hide_loading_window(self):
        """Destroy the loading window if one is open."""
        if self.loading_window is not None:
            self.loading_window.destroy()
            self.loading_window = None


# Create the root window, attach the scraper UI to it and hand control to Tk.
# mainloop() blocks until the window is closed, so nothing below runs earlier.
root = tk.Tk()
gui = ReviewScraperUI(root)
root.mainloop()

# Main execution
if __name__ == "__main__":
    # NOTE(review): scrape_shiksha_reviews and analyze_sentiment_vader are
    # called here as module-level functions; in this cell they only appear as
    # ReviewScraperUI methods, so presumably they were defined by an earlier
    # notebook cell -- confirm.
    reviews = scrape_shiksha_reviews()
    exactly_one = len(reviews) == 1

    # Sentiment counts are not produced for Shiksha; only the total is shown.
    print(f"Total Reviews Analyzed: {len(reviews)}\n")

    # List every review, showing only its first 200 characters.
    print("Reviews:")
    sentiment_note = "Sentiment: Not Determined" if exactly_one else ""
    for i, review in enumerate(reviews, 1):
        print(f"{i}. Review: {review[:200]}...")
        print(f"{sentiment_note}\n")

    # Sentiment is computed only when exactly one review was collected.
    if not exactly_one:
        print("Not all articles had content to analyze sentiment.")
    else:
        sentiment = analyze_sentiment_vader(reviews[0])
        print(f"Sentiment of the Review 1: {sentiment}")

Total Reviews Analyzed: 0

Reviews:
Not all articles had content to analyze sentiment.
