In [None]:
"""
Text Analysis and Indexing Service
This module provides functionality for text analysis, word frequency tracking,
and advanced text processing operations.
"""

!pip install requests beautifulsoup4
!pip install firebase
import re
from nltk.stem import PorterStemmer

class TextAnalysisService:
    """
    Word-frequency index over text, with optional stop-word filtering,
    Porter stemming and a simple stem-based lookup.

    The index lives in ``self.frequency_index`` as a plain ``dict`` mapping
    a lower-cased token (or, after ``normalize_word_forms``, a stem) to the
    number of times it has been counted.
    """

    def __init__(self):
        """Start with an empty frequency index."""
        self.frequency_index = dict()

    def process_text(self, input_text):
        """
        Tokenise ``input_text`` and add the token counts to the index.

        Tokens are runs of ``\\w`` characters; each token is lower-cased
        before it is counted. Counts accumulate across calls.

        Args:
            input_text (str): Text content to analyze
        """
        counts = self.frequency_index
        for match in re.finditer(r'\w+', input_text):
            key = match.group(0).lower()
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1

    def filter_common_words(self):
        """Drop a fixed set of English stop words from the index, in place."""
        common_words = {'a', 'an', 'the', 'and', 'or', 'in', 'on', 'at'}
        # The intersection is materialised as a new set, so deleting from the
        # dict while looping over it is safe.
        for stop_word in self.frequency_index.keys() & common_words:
            del self.frequency_index[stop_word]

    def normalize_word_forms(self):
        """
        Re-key the index by Porter stem.

        Every indexed word is stemmed with ``PorterStemmer``; words sharing
        a stem have their counts summed. The index is replaced by the newly
        built dictionary.
        """
        stemmer = PorterStemmer()
        merged = {}
        for term, count in self.frequency_index.items():
            stem = stemmer.stem(term)
            merged.setdefault(stem, 0)
            merged[stem] += count
        self.frequency_index = merged

    def execute_search(self, search_query):
        """
        Look up the stems of the query's words in the index.

        The query is lower-cased, split into ``\\w+`` tokens, and each token
        is Porter-stemmed; stems that are keys of the index are reported.

        Args:
            search_query (str): Search query string

        Returns:
            dict: Matching stems mapped to their stored frequencies
        """
        stemmer = PorterStemmer()
        index = self.frequency_index
        stems = (
            stemmer.stem(piece)
            for piece in re.findall(r'\w+', search_query.lower())
        )
        return {stem: index[stem] for stem in stems if stem in index}

    def get_frequency_index(self):
        """
        Return the current frequency index.

        Returns:
            dict: The index dictionary itself (not a copy)
        """
        return self.frequency_index


In [None]:
"""
Web Content Retrieval Module
Provides functionality for fetching and parsing web page content.
"""

from bs4 import BeautifulSoup
from urllib.request import urlopen

def retrieve_webpage_content(target_url):
    """
    Fetch and parse content from a specified URL.

    This is a best-effort helper: any ordinary error raised while opening
    the URL or parsing the response is swallowed and reported as ``None``.
    Interpreter-level signals such as ``KeyboardInterrupt`` and
    ``SystemExit`` are allowed to propagate.

    Args:
        target_url (str): The URL to fetch content from

    Returns:
        BeautifulSoup: Parsed HTML content or None if fetch fails
    """
    try:
        # The context manager closes the response as soon as the parser has
        # consumed it, instead of leaving the connection to be reclaimed later.
        with urlopen(target_url) as web_response:
            return BeautifulSoup(web_response, 'html.parser')
    except Exception:
        # Deliberately broad (fetch and parse failures alike map to None),
        # but not a bare ``except`` so Ctrl-C and sys.exit() still work.
        return None

In [None]:
"""
Firebase Data Management Service
Provides an interface for interacting with Firebase Realtime Database.
"""

from firebase import firebase

class DatabaseService:
    """
    Small wrapper around a ``firebase.FirebaseApplication`` connection that
    offers one write operation and one read operation, each of which prints
    a confirmation line.
    """

    def __init__(self, database_url: str):
        """
        Create the underlying Firebase connection object.

        Args:
            database_url (str): Firebase database URL
        """
        # Second positional argument is passed as None
        # (presumably "no authentication" -- confirm against the library).
        connection = firebase.FirebaseApplication(database_url, None)
        self.database_connection = connection

    def store_data(self, storage_path: str, data_payload: dict):
        """
        Write ``data_payload`` under ``storage_path`` relative to the root.

        Args:
            storage_path (str): Database path for storage
            data_payload (dict): Data to store

        Returns:
            None
        """
        connection = self.database_connection
        connection.put('/', storage_path, data_payload)
        # Printed unconditionally once put() has returned.
        print(f"✅ Data successfully stored at /{storage_path}")

    def retrieve_data(self, retrieval_path: str):
        """
        Read whatever is stored at ``/<retrieval_path>``.

        Args:
            retrieval_path (str): Path to retrieve data from

        Returns:
            dict: Whatever the connection's ``get`` call returned
        """
        connection = self.database_connection
        retrieved_data = connection.get(f"/{retrieval_path}", None)
        # Printed unconditionally once get() has returned.
        print(f"📥 Data successfully retrieved from /{retrieval_path}")
        return retrieved_data


In [None]:
"""
Web Content Analysis System
Main execution script that combines web scraping, text analysis, and data storage.
"""

# Set up the pipeline: fetch the page and open the database connection
target_website = 'https://www.arianagrande.com/'
webpage_content = retrieve_webpage_content(target_website)
database_service = DatabaseService("https://cloud7-38a0b-default-rtdb.firebaseio.com/")

if not webpage_content:
    # The fetch helper yields None when the page could not be retrieved
    print("Error: Unable to retrieve webpage content")
else:
    # Build the index: tokenise, drop stop words, then collapse to stems
    analysis_service = TextAnalysisService()
    analysis_service.process_text(webpage_content.get_text())
    analysis_service.filter_common_words()
    analysis_service.normalize_word_forms()

    # Look up a batch of terms in the stemmed index
    search_query = 'shop buy ariana music grande video eternal sunshine brighter days'
    analysis_results = analysis_service.execute_search(search_query)

    # Show the matches, persist them, then read them back from the database
    print("Analysis Results:", analysis_results, sep="\n")
    database_service.store_data("words", analysis_results)
    # Kept as two prints: retrieve_data() prints its own status line, which
    # must appear after the "Stored Results:" heading.
    print("\nStored Results:")
    print(database_service.retrieve_data("words"))

    # Single-term lookup
    specific_query = "eternal"
    specific_results = analysis_service.execute_search(specific_query)
    print("\n🔎 Search Results for:", specific_query)
    print(specific_results)
