<a href="https://colab.research.google.com/github/Kepners/ChopOnions/blob/main/Trndzo2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Block 1: Install Required Packages
# Installs every third-party dependency used by the later cells, enables nested
# event loops, and prints the lxml / newspaper versions as a sanity check.

# openai is pinned to 0.28.1; later cells use the legacy `openai.ChatCompletion`
# and `openai.error` interfaces.
# NOTE(review): `--upgrade` is repeated and `structlog` is listed twice; both look
# redundant. Only openai carries a version pin.
!pip install --upgrade openai==0.28.1 python-dotenv praw rake_nltk newsapi-python --upgrade lxml_html_clean bleach --upgrade newspaper3k feedparser aiohttp nest_asyncio structlog beautifulsoup4 cachetools fuzzywuzzy python-Levenshtein nltk pytrends ratelimit prettytable bs4 configparser structlog
print("pip install completed")
import nest_asyncio
# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()
# Imported here only so their versions can be reported below.
import lxml
import newspaper

print(f"lxml version: {lxml.__version__}")
print(f"newspaper version: {newspaper.__version__}")

import nltk

# Fetch each NLTK resource in turn and confirm it right after its download call,
# so the progress messages appear in the same order as the downloads.
for _nltk_resource in (
    'vader_lexicon',
    'stopwords',
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
    'wordnet',
    'punkt_tab',
):
    nltk.download(_nltk_resource)
    print(f"{_nltk_resource} downloaded")


print("Installation and NLTK data download completed successfully.")

pip install completed
lxml version: 5.3.0
newspaper version: 0.2.8
vader_lexicon downloaded
stopwords downloaded
punkt downloaded
averaged_perceptron_tagger downloaded
maxent_ne_chunker downloaded
words downloaded
wordnet downloaded
punkt_tab downloaded
Installation and NLTK data download completed successfully.


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   

In [23]:
# Block 2: Mount Google Drive and Load Configuration
# Mounts Drive, reads config.ini, and exposes the API keys plus the caching and
# rate-limiting settings as module-level names for the later cells.
from google.colab import drive
import configparser
import os
import openai  # import the openai module

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to your config.ini in Google Drive
config_path = '/content/drive/MyDrive/Secrets/config.ini'  # Update this path as needed

# Check if config.ini exists at the specified path
if not os.path.exists(config_path):
    raise FileNotFoundError(f"config.ini not found at {config_path}")

# Load configuration using configparser
config = configparser.ConfigParser()
config.read(config_path)

# Retrieve OpenAI API key.
# With fallback=None, a missing section or option yields None instead of raising,
# so a single emptiness check covers "missing" and "blank" alike.
openai_api_key = config.get('openai', 'api_key', fallback=None)
if not openai_api_key:
    raise ValueError("OpenAI API key not found or invalid format in config.ini under [openai] section.")

# Set the OpenAI API key
openai.api_key = openai_api_key

# Retrieve NewsAPI key.
# ConfigParser.get() signals a missing section/option with NoSectionError /
# NoOptionError (not KeyError), so those are the exceptions translated here.
try:
    newsapi_key = config.get('newsapi', 'api_key')
except (configparser.NoSectionError, configparser.NoOptionError) as exc:
    raise ValueError("NewsAPI key not found in config.ini under [newsapi] section.") from exc

if not newsapi_key:
    raise ValueError("NewsAPI key not found in config.ini under [newsapi] section.")

# Retrieve caching configurations (seconds); fallbacks apply when the section
# or option is absent.
rss_cache_ttl = config.getint('CACHING', 'RSS_CACHE_TTL', fallback=86400)
trend_cache_ttl = config.getint('CACHING', 'TREND_CACHE_TTL', fallback=3600)
openai_cache_ttl = config.getint('CACHING', 'OPENAI_CACHE_TTL', fallback=86400)

# Retrieve rate limiting configurations
rss_calls_per_day = config.getint('RATE_LIMITING', 'RSS_CALLS_PER_DAY', fallback=1000)

print("Configuration loaded successfully.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Configuration loaded successfully.


In [24]:
# Block 3: Define Data Structures

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class GoogleTrend:
    """Google Trends result for a single topic."""
    topic: str
    # Numeric interest score. The fetch helpers in this notebook store the mean
    # of the interest-over-time series here, so the field is a float, not a str.
    interest: float
    sentiment: str

@dataclass
class Trend:
    """One aggregated trending topic."""
    title: str
    description: str
    source: str
    approx_traffic: str
    sentiment: str
    # Optional Google Trends data attached to this topic; None when unavailable.
    google_trend: Optional[GoogleTrend] = None

@dataclass
class ScriptOptions:
    """Script-generation preferences; every field has a default."""
    style: str = "Normal Script"
    tone: str = "Informative"
    length: str = "60 seconds"

print("Data structures defined successfully.")


Data structures defined successfully.


In [25]:
# Block 4: Configuration and Initialization

import configparser
from pytrends.request import TrendReq # This import will now work
from newsapi import NewsApiClient

# Initialize OpenAI
# `api_key` is only a module-level alias of the key loaded in Block 2; the
# `openai.api_key` attribute itself was already assigned there.
api_key = openai_api_key

# Initialize NewsAPI
# Uses `newsapi_key` as read from config.ini in Block 2.
newsapi = NewsApiClient(api_key=newsapi_key)

# Initialize PyTrends with custom requests arguments
# hl is the interface language and tz the timezone offset passed to TrendReq;
# a browser-like User-Agent header is attached to its HTTP requests.
pytrends = TrendReq(hl='en-US', tz=360, requests_args={'headers': {'User-Agent': 'Mozilla/5.0'}})

print("Configuration and initialization completed successfully.")

Configuration and initialization completed successfully.


In [26]:
# Block 5: Create utils/data_processing.py and utils/__init__.py
# Writes a small `utils` package (relative to the current working directory)
# containing text-cleaning and named-entity helpers. `os` must already be
# imported by an earlier cell.

# Define the directory where helper functions will reside
utils_dir = 'utils'

# Create the 'utils' directory if it doesn't exist
os.makedirs(utils_dir, exist_ok=True)

# Define the path for the __init__.py file to make 'utils' a package
init_path = os.path.join(utils_dir, '__init__.py')

# Create an empty __init__.py file if it doesn't exist
if not os.path.exists(init_path):
    with open(init_path, 'w') as file:
        pass  # Creating an empty __init__.py
    print(f"Created empty '__init__.py' at '{init_path}' to make 'utils' a package.")
else:
    print(f"'__init__.py' already exists at '{init_path}'.")

# Define the path for the data_processing.py file
data_processing_path = os.path.join(utils_dir, 'data_processing.py')

# Define the content for data_processing.py
# The literal below is NOT a raw string, so each doubled backslash (`\\S`, `\\s`)
# is written to the file as a single backslash, giving valid regex escapes there.
data_processing_code = """
# data_processing.py

import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import ne_chunk, pos_tag
from nltk.tree import Tree

def clean_text(text):
    '''
    Cleans the input text by removing URLs, special characters, and stopwords.

    Parameters:
        text (str): The text to clean.

    Returns:
        str: The cleaned text.
    '''
    # Remove URLs
    text = re.sub(r'http\\S+', '', text)
    # Remove special characters and numbers
    text = re.sub(r'[^A-Za-z\\s]', '', text)
    # Convert to lowercase
    text = text.lower()
    # Tokenize the text
    words = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    # Join the words back into a single string
    cleaned_text = ' '.join(filtered_words)
    return cleaned_text

def extract_entities(text):
    '''
    Extracts named entities from the input text.

    Parameters:
        text (str): The text to extract entities from.

    Returns:
        list: A list of named entities.
    '''
    def get_entities(tree):
        entities = []
        for subtree in tree:
            if isinstance(subtree, Tree):
                entity = " ".join([token for token, pos in subtree.leaves()])
                entities.append(entity)
        return entities

    tokens = word_tokenize(text)
    tagged = pos_tag(tokens)
    chunked = ne_chunk(tagged)
    entities = get_entities(chunked)
    return entities
"""

# Write the data_processing.py file
# Mode 'w' truncates, so the module is regenerated on every run of this cell.
with open(data_processing_path, 'w') as file:
    file.write(data_processing_code)

print(f"Created '{data_processing_path}' successfully.")

# Optional: Verify the creation by listing the 'utils' directory
print("\nVerifying the creation of 'data_processing.py' and '__init__.py':")
# IPython shell escape; this line is not valid outside a notebook kernel.
!ls -l utils/


'__init__.py' already exists at 'utils/__init__.py'.
Created 'utils/data_processing.py' successfully.

Verifying the creation of 'data_processing.py' and '__init__.py':
total 4
-rw-r--r-- 1 root root 1534 Nov 13 19:24 data_processing.py
-rw-r--r-- 1 root root    0 Nov 13 19:02 __init__.py


In [27]:
import os
# Report the kernel's working directory; relative paths such as 'utils' are
# resolved against it.
print(f"Current Working Directory: {os.getcwd()}")


Current Working Directory: /content


In [28]:
import sys
import os
import asyncio
import aiohttp
import structlog
from prettytable import PrettyTable, HRuleStyle  # Import HRuleStyle for hrules
from cachetools import TTLCache, cached
from fuzzywuzzy import process
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from rake_nltk import Rake
import feedparser
import time
import re
from ratelimit import limits, sleep_and_retry
import openai
import openai.error  # Add this line
import textwrap  # For wrapping text in the table
from datetime import datetime, timedelta, timezone
from newsapi import NewsApiClient
from bs4 import BeautifulSoup
import difflib
import nltk
import threading
import configparser  # Import configparser
import random
import requests
from newspaper import Article
from pytrends.request import TrendReq
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
from random import uniform
import time

# ----------------------------
# Structured Logging with structlog
# ----------------------------

# Configure structlog to emit JSON lines through the standard-library logger.
structlog.configure(
    processors=[
        # Adds an ISO-8601 "timestamp" key to every event.
        structlog.processors.TimeStamper(fmt="iso"),
        # Many call sites in this notebook pass exc_info=True. Without this
        # processor the flag is rendered literally and no traceback is logged;
        # with it the traceback appears under the "exception" key.
        structlog.processors.format_exc_info,
        # Must stay last: renders the event dict to the final JSON string.
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)

# Module-wide logger used by every helper below.
logger = structlog.get_logger()

# ----------------------------
# Read API Keys from config.ini
# ----------------------------

# Initialize ConfigParser
# This rebinds the module-level `config`, `openai_api_key` and `newsapi`.
config = configparser.ConfigParser()
# NOTE(review): ConfigParser.read() skips files it cannot open instead of raising;
# a missing file would then surface as NoSectionError in the lookups below.
config.read('/content/drive/MyDrive/Secrets/config.ini')  # Ensure this path is correct

# Handle missing API keys gracefully
try:
    openai_api_key = config.get('openai', 'api_key')
    newsapi_api_key = config.get('newsapi', 'api_key')
except (configparser.NoSectionError, configparser.NoOptionError) as e:
    logger.error(f"Error reading API keys from config.ini: {e}", exc_info=True)
    # sys.exit raises SystemExit, which stops execution of this cell.
    sys.exit("Failed to read API keys from config.ini. Please ensure the file exists and contains the necessary keys.")

# Set your OpenAI API key from config.ini
openai.api_key = openai_api_key

# Initialize NewsApiClient with your API key from config.ini
newsapi = NewsApiClient(api_key=newsapi_api_key)

# ----------------------------
# Caching
# ----------------------------

# Define caching constants (seconds).
# Values come from the optional [CACHING] section of config.ini so they can be
# tuned without editing code; the fallbacks keep the previous hard-coded
# defaults (1 hour for trends, 1 day for OpenAI responses).
TREND_CACHE_TTL = config.getint('CACHING', 'TREND_CACHE_TTL', fallback=3600)
OPENAI_CACHE_TTL = config.getint('CACHING', 'OPENAI_CACHE_TTL', fallback=86400)

# Initialize caches with defined TTLs
trends_cache = TTLCache(maxsize=100, ttl=TREND_CACHE_TTL)
openai_cache = TTLCache(maxsize=1000, ttl=OPENAI_CACHE_TTL)

# ----------------------------
# Rate Limiting for RSS Feeds
# ----------------------------

ONE_DAY = 86400               # Length of the rate-limit window, in seconds
RSS_CALLS_PER_DAY = 100       # Calls allowed inside one window


@sleep_and_retry
@limits(calls=RSS_CALLS_PER_DAY, period=ONE_DAY)
def fetch_rss_feed_sync(rss_url):
    """
    Placeholder for a synchronous RSS fetch.

    The body is intentionally empty; only the rate-limit decorators are wired up.

    Parameters:
        rss_url (str): The URL of the RSS feed (currently unused).

    Returns:
        None
    """
    return None

# ----------------------------
# Initialize Sentiment Analyzer and Keyword Extractor
# ----------------------------

# Make sure the NLTK resources needed by the analyzers below are present.
for _required_resource in ('vader_lexicon', 'stopwords', 'punkt'):
    nltk.download(_required_resource)

# Shared VADER sentiment analyzer and RAKE keyword extractor.
sid = SentimentIntensityAnalyzer()
rake = Rake()

# ----------------------------
# OpenAI API Key and Semaphore
# ----------------------------

# Define a semaphore to limit concurrent requests
google_trends_semaphore = asyncio.Semaphore(2)  # Reduce concurrency to 2

# Semaphore guarding OpenAI calls. generate_summary_async and
# generate_script_for_topic_async both enter `async with openai_semaphore`, so
# the name must exist before they run. The limit of 5 is a tunable default.
openai_semaphore = asyncio.Semaphore(5)


# ----------------------------
# Helper Functions
# ----------------------------

# Available countries with multiple codes
# Country display name -> accepted country codes, kept in insertion order.
available_countries = dict([
    ('United States', ['US', 'USA']),
    ('Canada', ['CA']),
    ('United Kingdom', ['GB', 'UK']),
    ('Australia', ['AU']),
    ('India', ['IN']),
    ('Germany', ['DE']),
    ('France', ['FR']),
    ('Japan', ['JP']),
    ('Brazil', ['BR']),
    ('South Korea', ['KR']),
    ('Argentina', ['AR']),
    ('Mexico', ['MX']),
    ('Singapore', ['SG']),
    ('Spain', ['ES']),
    ('Italy', ['IT']),
    ('Netherlands', ['NL']),
    ('Poland', ['PL']),
    ('Sweden', ['SE']),
    ('Switzerland', ['CH']),
    # Extend with further (name, codes) pairs as needed
])

def get_matching_country(input_country, available_countries):
    """
    Matches user input to available countries.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    name or code match is tried first; if none is found, a fuzzy match against
    the country names (similarity cutoff 0.8) is attempted.

    Parameters:
        input_country (str): The country input by the user.
        available_countries (dict): Dictionary mapping country names to lists of codes.

    Returns:
        str or None: The matched country name or None if no match
        (including when the input is empty, blank, or not a string).
    """
    if not isinstance(input_country, str):
        return None
    normalized_input = input_country.strip().lower()
    if not normalized_input:
        return None

    for country, codes in available_countries.items():
        # Check if input matches the country name
        if normalized_input == country.lower():
            return country
        # Check if input matches any of the country codes
        if normalized_input in (code.lower() for code in codes):
            return country

    # Fuzzy matching is done on lower-cased names so that a typo such as
    # "canda" is not additionally penalised for differing in case from "Canada".
    names_by_lowercase = {country.lower(): country for country in available_countries}
    closest_matches = difflib.get_close_matches(
        normalized_input, list(names_by_lowercase), n=1, cutoff=0.8
    )
    if closest_matches:
        return names_by_lowercase[closest_matches[0]]
    return None

def sanitize_topic(topic):
    """
    Strips every character that is neither a word character nor whitespace.

    Parameters:
        topic (str): The topic to sanitize.

    Returns:
        str: The topic with the unwanted characters removed.
    """
    unwanted_chars = re.compile(r'[^\w\s]')
    return unwanted_chars.sub('', topic)

def extract_source(url):
    """
    Extracts the main domain name from the URL to identify the source.

    The host is compared against a table of known outlets, trying the full host
    first and then progressively shorter suffixes (so 'www.bbc.co.uk' resolves
    via 'bbc.co.uk' and 'edition.cnn.com' via 'cnn.com'). Unknown hosts fall
    back to their last two labels, capitalized.

    Parameters:
        url (str): The URL of the news article.

    Returns:
        str: The name of the source, or "Unknown Source" when the URL is empty,
        has no host, or cannot be parsed.
    """
    if not url:
        return "Unknown Source"
    try:
        from urllib.parse import urlparse
        # .hostname is lower-cased and excludes any port or credentials.
        host = urlparse(url).hostname or ''
        if not host:
            return "Unknown Source"
        domain_mapping = {
            'cbsnews.com': 'CBS News',
            'cnn.com': 'CNN',
            'foxnews.com': 'Fox News',
            'abcnews.go.com': 'ABC News',
            'bbc.co.uk': 'BBC',
            'google.com': 'Google News',
            'news.google.com': 'Google News',
            'reuters.com': 'Reuters',
            'theguardian.com': 'The Guardian',
            'nytimes.com': 'The New York Times',
            'usatoday.com': 'USA Today',
            'fortworthstar.com': 'Fort Worth Star-Telegram',
            'wcnc.com': 'WCNC',
            'apnews.com': 'AP News',
            'floridatoday.com': 'Florida Today',
            'msnbc.com': 'MSNBC News',
            # Add more mappings as needed
        }
        labels = host.split('.')
        # Longest suffix first, so three-label keys such as 'abcnews.go.com'
        # are matched before the host is shortened to two labels.
        for start in range(len(labels) - 1):
            candidate = '.'.join(labels[start:])
            if candidate in domain_mapping:
                return domain_mapping[candidate]
        # Unknown outlet: fall back to the last two labels (e.g. edition.example.org
        # -> 'Example.org'). Multi-part public suffixes are not special-cased.
        return '.'.join(labels[-2:]).capitalize()
    except Exception as e:
        logger.error(f"Error extracting source from URL '{url}': {e}", exc_info=True)
        return "Unknown Source"

def broaden_query(query):
    """
    Hook for widening a search query with synonyms or related terms.

    No broadening is implemented yet, so the query is handed back untouched.

    Parameters:
        query (str): The original query.

    Returns:
        str: The (currently unchanged) query.
    """
    broadened = query  # placeholder: broadening logic goes here
    return broadened

@cached(trends_cache)
def fetch_google_trends_cached(topic, timeframe='now 7-d', max_retries=5):
    """
    Fetches Google Trends data for the given topic with retry logic.

    Results are memoized in `trends_cache` via the `cached` decorator.
    NOTE(review): the decorator appears to store whatever is returned, so a
    None result (no data / retries exhausted) would also be cached until the
    entry expires; confirm this is intended.

    Parameters:
        topic (str): The search term to query.
        timeframe (str): Timeframe string passed to pytrends.
        max_retries (int): Maximum number of attempts before giving up.

    Returns:
        dict or None: {'topic', 'interest', 'sentiment'} where 'interest' is the
        mean of the interest-over-time series and 'sentiment' is always
        'Neutral'; None when no data is returned or every attempt fails.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    wait_time = 2  # Initial wait time in seconds

    for attempt in range(1, max_retries + 1):
        try:
            pytrends.build_payload([topic], timeframe=timeframe)
            interest_over_time = pytrends.interest_over_time()
            if interest_over_time is not None and not interest_over_time.empty:
                avg_interest = interest_over_time[topic].mean()
                sentiment = 'Neutral'
                return {'topic': topic, 'interest': avg_interest, 'sentiment': sentiment}
            else:
                logger.error(f"No data returned for topic '{topic}' from Google Trends.")
                return None
        except Exception as e:
            # Gracefully handle rate limits (429) and other errors
            if "429" in str(e):
                logger.warning(f"Rate limit hit for topic '{topic}'. Retrying after a delay...")
            else:
                logger.error(f"Error fetching Google Trends data for '{topic}': {e}", exc_info=True)

        # Back off only when another attempt will follow; sleeping after the
        # final failure would just delay the caller.
        if attempt < max_retries:
            time.sleep(wait_time)
            wait_time = min(wait_time * 2, 60)  # Exponential backoff, capped at 60s

    logger.error(f"Max retries exceeded for Google Trends data fetch for '{topic}'.")
    return None

def fetch_google_trends(topic, timeframe='now 7-d', retries=3, backoff_factor=2):
    """
    Queries Google Trends for a topic, retrying with exponential backoff.

    Each attempt builds a fresh pytrends client. A failed attempt is logged and
    followed by a sleep of backoff_factor * 2**attempt seconds.

    Returns:
        dict or None: {'topic', 'interest', 'sentiment'} on success; None when
        the result is empty or every attempt fails.
    """
    attempt = 0
    while attempt < retries:
        try:
            # Imported and instantiated inside the try so failures here are retried too
            from pytrends.request import TrendReq
            client = TrendReq(hl='en-US', tz=360)
            client.build_payload([topic], timeframe=timeframe)
            series = client.interest_over_time()
            if series.empty:
                return None
            return {
                'topic': topic,
                'interest': series[topic].mean(),
                'sentiment': analyze_sentiment(topic),
            }
        except Exception as e:
            logger.error(f"Error fetching Google Trends data for '{topic}': {e}", exc_info=True)
            time.sleep(backoff_factor * (2 ** attempt))
        attempt += 1
    return None

async def generate_summary_async(content, max_retries=5):
    """
    Generates a summary of the content using OpenAI's GPT with retry logic.

    Rate-limit errors are retried, waiting for the delay suggested in the error
    message when present and otherwise backing off exponentially (capped at
    60s). Any other error, or exhausting the retries, returns the original
    content. Empty content is returned as-is without calling the API.

    Parameters:
        content (str): The content to summarize.
        max_retries (int): Maximum number of attempts upon rate limiting.

    Returns:
        str: The generated summary, or the original content on failure.
    """
    if not content:
        # Nothing to summarize; avoid a pointless API call.
        return content

    wait_time = 1  # Initial wait time in seconds

    for attempt in range(1, max_retries + 1):
        try:
            async with openai_semaphore:
                response = await openai.ChatCompletion.acreate(
                    model="gpt-4",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant that summarizes text."},
                        {"role": "user", "content": f"Summarize the following text:\n\n{content}\n\nSummary:"}
                    ],
                    max_tokens=150,
                    temperature=0.5,
                )
            summary = response.choices[0].message['content'].strip()
            return summary
        except openai.error.RateLimitError as e:
            # Handle rate limit error
            logger.error({
                "event": "RateLimitError in generate_summary_async",
                "error": str(e),
                "timestamp": datetime.now(timezone.utc).isoformat()
            })
            if attempt == max_retries:
                # No further attempt will be made, so do not wait.
                break
            # Extract wait time from the error message if available
            match = re.search(r"Please try again in ([\d\.]+)s", str(e))
            if match:
                wait_time = float(match.group(1))
            else:
                wait_time = min(wait_time * 2, 60)  # Exponential backoff with a max wait time
            logger.info(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying...")
            await asyncio.sleep(wait_time)
        except Exception as e:
            logger.error({
                "event": "Error generating summary_async",
                "error": str(e),
                "timestamp": datetime.now(timezone.utc).isoformat()
            })
            return content  # Return original content if other errors occur

    logger.error("Max retries exceeded for generate_summary_async. Returning original content.")
    return content

def analyze_sentiment(text):
    """
    Classifies text with NLTK's VADER analyzer.

    The compound score is mapped to a label: >= 0.05 is 'Positive',
    <= -0.05 is 'Negative', anything in between is 'Neutral'. Falsy input and
    any failure during analysis also yield 'Neutral'.

    Parameters:
        text (str): The text to analyze (coerced with str()).

    Returns:
        str: 'Positive', 'Neutral', or 'Negative'
    """
    try:
        if not text:
            return 'Neutral'
        compound = sid.polarity_scores(str(text))['compound']
        return (
            'Positive' if compound >= 0.05
            else 'Negative' if compound <= -0.05
            else 'Neutral'
        )
    except Exception as e:
        logger.error(f"Error in analyze_sentiment: {e}", exc_info=True)
        return 'Neutral'

def map_topic_to_trends_query(topic_title):
    """
    Hook for translating a topic title into a Google Trends query.

    No mapping is implemented yet, so the title itself is used as the query.

    Parameters:
        topic_title (str): The title of the topic.

    Returns:
        str: The query (currently the unchanged title).
    """
    trends_query = topic_title  # placeholder: mapping logic goes here
    return trends_query

def extract_keywords(topic_title):
    """
    Runs the shared RAKE extractor over a topic title.

    Parameters:
        topic_title (str): The title of the topic.

    Returns:
        List[str]: The ranked phrases reported by the extractor.
    """
    # The shared `rake` instance keeps the result of the latest extraction
    rake.extract_keywords_from_text(topic_title)
    ranked_phrases = rake.get_ranked_phrases()
    return ranked_phrases

def fetch_newsapi_articles(topic, page_size=5):
    """
    Searches NewsAPI for English articles about a topic, sorted by relevancy.

    Parameters:
        topic (str): The topic to search articles for.
        page_size (int): Number of articles to fetch.

    Returns:
        List[dict]: The articles from the response, or an empty list when the
        request fails or the response status is not 'ok'.
    """
    try:
        # Query through the module-level NewsApiClient
        response = newsapi.get_everything(
            q=topic,
            language='en',
            sort_by='relevancy',
            page_size=page_size,
        )
        request_succeeded = response.get('status') == 'ok'
        if not request_succeeded:
            raise Exception(f"NewsAPI Error: {response.get('message', 'Unknown error')}")
        return response['articles']
    except Exception as e:
        logger.error(f"Error fetching articles from NewsAPI: {e}", exc_info=True)
        return []

# ----------------------------
# Trend Data Classes
# ----------------------------

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class GoogleTrend:
    """Google Trends figures for one topic."""
    topic: str
    interest: float
    sentiment: str


@dataclass
class Trend:
    """A single aggregated trending topic."""
    title: str
    description: str
    source: str
    approx_traffic: str
    sentiment: str
    # Attached Google Trends data; stays None when none is available
    google_trend: Optional[GoogleTrend] = field(default=None)


@dataclass
class ScriptOptions:
    """Script-generation preferences; every field is optional."""
    style: str = field(default="Normal")
    tone: str = field(default="Informative")
    length: str = field(default="60 seconds")

# ----------------------------
# Fetch Trending Topics Function
# ----------------------------


async def fetch_full_article_content(url):
    """
    Downloads and parses an article, returning its text.

    The download and parse steps are blocking calls, so they run in the default
    executor to keep the event loop responsive.

    Parameters:
        url (str): The article URL.

    Returns:
        str or None: The article text, or None when the URL is empty or the
        download/parse fails.
    """
    if not url:
        return None

    def _download_and_parse():
        # Runs in a worker thread: blocking network I/O plus parsing.
        article = Article(url)
        article.download()
        article.parse()
        return article.text

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _download_and_parse)
    except Exception as e:
        logger.error(f"Error fetching full article content from {url}: {e}", exc_info=True)
        return None

async def fetch_trending_topics_rss_async(geo='US', limit=10):
    """
    Asynchronously fetches trending topics from multiple RSS feeds.

    All feeds are requested concurrently. Each entry becomes a dict with the
    keys 'title', 'description', 'source', 'approx_traffic', 'sentiment' and
    'published'.

    Parameters:
        geo (str): Country code interpolated into the Google feed URLs.
        limit (int): Maximum number of entries taken from each feed.

    Returns:
        List[dict]: One dict per feed entry, in feed order.
    """
    rss_feeds = [
        f"https://trends.google.com/trends/trendingsearches/daily/rss?geo={geo}",
        f"https://news.google.com/rss?hl=en-{geo}&gl={geo}&ceid={geo}:en",
        # Additional RSS feeds
        "http://rss.cnn.com/rss/edition.rss",
        "http://news.yahoo.com/rss/",
        "https://www.theguardian.com/uk/rss",
        "https://news.un.org/feed/subscribe/en/news/all/rss.xml"
        # Add more feeds as needed
    ]

    def _published_at(entry):
        # Use the feed's own publication (or update) time so later time-frame
        # filtering is meaningful. feedparser is expected to expose these as
        # UTC time tuples; fall back to "now" when absent or malformed.
        parsed = entry.get('published_parsed') or entry.get('updated_parsed')
        if parsed:
            try:
                return datetime(*parsed[:6], tzinfo=timezone.utc)
            except (TypeError, ValueError):
                pass
        return datetime.now(timezone.utc)

    results = await asyncio.gather(
        *(fetch_rss_feed_async(rss_url, limit) for rss_url in rss_feeds)
    )

    trending_topics = []  # Collected topics across all feeds
    for result in results:
        if not result:
            continue
        for entry in result:
            title = entry.get('title', 'No Title')
            link = entry.get('link', '')
            summary = entry.get('summary') or entry.get('description') or entry.get('title') or 'No description available.'

            trending_topics.append({
                'title': title,
                'description': summary,
                'source': extract_source(link),
                'approx_traffic': 'N/A',
                'sentiment': 'Neutral',  # Placeholder; actual sentiment will be determined later
                'published': _published_at(entry)
            })
    return trending_topics

async def fetch_rss_feed_async(rss_url, limit):
    """
    Asynchronously fetches and parses an RSS feed.

    Parameters:
        rss_url (str): The URL of the RSS feed.
        limit (int): Number of entries to fetch.

    Returns:
        List[dict]: Up to `limit` feed entries; an empty list on a non-200
        response or any error.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
    }
    # An explicit ClientTimeout replaces the bare number previously passed as
    # `timeout=`; the overall budget is still 10 seconds per request.
    request_timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with aiohttp.ClientSession(headers=headers, timeout=request_timeout) as session:
            async with session.get(rss_url) as response:
                if response.status != 200:
                    logger.error(f"Failed to fetch RSS feed from {rss_url}. Status code: {response.status}")
                    return []
                content = await response.text()
                feed = feedparser.parse(content)
                entries = feed.entries[:limit]
                return entries
    except Exception as e:
        logger.error(f"Error fetching RSS feed from {rss_url}: {e}", exc_info=True)
        return []

# ----------------------------
# Aggregate Trends Data Function
# ----------------------------

async def aggregate_trends_data(rss_trends, selected_timeframe_hours, max_trends=10):
    """
    Aggregates data for each trend by fetching Google Trends data and a summary.

    For every RSS trend inside the time frame, the description is stripped of
    HTML and truncated to 200 characters, Google Trends data is looked up, a
    summary is generated, and its sentiment is analyzed.

    Parameters:
        rss_trends (List[dict]): Trend dicts with 'title', 'description',
            'source' and 'published' keys.
        selected_timeframe_hours (int): Maximum age of a trend, in hours.
        max_trends (int): Maximum number of Trend objects to return.

    Returns:
        List[Trend]: The aggregated trends, at most `max_trends` of them.
    """
    aggregated_trends = []
    now = datetime.now(timezone.utc)
    max_age_seconds = selected_timeframe_hours * 3600
    loop = asyncio.get_running_loop()

    for trend in rss_trends:
        if len(aggregated_trends) >= max_trends:
            break  # Limit reached

        # Filter based on the selected time frame; a trend without a
        # publication time is treated as current rather than raising KeyError.
        published = trend.get('published') or now
        if (now - published).total_seconds() > max_age_seconds:
            continue  # Skip trends outside the time frame

        topic_title = trend['title']
        # Clean the description by stripping HTML
        soup = BeautifulSoup(trend.get('description') or '', 'html.parser')
        clean_description = soup.get_text(separator=' ', strip=True)
        # Limit the description length
        clean_description = (clean_description[:200] + '...') if len(clean_description) > 200 else clean_description

        # Fetch Google Trends data. The helper is synchronous and sleeps between
        # retries, so it runs in the default executor instead of blocking the
        # event loop. Calls are awaited one at a time, so only one worker
        # thread touches the helper's cache at any moment within this call.
        google_trend_data = await loop.run_in_executor(
            None,
            lambda: fetch_google_trends_cached(topic_title, timeframe='now 7-d'),
        )
        if google_trend_data:
            google_trend = GoogleTrend(**google_trend_data)
            approx_traffic = str(google_trend.interest)
        else:
            google_trend = None
            approx_traffic = 'Unavailable'

        # Generate summary asynchronously if a real description is available
        if clean_description and clean_description != 'No description available.':
            summary = await generate_summary_async(clean_description)
        else:
            summary = clean_description  # Use existing description

        sentiment = analyze_sentiment(summary)

        aggregated_trends.append(Trend(
            title=topic_title,
            description=summary,
            source=trend['source'],
            approx_traffic=approx_traffic,
            sentiment=sentiment,
            google_trend=google_trend
        ))

    return aggregated_trends

# ----------------------------
# Define fetch_and_aggregate_trending_data Function
# ----------------------------

async def fetch_and_aggregate_trending_data(country_code, timeframe_hours):
    """
    Runs the two pipeline stages: fetch RSS topics, then aggregate them.

    Parameters:
        country_code (str): The country code for fetching trends.
        timeframe_hours (int): The timeframe in hours to filter trends.

    Returns:
        List[Trend]: A list of aggregated Trend objects.
    """
    # Up to 20 entries are requested from each feed before aggregation
    raw_topics = await fetch_trending_topics_rss_async(geo=country_code, limit=20)
    return await aggregate_trends_data(raw_topics, timeframe_hours)

# ----------------------------
# Generate and Display Script Function
# ----------------------------

async def generate_and_display_script_async(selected_topic_data: Trend):
    """
    Interactively collects script options, generates a script and prints it.

    Blank answers fall back to the defaults ("Normal", "Informative",
    "60 seconds"); a non-numeric length also falls back to "60 seconds". If
    generation raises, the error is logged and a failure message is printed
    in place of the script.

    Parameters:
        selected_topic_data (Trend): The data associated with the selected topic.
    """
    print("\nYou can customize the script generation. If you wish to skip any option, just press Enter.")

    # Gather the three options, applying defaults for empty answers
    chosen_style = input("Select script style (Flashy / Expressive / Normal): ").strip() or "Normal"
    chosen_tone = input("Select script tone (e.g., Informative, Persuasive, Emotional): ").strip() or "Informative"

    raw_length = input("Enter script length in seconds (e.g., 60, 120): ").strip()
    chosen_length = "60 seconds"
    if raw_length.isdigit():
        chosen_length = f"{raw_length} seconds"

    options = ScriptOptions(style=chosen_style, tone=chosen_tone, length=chosen_length)

    print("\nSelected Options - Style: {0}, Tone: {1}, Length: {2}".format(options.style, options.tone, options.length))

    print("\nGenerating script for the selected topic...")
    topic_title = selected_topic_data.title
    try:
        script = await generate_script_for_topic_async(topic_title, selected_topic_data, options=options)
        print("Script generated successfully.")
    except Exception as e:
        logger.error({
            "error": str(e),
            "event": "generate_script_error",
            "timestamp": datetime.utcnow().isoformat()
        })
        script = "Failed to generate script."

    # Show the result together with its source
    print(f"\n### Generated Script for '{selected_topic_data.title}':\n")
    print(script)
    print("\n**Source:**", selected_topic_data.source)
    print("\n====\n")

    print("Script generation completed.")

async def generate_script_for_topic_async(title, trend_data, options: ScriptOptions):
    """
    Generates a script based on the topic and options using OpenAI's GPT.

    The request is retried only on rate-limit errors, up to five attempts in
    total. Between attempts the coroutine waits for the delay hinted at in the
    error message when one can be parsed, otherwise for a doubling delay capped
    at 60 seconds. No wait is performed after the final attempt.

    Parameters:
        title (str): The title of the topic.
        trend_data (Trend): The trend data; its `description`, `sentiment`
            and `approx_traffic` attributes are embedded in the prompt.
        options (ScriptOptions): User-selected script options (`length`,
            `tone`, `style`).

    Returns:
        str: The generated script, or a message starting with
        "Failed to generate script" when generation did not succeed.
    """
    # Construct the prompt for OpenAI
    prompt = (
        f"Generate a {options.length} script for the topic '{title}' with a {options.tone} tone "
        f"and a {options.style} style. The script should include the following details:\n"
        f"- Description: {trend_data.description}\n"
        f"- Sentiment: {trend_data.sentiment}\n"
        f"- Approximate Traffic: {trend_data.approx_traffic}\n\n"
        f"Please ensure the script is engaging and suitable for a {options.length} presentation."
    )

    max_retries = 5
    wait_time = 1  # Initial wait time in seconds

    for attempt in range(1, max_retries + 1):
        try:
            # The shared semaphore bounds the number of concurrent OpenAI calls.
            async with openai_semaphore:
                response = await openai.ChatCompletion.acreate(
                    model="gpt-4",
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a creative scriptwriter that generates scripts based on provided data."
                        },
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    max_tokens=500,
                    temperature=0.7,
                )
            script = response.choices[0].message['content'].strip()
            return script
        except openai.error.RateLimitError as e:
            # Handle rate limit error
            logger.error({
                "event": "RateLimitError in generate_script_for_topic_async",
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            })
            if attempt == max_retries:
                # No attempt is left, so waiting would only delay the failure.
                break

            # Extract wait time from the error message if available
            hinted_wait = None
            match = re.search(r"Please try again in ([\d\.]+)s", str(e))
            if match:
                try:
                    hinted_wait = float(match.group(1))
                except ValueError:
                    # The pattern also matches strings such as "1.2.3";
                    # fall back to exponential backoff in that case.
                    hinted_wait = None
            if hinted_wait is not None:
                wait_time = hinted_wait
            else:
                wait_time = min(wait_time * 2, 60)  # Exponential backoff with a max wait time
            logger.info(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying...")
            await asyncio.sleep(wait_time)
        except Exception as e:
            # Any other error is not retried.
            logger.error({
                "event": "Error generating script_async",
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat()
            })
            return "Failed to generate script due to an unexpected error."

    logger.error("Max retries exceeded for generate_script_for_topic_async. Returning failure message.")
    return "Failed to generate script after multiple attempts due to rate limits."

# ----------------------------
# Main Function
# ----------------------------

def main():
    """
    Main function to execute the script workflow.

    Interactively asks for a country and a time frame, fetches the aggregated
    trending topics while a progress message is printed from a background
    thread, lists the topics in pages of ten, and generates a script for the
    topic the user picks. All results are printed; nothing is returned.
    """
    async def run():
        # Function to display a changing message every 5 seconds with an additional note
        def flashing_message(stop_event):
            messages = [
                "🔍 Gathering the latest trends... (This could take up to 2 minutes. Please wait.)",
                "⏳ Processing data, please wait... (This could take up to 2 minutes. Please wait.)",
                "✨ Almost there, thank you for your patience! (This could take up to 2 minutes. Please wait.)"
            ]
            idx = 0
            while not stop_event.is_set():
                message = messages[idx % len(messages)]
                print(f"\r{message}   ", end='', flush=True)
                # Sleep in one-second slices so a stop request is noticed quickly.
                for _ in range(5):
                    if stop_event.is_set():
                        break
                    time.sleep(1)
                idx += 1
                # Blank out the line before the next message (or the final output).
                print('\r' + ' ' * len(message) + '   ', end='', flush=True)

        while True:
            print("Enter the country for trending topics data (e.g., United States or US):")
            country_input = input("Country: ").strip()
            matching_country = get_matching_country(country_input, available_countries)
            if matching_country:
                break
            print("No matching countries found. Please try again.")
            # Optionally, display available countries
            print("Available countries are:")
            for country in available_countries:
                print(f"- {country}")

        selected_country = matching_country
        # Retrieve all possible codes for the selected country
        selected_country_codes = available_countries[selected_country]
        selected_country_code = selected_country_codes[0]  # Use the first code by default
        print(f"\nYou selected: {selected_country}")

        print("\nSelect the time frame for trending topics:")
        print("A. Last 4 hours")
        print("B. Last 24 hours")
        print("C. Last 7 days")
        time_range_selection = input("Enter the letter of the time frame you're interested in: ").strip().upper()
        time_range_mapping = {'A': 4, 'B': 24, 'C': 168}
        selected_timeframe_hours = time_range_mapping.get(time_range_selection)
        if not selected_timeframe_hours:
            print("Invalid selection. Exiting.")
            return

        # Start flashing message while fetching all data. The thread is a daemon
        # so it can never keep the interpreter alive on its own.
        stop_event = threading.Event()
        thread = threading.Thread(target=flashing_message, args=(stop_event,), daemon=True)
        thread.start()

        # Fetch all data asynchronously with robust error handling
        try:
            # Run the async fetch_and_aggregate_trending_data function
            aggregated_trends = await fetch_and_aggregate_trending_data(selected_country_code, selected_timeframe_hours)
        except Exception as e:
            logger.error({
                "error": str(e),
                "event": "fetch_data_error",
                "timestamp": datetime.utcnow().isoformat()
            })
            aggregated_trends = []
        finally:
            # Stop flashing message even when the fetch is interrupted or
            # cancelled (exceptions that `except Exception` does not catch).
            stop_event.set()
            thread.join()
            print()  # Move to the next line after flashing message

        if not aggregated_trends:
            logger.error({
                "event": "no_trending_topics_found",
                "timestamp": datetime.utcnow().isoformat()
            })
            print("No trending topics found.")
            return

        # Display trending topics with pagination
        # Pagination variables
        batch_size = 10
        total_trends = len(aggregated_trends)
        current_index = 0

        async def generate_for_choice(choice_text):
            """
            Handles a numeric topic choice shared by both selection prompts.

            Returns True when a script was generated for a valid topic number,
            False when the choice was rejected and the user should be asked again.
            """
            try:
                # str.isdigit() lets through characters (e.g. superscripts)
                # that int() cannot convert, hence the guard.
                selected_idx = int(choice_text)
            except ValueError:
                print("Invalid input. Please enter a valid number.")
                return False
            if not 1 <= selected_idx <= total_trends:
                print("Invalid selection. Please enter a valid topic number.")
                return False
            selected_topic_data = aggregated_trends[selected_idx - 1]
            selected_topic = selected_topic_data.title
            # Apply text wrapping to the selected topic message
            selected_topic_display = textwrap.fill(f"You selected: {selected_topic}", width=40)
            print(f"\n{selected_topic_display}")
            # Generate scripts for the selected topic
            await generate_and_display_script_async(selected_topic_data)
            return True

        while current_index < total_trends:
            # Determine the end index for the current batch
            end_index = min(current_index + batch_size, total_trends)
            batch_trends = aggregated_trends[current_index:end_index]

            # Display the consolidated list with sentiments
            print(f"\nCurrently Trending in {selected_country} in the last {selected_timeframe_hours} hours (Showing {current_index + 1} to {end_index} of {total_trends}):\n")
            table = PrettyTable()
            table.field_names = ["No.", "Topic", "Description", "Source", "Approx Traffic", "Sentiment"]
            table.hrules = HRuleStyle.ALL  # Use HRuleStyle.ALL
            table.max_width = 40  # Suitable for phone screens
            for idx, trend in enumerate(batch_trends, start=current_index + 1):
                title = textwrap.fill(trend.title, width=40)
                description = textwrap.fill(trend.description, width=40)
                table.add_row([idx, title, description, trend.source, trend.approx_traffic, trend.sentiment])
            print(table)

            # Prepare to handle user input for pagination or selection
            while True:
                user_input = input("Type 'more' or '+' to view more results, enter the number of the topic to select it, or any other key to exit: ").strip().lower()
                if user_input in ['more', '+']:
                    current_index = end_index
                    break  # Continue to the next batch
                elif user_input.isdigit():
                    if await generate_for_choice(user_input):
                        return  # Exit after script generation
                else:
                    print("Exiting the script.")
                    return

        print("====\nNo more trending topics available.\n")

        # After all batches are displayed, prompt the user to select a topic
        while True:
            choice = input("Enter the number of the topic you're interested in (or type 0 to exit): ").strip()
            if choice == '0':
                print("Exiting the script.")
                return
            if choice.isdigit():
                if await generate_for_choice(choice):
                    return  # Exit after script generation
            else:
                print("Invalid input. Please enter a number.")

    asyncio.run(run())

# Entry point: start the interactive workflow when this code is executed
# directly rather than imported as a module.
if __name__ == "__main__":
    main()


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Enter the country for trending topics data (e.g., United States or US):
Country: us

You selected: United States

Select the time frame for trending topics:
A. Last 4 hours
B. Last 24 hours
C. Last 7 days
Enter the letter of the time frame you're interested in: a
                                                                                               

Currently Trending in United States in the last 4 hours (Showing 1 to 10 of 10):

+-----+-----------------------+------------------------------------------+-------------+--------------------+-----------+
| No. |         Topic         |               Description                |    Source   |   Approx Traffic   | Sentiment |
+-----+-----------------------+------------------------------------------+-------------+--------------------+-----------+
|  1  |       John Thune      |  You didn't provide any text related to  | Google News | 1.6923076923076923 |  Positive |
|     |                       |   "Thune." Please provide the text yo