<a href="https://colab.research.google.com/github/Kepners/ChopOnions/blob/main/ChoppingOnions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Block 1: Install Required Packages

!pip install --upgrade openai python-dotenv praw rake_nltk feedparser aiohttp nest_asyncio structlog cachetools fuzzywuzzy python-Levenshtein nltk pytrends ratelimit prettytable
print("pip install completed")
import nest_asyncio
# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()

import nltk
nltk.download('vader_lexicon')
print("vader_lexicon downloaded")
nltk.download('stopwords')
print("stopwords downloaded")
nltk.download('punkt')
print("punkt downloaded")
nltk.download('averaged_perceptron_tagger')
print("averaged_perceptron_tagger downloaded")
nltk.download('maxent_ne_chunker')
print("maxent_ne_chunker downloaded")
nltk.download('words')
print("words downloaded")
nltk.download('wordnet')
print("wordnet downloaded")

print("Installation and NLTK data download completed successfully.")


Collecting openai
  Downloading openai-1.54.3-py3-none-any.whl.metadata (24 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting rake_nltk
  Downloading rake_nltk-1.0.6-py3-none-any.whl.metadata (6.4 kB)
Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting structlog
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-Levenshtein
  Downloading python_Levenshtein-0.26.1-py3-none-any.whl.metadata (3.7 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting pytrends
  Downloading pytrends-4.9.2-py3-none-any.whl.metadata (13 kB)
Collecting ratelimit
  Downloading ratelimit-2.2.1.tar.gz (5.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting p

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...


vader_lexicon downloaded
stopwords downloaded


[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


punkt downloaded


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


averaged_perceptron_tagger downloaded


[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.


maxent_ne_chunker downloaded
words downloaded


[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


wordnet downloaded
Installation and NLTK data download completed successfully.


In [2]:
# Block 2: Mount Google Drive and Load Environment Variables
from google.colab import drive
import os
from dotenv import load_dotenv

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to your .env file in Google Drive
dotenv_path = '/content/drive/MyDrive/Secrets/.env'

# Load the environment variables from the .env file
load_dotenv(dotenv_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


True

In [3]:
# Block 3: Define Data Structures

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class GoogleTrend:
    topic: str
    interest: str
    sentiment: str

@dataclass
class RedditPost:
    type: str
    title: str
    summary: str
    source: str
    approx_traffic: str
    sentiment: str

@dataclass
class Trend:
    title: str
    description: str
    source: str
    approx_traffic: str
    sentiment: str
    reddit_posts: List[RedditPost] = field(default_factory=list)
    google_trend: Optional[GoogleTrend] = None

@dataclass
class ScriptOptions:
    style: str = "Normal Script"
    tone: str = "Informative"
    length: str = "60 seconds"

print("Data structures defined successfully.")


Data structures defined successfully.


In [4]:
# Block 4: Configuration and Initialization

import configparser
import praw
from pytrends.request import TrendReq

# Path to config.ini in Google Drive
config_path = '/content/drive/MyDrive/Secrets/config.ini'  # Adjust the path as needed

# Check if config.ini exists
if not os.path.exists(config_path):
    print(f"Configuration file not found at {config_path}. Please create it with the following format:")
    print("""
[openai]
api_key=YOUR_OPENAI_API_KEY

[reddit]
client_id=YOUR_REDDIT_CLIENT_ID
client_secret=YOUR_REDDIT_CLIENT_SECRET
user_agent=YOUR_REDDIT_USER_AGENT
""")
    raise FileNotFoundError(f"No config.ini found at {config_path}")

# Load configuration from config.ini
config = configparser.ConfigParser()
config.read(config_path)

# Assign API keys and credentials
try:
    OPENAI_API_KEY = config.get('openai', 'api_key')
    REDDIT_CLIENT_ID = config.get('reddit', 'client_id')
    REDDIT_CLIENT_SECRET = config.get('reddit', 'client_secret')
    REDDIT_USER_AGENT = config.get('reddit', 'user_agent')
except configparser.NoSectionError as e:
    print(f"Configuration error: {e}")
    raise
except configparser.NoOptionError as e:
    print(f"Configuration error: {e}")
    raise

# Initialize OpenAI
api_key = OPENAI_API_KEY

# Initialize Reddit
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT,
)

# Initialize PyTrends
pytrends = TrendReq(hl='en-US', tz=360)

print("Configuration and initialization completed successfully.")


Configuration and initialization completed successfully.


In [5]:
# Block 5: Create utils/data_processing.py and utils/__init__.py

# Define the directory where helper functions will reside
utils_dir = 'utils'

# Create the 'utils' directory if it doesn't exist
os.makedirs(utils_dir, exist_ok=True)

# Define the path for the __init__.py file to make 'utils' a package
init_path = os.path.join(utils_dir, '__init__.py')

# Create an empty __init__.py file if it doesn't exist
if not os.path.exists(init_path):
    with open(init_path, 'w') as file:
        pass  # Creating an empty __init__.py
    print(f"Created empty '__init__.py' at '{init_path}' to make 'utils' a package.")
else:
    print(f"'__init__.py' already exists at '{init_path}'.")

# Define the path for the data_processing.py file
data_processing_path = os.path.join(utils_dir, 'data_processing.py')

# Define the content for data_processing.py
data_processing_code = """
# data_processing.py

import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import ne_chunk, pos_tag
from nltk.tree import Tree

def clean_text(text):
    '''
    Cleans the input text by removing URLs, special characters, and stopwords.

    Parameters:
        text (str): The text to clean.

    Returns:
        str: The cleaned text.
    '''
    # Remove URLs
    text = re.sub(r'http\\S+', '', text)
    # Remove special characters and numbers
    text = re.sub(r'[^A-Za-z\\s]', '', text)
    # Convert to lowercase
    text = text.lower()
    # Tokenize the text
    words = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    # Join the words back into a single string
    cleaned_text = ' '.join(filtered_words)
    return cleaned_text

def extract_entities(text):
    '''
    Extracts named entities from the input text.

    Parameters:
        text (str): The text to extract entities from.

    Returns:
        list: A list of named entities.
    '''
    def get_entities(tree):
        entities = []
        for subtree in tree:
            if isinstance(subtree, Tree):
                entity = " ".join([token for token, pos in subtree.leaves()])
                entities.append(entity)
        return entities

    tokens = word_tokenize(text)
    tagged = pos_tag(tokens)
    chunked = ne_chunk(tagged)
    entities = get_entities(chunked)
    return entities
"""

# Write the data_processing.py file
with open(data_processing_path, 'w') as file:
    file.write(data_processing_code)

print(f"Created '{data_processing_path}' successfully.")

# Optional: Verify the creation by listing the 'utils' directory
print("\nVerifying the creation of 'data_processing.py' and '__init__.py':")
!ls -l utils/


Created empty '__init__.py' at 'utils/__init__.py' to make 'utils' a package.
Created 'utils/data_processing.py' successfully.

Verifying the creation of 'data_processing.py' and '__init__.py':
total 4
-rw-r--r-- 1 root root 1558 Nov  7 14:09 data_processing.py
-rw-r--r-- 1 root root    0 Nov  7 14:09 __init__.py


In [11]:
# Block 6: Define Helper Functions

import sys
import os

# Get the absolute path to the notebook's directory
notebook_dir = os.path.abspath('')

# Add the 'utils' directory to the system path
utils_dir = os.path.join(notebook_dir, 'utils')
sys.path.append(utils_dir)

# Now import from utils
from utils.data_processing import clean_text, extract_entities

import openai
from openai import OpenAIError  # Import OpenAIError directly
from prettytable import PrettyTable, HRuleStyle, VRuleStyle  # Import HRuleStyle, VRuleStyle
# ... (rest of your code in Block 6)

# ... (later, when using PrettyTable)
table.hrules = HRuleStyle.ALL  # Use HRuleStyle.ALL


import sys
import os
import asyncio
import aiohttp
import structlog
import praw
from prettytable import PrettyTable, HRuleStyle, VRuleStyle

from utils.data_processing import clean_text, extract_entities
from cachetools import TTLCache, cached
from fuzzywuzzy import process
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from rake_nltk import Rake
from textblob import TextBlob
import feedparser
from prettytable import PrettyTable, ALL
import time
import re
from ratelimit import limits, sleep_and_retry
from utils.data_processing import clean_text, extract_entities
from openai.error import OpenAIError  # Import OpenAIError correctly

def my_function_using_prettytable():
table = PrettyTable() # Create the table instance here
table.field_names = ["Column 1", "Column 2"]
table.hrules = HRuleStyle.ALL

  # Set hrules after creating the table
table = PrettyTable()  # Create a PrettyTable instance
table.field_names = ["No.", "Topic", "Description", "Source", "Approx Traffic", "Sentiment"]
table.hrules = ALL  # Use ALL for horizontal rules
table.max_width = 40  # Suitable for phone screens

  print(table)
# ----------------------------
# Structured Logging with structlog
# ----------------------------

structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)

logger = structlog.get_logger()

# ----------------------------
# Caching
# ----------------------------

# Define caching constants
TREND_CACHE_TTL = 3600       # 1 hour in seconds
OPENAI_CACHE_TTL = 86400     # 1 day in seconds

# Initialize caches with defined TTLs
trends_cache = TTLCache(maxsize=100, ttl=TREND_CACHE_TTL)
openai_cache = TTLCache(maxsize=1000, ttl=OPENAI_CACHE_TTL)

# ----------------------------
# Rate Limiting for RSS Feeds
# ----------------------------

ONE_DAY = 86400               # Seconds in one day
RSS_CALLS_PER_DAY = 100       # Maximum number of RSS feed calls per day

@sleep_and_retry
@limits(calls=RSS_CALLS_PER_DAY, period=ONE_DAY)
def fetch_rss_feed_sync(rss_url):
    """
    Fetches and parses the RSS feed with rate limiting.

    Parameters:
        rss_url (str): The URL of the RSS feed.

    Returns:
        feedparser.FeedParserDict or None: Parsed RSS feed or None if fetching fails.
    """
    try:
        feed = feedparser.parse(rss_url)
        return feed
    except Exception as e:
        logger.error("rss_fetch_error", rss_url=rss_url, error=str(e))
        return None

# ----------------------------
# Initialize Sentiment Analyzer and Keyword Extractor
# ----------------------------

sid = SentimentIntensityAnalyzer()
rake = Rake()

# ----------------------------
# Helper Functions
# ----------------------------

def get_matching_country(input_country, available_countries):
    """
    Uses fuzzy matching to find the best matching country from the available_countries.
    Supports both country names and country codes.

    Parameters:
        input_country (str): User input for the country.
        available_countries (dict): Dictionary of available countries and their codes.

    Returns:
        str or None: The matched country name or None if no match is found.
    """
    # Combine country names and codes
    country_list = list(available_countries.keys()) + list(available_countries.values())
    # Use fuzzy matching to find the best match
    match, score = process.extractOne(input_country, country_list)
    if score >= 80:  # Threshold can be adjusted
        # Determine if the match is a country name or code
        if match.upper() in available_countries.values():
            # Find the country name corresponding to the code
            for name, code in available_countries.items():
                if code.upper() == match.upper():
                    return name
        else:
            return match
    else:
        return None

def sanitize_topic(topic):
    """
    Simplifies and sanitizes the topic string to make it suitable for Google Trends queries.

    Parameters:
        topic (str): The topic to sanitize.

    Returns:
        str: Sanitized topic.
    """
    # Remove URLs, special characters, and excessive whitespace
    topic = re.sub(r'http\S+', '', topic)  # Remove URLs
    topic = re.sub(r'[^A-Za-z0-9\s]', '', topic)  # Remove special characters
    topic = re.sub(r'\s+', ' ', topic)  # Replace multiple spaces with single space
    topic = topic.strip()
    # Optionally, shorten the topic if it's too long
    if len(topic) > 100:
        topic = topic[:100]
    return topic

def extract_source(url):
    """
    Extracts the main domain name from the URL to identify the source.

    Parameters:
        url (str): The URL of the news article.

    Returns:
        str: The name of the source.
    """
    try:
        match = re.findall(r'https?://(?:www\.)?([^/]+)/', url)
        domain = match[0] if match else "Unknown Source"
        domain_mapping = {
            'cbsnews.com': 'CBS News',
            'cnn.com': 'CNN',
            'foxnews.com': 'Fox News',
            'abcnews.go.com': 'ABC News',
            'bbc.co.uk': 'BBC',
            'google.com': 'Google News',
            'news.google.com': 'Google News',
            'reuters.com': 'Reuters',
            'theguardian.com': 'The Guardian',
            'nytimes.com': 'The New York Times',
            'usatoday.com': 'USA Today',
            'fortworthstar.com': 'Fort Worth Star-Telegram',
            'wcnc.com': 'WCNC',
            'apnews.com': 'AP News',
            'floridatoday.com': 'Florida Today',
            'msnbc.com': 'MSNBC News',
            # Add more mappings as needed
        }
        return domain_mapping.get(domain.lower(), domain.capitalize())
    except Exception as e:
        logger.error("extract_source_error", url=url, error=str(e))
        return "Unknown Source"

def broaden_query(query):
    """
    Broadens the query by removing specific terms to increase the likelihood of data retrieval.

    Parameters:
        query (str): The original query.

    Returns:
        str: A broadened query.
    """
    try:
        remove_terms = ['and', 'or', 'the', 'of', 'in', 'to', 'for']
        words = query.split()
        broadened_words = [word for word in words if word.lower() not in remove_terms]
        broadened_query = ' '.join(broadened_words)
        return broadened_query
    except Exception as e:
        logger.error("broaden_query_error", original_query=query, error=str(e))
        return query

@cached(trends_cache)
def fetch_google_trends_cached(topic, timeframe='now 7-d'):
    """
    Fetches Google Trends data for a given topic with caching.

    Parameters:
        topic (str): The topic to fetch trends for.
        timeframe (str): The time frame for the trends data.

    Returns:
        dict or None: Trends data or None if fetching fails.
    """
    return fetch_google_trends(topic, timeframe)

def fetch_google_trends(topic, timeframe='now 7-d', retries=3, backoff_factor=2):
    """
    Fetches Google Trends data with enhanced error handling and structured logging.

    Parameters:
        topic (str): The topic to fetch trends for.
        timeframe (str): The time frame for the trends data.
        retries (int): Number of retry attempts.
        backoff_factor (int): Factor for exponential backoff.

    Returns:
        dict or None: Trends data or None if fetching fails.
    """
    refined_query = map_topic_to_trends_query(topic)

    # Log the refined query
    logger.info("refined_google_trends_query", refined_query=refined_query, original_topic=topic)
    print(f"Refined Google Trends Query: '{refined_query}' for Topic: '{topic}'")

    for attempt in range(1, retries + 1):
        try:
            pytrends.build_payload([refined_query], timeframe=timeframe, geo='US')
            interest_over_time = pytrends.interest_over_time()
            if not interest_over_time.empty:
                latest_value = interest_over_time[refined_query].iloc[-1]
                approx_traffic = str(latest_value)
                sentiment = analyze_sentiment(approx_traffic)
                return {
                    'topic': refined_query,
                    'interest': approx_traffic,
                    'sentiment': sentiment
                }
            else:
                logger.warning("no_google_trends_data", query=refined_query, attempt=attempt)
                if attempt < retries:
                    refined_query = broaden_query(refined_query)
                    logger.info("broaden_query_retry", refined_query=refined_query, attempt=attempt)
                    print(f"Broadening query to '{refined_query}' and retrying.")
        except OpenAIError as e:
            logger.error("google_trends_openai_error", query=refined_query, attempt=attempt, error=str(e))
            return None
        except Exception as e:
            logger.error("google_trends_error", query=refined_query, attempt=attempt, error=str(e))
            if attempt < retries:
                sleep_time = backoff_factor ** attempt
                logger.info("retrying_google_trends", sleep_time=sleep_time)
                print(f"Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)
            else:
                logger.error("google_trends_failed", query=refined_query, error=str(e))
                return None
    return None

@cached(openai_cache)
def generate_summary_cached(content):
    """
    Generates a summary for the given content using cached OpenAI responses.

    Parameters:
        content (str): The content to summarize.

    Returns:
        str: A two-sentence summary.
    """
    return generate_summary(content)

def generate_summary(content):
    """
    Generates a concise two-sentence summary using OpenAI's GPT-3.5-turbo.

    Parameters:
        content (str): The content to summarize.

    Returns:
        str: A two-sentence summary.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "You are a concise summarizer. Provide a clear and brief two-sentence summary of the following content."
                },
                {
                    "role": "user",
                    "content": content
                }
            ],
            max_tokens=150,
            temperature=0.5,
        )
        summary = response.choices[0].message.content.strip()
        return summary
    except OpenAIError as e:
        logger.error("generate_summary_error", error=str(e))
        return "No summary available."

def analyze_sentiment(text):
    """
    Analyzes sentiment using VADER and returns 'Positive', 'Negative', or 'Neutral'.

    Parameters:
        text (str): The text to analyze.

    Returns:
        str: Sentiment category.
    """
    try:
        if text == 'N/A':
            # Default to Neutral if no applicable text is available
            return 'Neutral'
        scores = sid.polarity_scores(text)
        compound = scores['compound']
        if compound >= 0.05:
            return 'Positive'
        elif compound <= -0.05:
            return 'Negative'
        else:
            return 'Neutral'
    except Exception as e:
        logger.error("sentiment_analysis_error", text=text, error=str(e))
        return 'Neutral'

def map_topic_to_trends_query(topic_title):
    """
    Maps topic titles to Google Trends queries using keyword extraction.

    Parameters:
        topic_title (str): The title of the topic.

    Returns:
        str: A refined Google Trends query.
    """
    try:
        keywords = extract_keywords(topic_title)
        return keywords
    except Exception as e:
        logger.error("map_topic_to_trends_query_error", topic=topic_title, error=str(e))
        return topic_title

def extract_keywords(topic_title):
    """
    Extracts keywords from the topic title using RAKE.

    Parameters:
        topic_title (str): The title of the topic.

    Returns:
        str: A string of top keywords.
    """
    try:
        rake.extract_keywords_from_text(topic_title)
        keywords = rake.get_ranked_phrases()
        return ' '.join(keywords[:3])  # Top 3 keywords as a single string
    except Exception as e:
        logger.error("keyword_extraction_error", topic=topic_title, error=str(e))
        return topic_title

def is_post_relevant(post_title, topic):
    """
    Determines if a Reddit post is relevant to the topic using OpenAI's GPT-3.5-turbo.

    Parameters:
        post_title (str): The title of the Reddit post.
        topic (str): The topic to compare against.

    Returns:
        bool: True if relevant, False otherwise.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that determines if a Reddit post is relevant to a given topic. "
                        "Respond with 'Yes' or 'No'."
                    )
                },
                {
                    "role": "user",
                    "content": f"Is the following Reddit post relevant to the topic '{topic}'?\n\nPost Title: {post_title}\n\nRespond with 'Yes' or 'No'."
                }
            ],
            max_tokens=1,
            temperature=0,
        )
        answer = response.choices[0].message.content.strip().lower()
        return answer == 'yes'
    except OpenAIError as e:
        logger.error("is_post_relevant_error", post_title=post_title, topic=topic, error=str(e))
        return False

def fetch_reddit_posts(topic, limit=10):
    """
    Fetches Reddit posts related to the topic.

    Parameters:
        topic (str): The topic to search for.
        limit (int): Number of posts to fetch.

    Returns:
        List[dict]: A list of Reddit post data.
    """
    reddit_posts = []
    try:
        subreddit = reddit.subreddit('all')
        search_results = subreddit.search(topic, limit=limit)
        for post in search_results:
            if post.over_18:
                continue
            if is_post_relevant(post.title, topic):
                # Analyze sentiment based on the post's title
                sentiment = analyze_sentiment(post.title)
                reddit_posts.append({
                    'type': 'Reddit Post',
                    'title': post.title,
                    'summary': f"Score: {post.score}",
                    'source': post.subreddit.display_name,
                    'approx_traffic': 'N/A',
                    'sentiment': sentiment
                })
    except Exception as e:
        logger.error("fetch_reddit_posts_error", topic=topic, error=str(e))
    return reddit_posts

print("Helper functions defined successfully.")


IndentationError: expected an indented block after function definition on line 47 (<ipython-input-11-75f3cb2fefad>, line 48)

In [12]:
import os
print(f"Current Working Directory: {os.getcwd()}")


Current Working Directory: /content


In [13]:
# Block 9: Define Script Generation Function

@cached(openai_cache)
def generate_script_for_topic_cached(topic, trend_data, options: ScriptOptions):
    """
    Generates a script for the given topic using cached OpenAI responses.

    Parameters:
        topic (str): The topic to generate the script for.
        trend_data (Trend): The aggregated trend data for the topic.
        options (ScriptOptions): User-defined script customization options.

    Returns:
        str: The generated script.
    """
    return generate_script_for_topic(topic, trend_data, options)

def generate_script_for_topic(topic, trend_data, options: ScriptOptions):
    """
    Generates a script based on the topic, trend data, and customization options.

    Parameters:
        topic (str): The topic to generate the script for.
        trend_data (Trend): The aggregated trend data for the topic.
        options (ScriptOptions): User-defined script customization options.

    Returns:
        str: The generated script.
    """
    try:
        # Construct the prompt based on user options
        prompt = (
            f"Create a {options.length} video script about '{topic}'.\n"
            f"Style: {options.style}\n"
            f"Tone: {options.tone}\n"
            f"Use the following data:\n"
            f"Description: {trend_data.description}\n"
            f"Source: {trend_data.source}\n"
            f"Approx Traffic: {trend_data.approx_traffic}\n"
            f"Sentiment: {trend_data.sentiment}\n"
            f"Related Reddit Posts:\n"
        )
        for post in trend_data.reddit_posts:
            prompt += f"- {post.title} (Sentiment: {post.sentiment})\n"

        prompt += "\nGenerate a concise and engaging script suitable for a video presentation based on the above information."

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a creative scriptwriter."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.7,
        )
        script = response.choices[0].message.content.strip()
        return script
    except OpenAIError as e:
        logger.error("generate_script_error", error=str(e))
        return "No script available."


NameError: name 'openai_cache' is not defined

In [None]:
# Block 10: Fetch and Aggregate Trending Data

import asyncio
from prettytable import PrettyTable
import openai  # Ensure openai is imported
from pytrends.request import TrendReq  # Assuming pytrends is used
import json

# Initialize PyTrends
pytrends = TrendReq(hl='en-US', tz=360)

async def fetch_and_aggregate_trending_data():
    """
    Fetches and aggregates trending data from RSS feeds, Reddit, and Google Trends.

    Returns:
        List[Trend]: A list of aggregated trending data.
    """
    rss_trends = await fetch_trending_topics_rss_async(geo='US', limit=10)
    aggregated_trends = aggregate_trends_data(rss_trends)
    return aggregated_trends

async def fetch_trending_topics_rss_async(geo='US', limit=10):
    """
    Asynchronously fetches trending topics from multiple RSS feeds with rate limiting and caching.

    Parameters:
        geo (str): Geographic location code.
        limit (int): Number of entries per RSS feed.

    Returns:
        List[dict]: A list of trending topics.
    """
    rss_feeds = [
        "https://news.google.com/rss?geo=US",
        "https://news.google.com/rss/topics/CAAqJggKIiBDQkFTRWdvSUwyMHZNRGRqTVdZU0FtVnVLQUFQAQ?hl=en-US&gl=US&ceid=US%3Aen",
        "https://news.google.com/rss/search?q=technology&hl=en-US&gl=US&ceid=US%3Aen",
        "https://news.google.com/rss/search?q=health&hl=en-US&gl=US&ceid=US%3Aen",
        "https://news.google.com/rss/search?q=business&hl=en-US&gl=US&ceid=US%3Aen",
        # Add more RSS feed URLs as needed
    ]
    trending_topics = []

    tasks = []
    for rss_url in rss_feeds:
        tasks.append(fetch_rss_feed_async(rss_url, limit))

    results = await asyncio.gather(*tasks, return_exceptions=True)

    for result in results:
        if isinstance(result, Exception):
            logger.error("fetch_trending_topics_rss_async_exception", error=str(result))
            continue
        for entry in result:
            title = entry.get('title', 'No Title')
            link = entry.get('link', '')
            summary = entry.get('summary', "No description available.")
            source = extract_source(link)
            approx_traffic = entry.get('ht_approx_traffic', 'N/A')
            if approx_traffic != 'N/A':
                approx_traffic = approx_traffic.strip()
                sentiment = analyze_sentiment(approx_traffic)
            else:
                google_trend_data = fetch_google_trends_cached(title, timeframe='now 7-d')
                if google_trend_data:
                    approx_traffic = google_trend_data.get('interest', 'N/A')
                    sentiment = google_trend_data.get('sentiment', 'Neutral')
                else:
                    approx_traffic = 'N/A'
                    sentiment = 'Neutral'
            summary = generate_summary_cached(summary)
            trending_topics.append({
                'title': title,
                'description': summary,
                'source': source,
                'approx_traffic': approx_traffic,
                'sentiment': sentiment
            })
    return trending_topics

async def fetch_rss_feed_async(rss_url, limit):
    """
    Asynchronously fetches and parses an RSS feed.

    Parameters:
        rss_url (str): The URL of the RSS feed.
        limit (int): Number of entries to fetch.

    Returns:
        List[dict]: A list of RSS feed entries.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(rss_url, timeout=10) as response:
                if response.status != 200:
                    logger.error("rss_fetch_async_error", rss_url=rss_url, status=response.status)
                    return []
                content = await response.text()
                feed = feedparser.parse(content)
                entries = feed.entries[:limit]
                return entries
    except asyncio.TimeoutError:
        logger.error("rss_fetch_async_timeout", rss_url=rss_url)
        return []
    except Exception as e:
        logger.error("rss_fetch_async_exception", rss_url=rss_url, error=str(e))
        return []

def aggregate_trends_data(rss_trends):
    """
    Aggregates data for each trend by fetching Reddit posts and Google Trends data.

    Parameters:
        rss_trends (List[dict]): List of trends from RSS feeds.

    Returns:
        List[Trend]: List of aggregated trends.
    """
    aggregated_trends = []
    for trend in rss_trends:
        topic_title = trend['title']
        reddit_posts_data = fetch_reddit_posts(topic_title, limit=5)
        reddit_posts = [RedditPost(**post) for post in reddit_posts_data]
        # Fetch Google Trends data
        google_trend_data = fetch_google_trends_cached(topic_title, timeframe='now 7-d')
        if google_trend_data:
            google_trend = GoogleTrend(**google_trend_data)
            sentiment = google_trend.sentiment
        else:
            google_trend = None
            sentiment = 'Neutral'
        aggregated_trends.append(Trend(
            title=topic_title,
            description=trend['description'],
            source=trend['source'],
            approx_traffic=trend['approx_traffic'],
            sentiment=sentiment,
            reddit_posts=reddit_posts,
            google_trend=google_trend
        ))
    return aggregated_trends

# Define dataclasses if not already defined
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class RedditPost:
    type: str
    title: str
    summary: str
    source: str
    approx_traffic: str
    sentiment: str

@dataclass
class GoogleTrend:
    topic: str
    interest: str
    sentiment: str

@dataclass
class Trend:
    title: str
    description: str
    source: str
    approx_traffic: str
    sentiment: str
    reddit_posts: List[RedditPost]
    google_trend: Optional[GoogleTrend]


In [None]:
# Block 10: Define the Main Function (Block 10)

import sys
import threading
import random
import textwrap
import time
from IPython.display import display, Markdown
from prettytable import PrettyTable, ALL  # Ensure ALL is imported

import nest_asyncio
import asyncio

# Apply the nest_asyncio patch to allow nested event loops
nest_asyncio.apply()

def main():
    """
    Main function to execute the script workflow.
    """
    # Function to display a changing message every 15 seconds with an additional note
    def flashing_message(stop_event):
        messages = [
            "🔍 Gathering the latest trends... (This could take up to 2 minutes. Please wait.)",
            "⏳ Processing data, please wait... (This could take up to 2 minutes. Please wait.)",
            "✨ Almost there, thank you for your patience! (This could take up to 2 minutes. Please wait.)"
        ]
        idx = 0
        while not stop_event.is_set():
            message = messages[idx % len(messages)]
            print(f"\r{message}   ", end='', flush=True)
            for _ in range(15):
                if stop_event.is_set():
                    break
                time.sleep(1)
            idx += 1
            print('\r' + ' ' * len(message) + '   ', end='', flush=True)

    # ----------------------------
    # Step 1: User selects the country for trending topics
    # ----------------------------
    available_countries = {
        'United States': 'US',
        'Canada': 'CA',
        'United Kingdom': 'GB',
        'Australia': 'AU',
        'India': 'IN',
        'Germany': 'DE',
        'France': 'FR',
        'Japan': 'JP',
        'Brazil': 'BR',
        'South Korea': 'KR',
        # Add more countries as needed
    }

    while True:
        print("Enter the country for trending topics data (e.g., United States or US):")
        country_input = input("Country: ").strip()
        matching_country = get_matching_country(country_input, available_countries)
        if not matching_country:
            print("No matching countries found. Please try again.")
            # Optionally, display available countries
            print("Available countries are:")
            for country in available_countries.keys():
                print(f"- {country}")
        else:
            break

    selected_country = matching_country
    selected_country_code = available_countries[selected_country]
    print(f"You selected: {selected_country}")

    # ----------------------------
    # Step 2: User selects the time frame for trending topics
    # ----------------------------
    print("\nSelect the time frame for trending topics:")
    print("\033[1mA.\033[0m Last 4 hours")
    print("\033[1mB.\033[0m Last 24 hours")
    time_range_selection = input("Enter the letter of the time frame you're interested in: ").strip().upper()
    time_range_mapping = {'A': 'Last 4 hours', 'B': 'Last 24 hours'}
    time_range = time_range_mapping.get(time_range_selection)
    if not time_range:
        print("Invalid selection. Exiting.")
        return

    # ----------------------------
    # Step 3: Start flashing message while fetching all data
    # ----------------------------
    stop_event = threading.Event()
    thread = threading.Thread(target=flashing_message, args=(stop_event,))
    thread.start()

    # ----------------------------
    # Step 4: Fetch all data asynchronously with robust error handling
    # ----------------------------
    async def fetch_data():
        try:
            aggregated_trends = await fetch_and_aggregate_trending_data()
            return aggregated_trends
        except Exception as e:
            logger.error("fetch_data_error", error=str(e))
            return []

    loop = asyncio.get_event_loop()
    aggregated_trends = loop.create_task(fetch_data())

    try:
        # Wait for the data to be fetched
        loop.run_until_complete(aggregated_trends)
        aggregated_trends = aggregated_trends.result()
    except Exception as e:
        logger.error("asyncio_run_until_complete_error", error=str(e))
        aggregated_trends = []

    # Stop flashing message
    stop_event.set()
    thread.join()
    print()  # Move to the next line after flashing message

    if not aggregated_trends:
        logger.error("no_trending_topics_found")
        print("No trending topics found.")
        return

    # ----------------------------
    # Step 5: Display trending topics with pagination
    # ----------------------------
    # Pagination variables
    batch_size = 10
    total_trends = len(aggregated_trends)
    current_index = 0

    while current_index < total_trends:
        # Determine the end index for the current batch
        end_index = min(current_index + batch_size, total_trends)
        batch_trends = aggregated_trends[current_index:end_index]

        # Display the consolidated list with sentiments
        print(f"\nCurrently Trending in {selected_country} in the last {time_range} (Showing {current_index + 1} to {end_index} of {total_trends}):\n")
        table = PrettyTable()
        table.field_names = ["No.", "Topic", "Description", "Source", "Approx Traffic", "Sentiment"]
        table.hrules = ALL  # Use ALL for horizontal rules
        table.max_width = 40  # Suitable for phone screens
        for idx, trend in enumerate(batch_trends, start=current_index + 1):
            title = textwrap.fill(trend.title, width=40)
            description = textwrap.fill(trend.description, width=40)
            source = trend.source
            approx_traffic = trend.approx_traffic
            sentiment = trend.sentiment
            table.add_row([idx, title, description, source, approx_traffic, sentiment])
        print(table)

        current_index = end_index

        if current_index >= total_trends:
            print("====\nNo more trending topics available.\n")
            break

        # Prompt the user to view more results
        while True:
            user_input = input("Type 'more' or '+' to view more results, or any other key to exit: ").strip().lower()
            if user_input in ['more', '+']:
                break  # Continue to the next batch
            else:
                print("Exiting the script.")
                return

    # ----------------------------
    # Step 6: User selects a topic to generate scripts
    # ----------------------------
    try:
        selected_idx = input("Enter the number of the topic you're interested in (or type 0 to exit): ").strip()
        if selected_idx.lower() == '0':
            print("Exiting the script.")
            return
        selected_idx = int(selected_idx)
        if 1 <= selected_idx <= len(aggregated_trends):
            selected_topic_data = aggregated_trends[selected_idx - 1]
            selected_topic = selected_topic_data.title
            # Apply text wrapping to the selected topic message
            selected_topic_display = textwrap.fill(f"You selected: {selected_topic}", width=40)
            print(f"\n{selected_topic_display}")
        else:
            print("Invalid selection. Exiting.")
            return
    except ValueError:
        print("Invalid input. Please enter a number. Exiting.")
        return

    # ----------------------------
    # Step 7: Generate scripts for the selected topic
    # ----------------------------
    print("\nChoose script customization options:")
    style = input("Select script style (Flashy Script / Expressive Script / Normal Script): ").strip().title()
    tone = input("Select script tone (e.g., Informative, Persuasive, Emotional): ").strip().capitalize()
    length = input("Enter script length (e.g., 60 seconds, 120 seconds): ").strip()

    options = ScriptOptions(
        style=style if style in ["Flashy Script", "Expressive Script", "Normal Script"] else "Normal Script",
        tone=tone if tone else "Informative",
        length=length if length else "60 seconds"
    )

    print("\nGenerating script for the selected topic...")
    try:
        script = generate_script_for_topic_cached(selected_topic, selected_topic_data, options=options)
    except Exception as e:
        logger.error("generate_script_error", error=str(e))
        script = "Failed to generate script."

    # Display the script with enhanced formatting as a code block to preserve structure
    script_formatted = f"```plaintext\n{script}\n```"
    display(Markdown(f"### Generated Script for '{selected_topic}':\n\n{script_formatted}\n\n**Source:** [{selected_topic_data.source}]"))
    print("====\n")

    print("Script generation completed.")
