# Ecosystem Report

This report aims to scrape blogs and summarize news, generate proposals for how we can present research ideas

In [1]:
import requests
from bs4 import BeautifulSoup
import feedparser
import time
from datetime import datetime
import logging

# Set up logging for better debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class BlockchainNewsScraper:
    def __init__(self):
        self.sources = {
            'ethereum_blog': 'https://blog.ethereum.org/feed.xml',
            'arbitrum_medium': 'https://medium.com/feed/@arbitrum',
            # 'polygon_blog': 'https://blog.polygon.technology/feed',
            # 'solana_news': 'https://solana.com/news/rss.xml',
            # 'flow_blog': 'https://www.onflow.org/post/rss.xml'
        }
        
        # Add headers to mimic a real browser
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'application/rss+xml, application/xml, text/xml, application/atom+xml',
            'Accept-Language': 'en-US,en;q=0.9',
        }
    
    def test_url_accessibility(self, url, source_name):
        """Test if URL is accessible and what type of content it returns"""
        logger.info(f"Testing URL accessibility for {source_name}: {url}")
        
        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            
            logger.info(f"{source_name} - Status Code: {response.status_code}")
            logger.info(f"{source_name} - Content-Type: {response.headers.get('content-type', 'Unknown')}")
            logger.info(f"{source_name} - Content Length: {len(response.content)} bytes")
            
            # Check if it's actually RSS/XML content
            content_type = response.headers.get('content-type', '').lower()
            is_xml = 'xml' in content_type or 'rss' in content_type or 'atom' in content_type
            
            if not is_xml:
                # Check if content starts with XML declaration or RSS tags
                content_preview = response.text[:200]
                logger.info(f"{source_name} - Content preview: {content_preview}")
                
                if not any(tag in content_preview.lower() for tag in ['<rss', '<feed', '<?xml']):
                    logger.warning(f"{source_name} - This appears to be HTML, not RSS/XML!")
            
            return response.status_code == 200, response
            
        except requests.exceptions.RequestException as e:
            logger.error(f"{source_name} - Request failed: {e}")
            return False, None
    
    def analyze_feed_structure(self, feed, source_name):
        """Analyze the feed structure to understand what's available"""
        logger.info(f"Analyzing feed structure for {source_name}")
        
        # # Check feed metadata
        # if hasattr(feed, 'feed'):
        #     logger.info(f"{source_name} - Feed title: {getattr(feed.feed, 'title', 'No title')}")
        #     logger.info(f"{source_name} - Feed description: {getattr(feed.feed, 'description', 'No description')}")
        #     logger.info(f"{source_name} - Feed version: {getattr(feed, 'version', 'Unknown version')}")
        
        # # Check entries
        # logger.info(f"{source_name} - Number of entries: {len(feed.entries)}")
        
        if feed.entries:
            # Analyze first entry structure
            first_entry = feed.entries[0]
            # logger.info(f"{source_name} - First entry keys: {list(first_entry.keys())}")
            # logger.info(f"{source_name} - First entry title: {getattr(first_entry, 'title', 'No title')}")
        
        # Check for parsing errors
        if hasattr(feed, 'bozo') and feed.bozo:
            logger.warning(f"{source_name} - Feed has parsing issues: {feed.bozo_exception}")
    
    def scrape_rss_feeds(self):
        articles = []
        
        for source, url in self.sources.items():
            logger.info(f"\n{'='*50}")
            logger.info(f"Processing source: {source}")
            logger.info(f"URL: {url}")
            
            try:
                # First, test URL accessibility
                is_accessible, response = self.test_url_accessibility(url, source)
                
                if not is_accessible:
                    logger.error(f"Skipping {source} - URL not accessible")
                    continue
                
                # Parse the feed
                logger.info(f"Parsing feed for {source}...")
                feed = feedparser.parse(url)
                
                # Analyze feed structure
                self.analyze_feed_structure(feed, source)
                
                # Check if feed has entries
                if len(feed.entries) > 0:
                    logger.info(f"Processing {len(feed.entries)} entries from {source}")
                    
                    for i, entry in enumerate(feed.entries[:5]):  # Latest 5 articles
                        logger.debug(f"Processing entry {i+1} from {source}: {getattr(entry, 'title', 'No title')}")
                        
                        # Check if entry has required fields
                        title = getattr(entry, 'title', 'No title')
                        summary = getattr(entry, 'summary', getattr(entry, 'description', ''))
                        
                        if self.is_upgrade_related(title + " " + summary):
                            article = {
                                'source': source,
                                'title': title,
                                'link': getattr(entry, 'link', ''),
                                'date': getattr(entry, 'published', getattr(entry, 'updated', 'No date')),
                                'summary': summary
                            }
                            articles.append(article)
                            logger.info(f"Added upgrade-related article from {source}: {title}")
                
                else:
                    logger.warning(f"No entries found in feed for {source}")
                    # Print more detailed feed information for debugging
                    if hasattr(feed, 'bozo') and feed.bozo:
                        logger.error(f"Feed parsing error for {source}: {feed.bozo_exception}")
                    
                    # Show raw content preview if feed is empty
                    if response:
                        logger.info(f"Raw content preview for {source}:")
                        logger.info(response.text[:500] + "..." if len(response.text) > 500 else response.text)
                
            except Exception as e:
                logger.error(f"Error scraping {source}: {e}")
                import traceback
                logger.error(traceback.format_exc())
        
        logger.info(f"\n{'='*50}")
        logger.info(f"Total articles found: {len(articles)}")
        return articles
    
    def is_upgrade_related(self, text):
        keywords = ['upgrade', 'update', 'fork', 'hardfork', 'testnet', 
                   'mainnet', 'release', 'version', 'protocol', 'network']
        return any(keyword.lower() in text.lower() for keyword in keywords)
    
    def get_flow_blog_rss(self):
        """Special handler for Flow blog since it doesn't have RSS"""
        logger.info("Attempting to find RSS feed for Flow blog...")
        
        try:
            response = requests.get('https://flow.com/blog', headers=self.headers)
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Look for RSS link in HTML head
            rss_link = soup.find('link', {'type': 'application/rss+xml'})
            if rss_link:
                rss_url = rss_link.get('href')
                logger.info(f"Found RSS feed for Flow: {rss_url}")
                return rss_url
            else:
                logger.warning("No RSS feed found for Flow blog")
                return None
                
        except Exception as e:
            logger.error(f"Error finding Flow RSS feed: {e}")
            return None

# Usage with debugging
if __name__ == "__main__":
    scraper = BlockchainNewsScraper()
    
    # Optional: Try to find Flow's actual RSS feed
    flow_rss = scraper.get_flow_blog_rss()
    if flow_rss:
        scraper.sources['flow_blog'] = flow_rss
    
    articles = scraper.scrape_rss_feeds()
    
    # Print results summary
    print(f"\n{'='*60}")
    print("FINAL RESULTS SUMMARY")
    print(f"{'='*60}")
    
    if articles:
        for article in articles:
            print(f"\nSource: {article['source']}")
            print(f"Title: {article['title']}")
            print(f"Date: {article['date']}")
            print(f"Link: {article['link']}")
            print("-" * 40)
    else:
        print("No upgrade-related articles found.")

2025-08-24 22:34:07,270 - INFO - Attempting to find RSS feed for Flow blog...
2025-08-24 22:34:07,641 - INFO - 
2025-08-24 22:34:07,641 - INFO - Processing source: ethereum_blog
2025-08-24 22:34:07,642 - INFO - URL: https://blog.ethereum.org/feed.xml
2025-08-24 22:34:07,642 - INFO - Testing URL accessibility for ethereum_blog: https://blog.ethereum.org/feed.xml
2025-08-24 22:34:07,963 - INFO - ethereum_blog - Status Code: 200
2025-08-24 22:34:07,965 - INFO - ethereum_blog - Content-Type: application/xml
2025-08-24 22:34:07,966 - INFO - ethereum_blog - Content Length: 478507 bytes
2025-08-24 22:34:07,966 - INFO - Parsing feed for ethereum_blog...
2025-08-24 22:34:08,430 - INFO - Analyzing feed structure for ethereum_blog
2025-08-24 22:34:08,430 - INFO - Processing 581 entries from ethereum_blog
2025-08-24 22:34:08,431 - INFO - Added upgrade-related article from ethereum_blog: Protocol Update 002 - Scale Blobs
2025-08-24 22:34:08,431 - INFO - Added upgrade-related article from ethereum_b


FINAL RESULTS SUMMARY

Source: ethereum_blog
Title: Protocol Update 002 - Scale Blobs
Date: Fri, 22 Aug 2025 00:00:00 GMT
Link: https://blog.ethereum.org/en/2025/08/22/protocol-update-002
----------------------------------------

Source: ethereum_blog
Title: Join Us: EF Protocol Reddit AMA - August 29th, 2025
Date: Fri, 15 Aug 2025 00:00:00 GMT
Link: https://blog.ethereum.org/en/2025/08/15/protocol-ama
----------------------------------------

Source: ethereum_blog
Title: Protocol Update 001 – Scale L1
Date: Tue, 05 Aug 2025 00:00:00 GMT
Link: https://blog.ethereum.org/en/2025/08/05/protocol-update-001
----------------------------------------

Source: arbitrum_medium
Title: Most profitable SushiSwap liquidity pool ArbiFLUX-ETH — 162.44% APY
Date: Wed, 15 Dec 2021 01:20:00 GMT
Link: https://arbitrum.medium.com/most-profitable-sushiswap-liquidity-pool-arbiflux-eth-162-44-apy-8b717e5e7b2d?source=rss-8cf0900f966a------2
----------------------------------------

Source: arbitrum_medium
Tit

# Feed into AI

Now that we have articles, we can feed this into AI to generate ideas. First we will import our claude API key

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv('ANTHROPIC_API_KEY')
model = os.getenv('ANTHROPIC_MODEL')

# Query Claude for Proposal Ideas

In [7]:
import anthropic

client = anthropic.Anthropic()
test_article = articles[0]["summary"]

message = client.messages.create(
    model=model,
    max_tokens=1000,
    messages=[
        {
            "role": "user",
            "content": f"As a research agency experienced in user research can you take the content of this article and generate a proposal idea to perform user research?: {test_article}"
        }
    ]
)
print(message.content)

2025-08-24 22:41:18,342 - INFO - HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"


[TextBlock(citations=None, text="# User Research Proposal: Understanding Developer and User Perspectives on Ethereum's Blob Scaling Implementation\n\n## Executive Summary\nThis research proposal aims to investigate user perceptions, adoption barriers, and practical implementation challenges related to Ethereum's blob scaling approach for Layer 2 (L2) solutions. The study will focus on understanding how developers, L2 operators, and end-users experience data availability improvements and scaling benefits.\n\n## Research Objectives\n\n### Primary Objectives\n- Understand developer adoption patterns and implementation challenges with blob scaling\n- Assess user perception of performance improvements in L2 transactions\n- Identify barriers to blob scaling adoption across different user segments\n\n### Secondary Objectives\n- Map the user journey for implementing blob-based data availability solutions\n- Evaluate the effectiveness of current documentation and developer resources\n- Understa

In [4]:
articles[0]

{'source': 'ethereum_blog',
 'title': 'Protocol Update 002 - Scale Blobs',
 'link': 'https://blog.ethereum.org/en/2025/08/22/protocol-update-002',
 'date': 'Fri, 22 Aug 2025 00:00:00 GMT',
 'summary': 'Following up from Protocol Update 001, we’d like to introduce our approach to blob scaling. The L1 serves as a robust foundation for L2 systems to scale Ethereum, and a necessary component of secure L2 solutions is data availability provided by the L1. Data availability ensures that updates L2s make...'}