In [1]:
import json
import os
import time
from typing import Dict, List, Optional

import requests

In [2]:
class Web3AlertsScraper:
    """Small client for the web3alerts.app JSON API.

    Authentication is done by replaying cookies exported from a browser
    session: the cookie file is read once at construction time and every
    cookie is attached to a shared ``requests.Session``.
    """

    # Seconds to wait for each HTTP request. ``requests`` has no default
    # timeout, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, cookies_path: str):
        """Create a session and load cookies from ``cookies_path``.

        Args:
            cookies_path: Path to a JSON file containing a list of cookie
                objects, each with at least ``name`` and ``value`` keys.
        """
        self.base_url = "https://web3alerts.app"
        self.session = requests.Session()
        self.load_cookies(cookies_path)

    def load_cookies(self, cookies_path: str):
        """Load exported cookies into the session.

        Args:
            cookies_path: Path to a JSON file holding a list of objects with
                ``name`` and ``value`` keys; other keys are ignored.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a cookie entry lacks ``name`` or ``value``.
        """
        # JSON is UTF-8 by specification; do not depend on the locale encoding.
        with open(cookies_path, 'r', encoding='utf-8') as f:
            cookies = json.load(f)
        for cookie in cookies:
            self.session.cookies.set(cookie['name'], cookie['value'])

    def _get_json(self, endpoint: str, ts: Optional[str] = None):
        """GET ``/api/<endpoint>`` and return the decoded JSON body.

        The ``ts`` query parameter is only sent when it is truthy.
        """
        url = f"{self.base_url}/api/{endpoint}"
        params = {'ts': ts} if ts else {}

        response = self.session.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def get_new_projects(self, ts: Optional[str] = None) -> List[Dict]:
        """Fetch new projects from the API.

        Args:
            ts: Optional value for the ``ts`` query parameter.

        Returns:
            The decoded JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return self._get_json("new_projects", ts)

    def get_watch_list(self, ts: Optional[str] = None) -> List[Dict]:
        """Fetch watch list from the API.

        Args:
            ts: Optional value for the ``ts`` query parameter.

        Returns:
            The decoded JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return self._get_json("watch_list", ts)

    def get_all_projects_paginated(self, max_pages: int = 10, delay: float = 1.0) -> List[Dict]:
        """Fetch projects page by page, following a timestamp cursor.

        The cursor for the next page is read from the last item of the
        current page (``timestamp`` key, falling back to ``ts``). Paging
        stops when a page is empty, when no cursor can be found, when the
        cursor does not advance, when ``max_pages`` is reached, or when any
        request fails. Stopping on a missing or unchanged cursor is what
        prevents the same page from being fetched and appended repeatedly.

        Args:
            max_pages: Upper bound on the number of requests.
            delay: Seconds to sleep between consecutive requests.

        Returns:
            All items collected so far; on error the partial result is
            returned after the error has been printed.
        """
        all_projects = []
        ts = None

        for page in range(max_pages):
            try:
                projects = self.get_new_projects(ts=ts)
                if not projects:
                    break

                all_projects.extend(projects)

                # NOTE(review): the cursor key names are a guess; confirm
                # them against the actual response structure.
                last_item = projects[-1]
                next_ts = last_item.get('timestamp') or last_item.get('ts')

                # Without a fresh cursor the next request would return the
                # same page again, so stop instead of collecting duplicates.
                if not next_ts or next_ts == ts:
                    break
                ts = next_ts

                # Rate-limit only when another request will follow.
                if page + 1 < max_pages:
                    time.sleep(delay)

            except Exception as e:
                # Best effort: report the failure and keep what was collected.
                print(f"Error on page {page}: {e}")
                break

        return all_projects

In [3]:
# Location of the exported browser cookies. Set the WEB3ALERTS_COOKIES_PATH
# environment variable to run this notebook on another machine; when it is
# unset the original local path is used, so existing behavior is unchanged.
COOKIES_PATH = os.environ.get(
    'WEB3ALERTS_COOKIES_PATH',
    '/Users/jiesong/Dropbox/a0crypto/a02w3alerts2x/credentials/cookies.json',
)
scraper = Web3AlertsScraper(COOKIES_PATH)

In [4]:
# One request without a `ts` parameter; `projects` holds the decoded JSON response.
projects = scraper.get_new_projects()

In [5]:
# Sanity check: how many items the single request returned.
len(projects)

2819

In [6]:
import pandas as pd

# Convert the list of project records returned by the API into a pandas
# DataFrame for inspection.
projects_df = pd.DataFrame(projects)

In [9]:
# Order rows by `days_since_account_creation`, smallest value first
# (sort_values sorts ascending by default).
# NOTE(review): this rebinds `projects_df`, so cells below see the sorted frame.
projects_df = projects_df.sort_values('days_since_account_creation')

In [None]:
# Show the row(s) whose project name is exactly 'Reflect'.
projects_df.loc[projects_df['project_name'].eq('Reflect')]

Unnamed: 0,twitter_user_id,project_name,handle,profile_picture_url,discord_url,telegram_url,website_url,days_since_discovery,days_since_account_creation,no_twitter_followers,description,entities,no_discord_users,quality_score,quality_score_deltas,tags,top_followers,is_redacted
1273,1586791639623901185,Reflect,reflectmoney,https://pbs.twimg.com/profile_images/191577277...,,,http://reflect.money,856,1039,10670,The Capital Efficient Stablecoin Protocol,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,95.4,"{'1': 19.5, '3': 19.1, '7': 19.1, '30': 95.4, ...","[type: Stablecoin, chain: Other]","[{'account_name': 'Solana', 'profile_picture_u...",False


In [10]:
# Preview the first 30 rows of the sorted frame.
projects_df.head(30)

Unnamed: 0,twitter_user_id,project_name,handle,profile_picture_url,discord_url,telegram_url,website_url,days_since_discovery,days_since_account_creation,no_twitter_followers,description,entities,no_discord_users,quality_score,quality_score_deltas,tags,top_followers,is_redacted
2078,1962778017681539072,Vimix,vimixdotfun,https://pbs.twimg.com/profile_images/196286474...,,,http://vimix.fun,1,1,192,Turning vibes into value with infinite remixing.,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,37.8,"{'1': 14.6, '3': 37.8, '7': 37.8, '30': 37.8, ...","[type: Unknown, chain: Solana]","[{'account_name': 'Casper 🦇🔊', 'profile_pictur...",False
2057,1962883959554969600,Loom Finance,loom_finance,https://pbs.twimg.com/profile_images/196290589...,,,https://lambdaclass.com/loom-levenue.pdf,0,1,376,Enabling real-world businesses to tokenize and...,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,42.4,"{'1': 42.4, '3': 42.4, '7': 42.4, '30': 42.4, ...","[type: Unknown, chain: Other]","[{'account_name': 'Casper 🦇🔊', 'profile_pictur...",False
2644,1962234273534013440,Redacted,redacted_icm,https://pbs.twimg.com/profile_images/196223452...,,,,0,3,193,google for internet capital markets,{},,1.5,"{'1': 0.9, '3': 1.5, '7': 1.5, '30': 1.5, '90'...","[type: Unknown, chain: Other]","[{'account_name': 'Yash', 'profile_picture_url...",False
2724,1961756236279676930,Bonk Index,BNKK_Fun,https://pbs.twimg.com/profile_images/196312946...,,,https://bnkk.fun/,2,4,1201,Stake $BNKK. Earn yield across the Bonk ecosys...,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,0.8,"{'1': 0.0, '3': 0.8, '7': 0.8, '30': 0.8, '90'...","[type: Unknown, chain: Other]","[{'account_name': 'Kaduna', 'profile_picture_u...",False
2213,1961515195949617153,Netrun Foundation,NetrunFDN,https://pbs.twimg.com/profile_images/196171834...,,,,0,5,59,,{},,24.3,"{'1': 24.3, '3': 24.3, '7': 24.3, '30': 24.3, ...","[type: Unknown, chain: Other]","[{'account_name': '余年', 'profile_picture_url':...",False
2403,1961474095322992642,sparktoshi,sparktoshibot,https://pbs.twimg.com/profile_images/196147415...,,,,3,5,500,,{},,8.7,"{'1': 0.0, '3': 7.3, '7': 8.7, '30': 8.7, '90'...","[type: Bot, chain: Other]","[{'account_name': 'Theo', 'profile_picture_url...",False
2090,1960960393473388544,The Prediction Arc,predictionarc,https://pbs.twimg.com/profile_images/196096747...,,https://t.me/thepredictionarc,,6,6,407,"Everyone loves prediction markets, some just d...","{'url': {'urls': [{'end': 23, 'url': 'https://...",,28.9,"{'1': 0.4, '3': -1.2, '7': 28.9, '30': 28.9, '...","[type: Unknown, chain: Other, attr: Telegram]","[{'account_name': '厂仔阿猩 (🦍,🦍)', 'profile_pictu...",False
2414,1961157239705853952,QR SOL,qrsoldotfun,https://pbs.twimg.com/profile_images/196116402...,,,https://qrsol.fun/,4,6,78,QR code never changes but destination changes....,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,12.3,"{'1': 0.1, '3': -0.3, '7': 12.3, '30': 12.3, '...","[type: Unknown, chain: Other]",[],False
2188,1961155887914999808,Netrun,usenetrun,https://pbs.twimg.com/profile_images/196175249...,,,https://www.netrun.xyz/,1,6,561,Netrun is an anonymous Layer-1 metaprotocol wi...,"{'url': {'urls': [{'end': 23, 'url': 'https://...",,15.6,"{'1': 6.4, '3': 15.6, '7': 15.6, '30': 15.6, '...","[type: Unknown, chain: Other]","[{'account_name': 'CJ the ""Doughnut""', 'profil...",False
2469,1961100036898537472,(redacted) time,hellomodelotime,https://pbs.twimg.com/profile_images/196110111...,,,,0,6,49,fully liquidated on everything,{},,11.3,"{'1': 1.8, '3': 1.7, '7': 11.3, '30': 11.3, '9...","[type: Unknown, chain: Other]","[{'account_name': 'knower', 'profile_picture_u...",False
