In [3]:
!pip install ace-tools yfinance

Defaulting to user installation because normal site-packages is not writeable
Collecting yfinance
  Downloading yfinance-0.2.52-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.8.tar.gz (948 kB)
     ---------------------------------------- 0.0/948.2 kB ? eta -:--:--
     -------------------- ---------------- 524.3/948.2 kB 16.4 MB/s eta 0:00:01
     -------------------------------------- 948.2/948.2 kB 8.8 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting html5lib>=1.1 (from yfinance)
  Downloading html5lib-1.1-



In [4]:
import yfinance as yf
import pandas as pd
import time
import os
from datetime import datetime, timedelta

TECH_STOCKS = [
    "AAPL", "MSFT", "NVDA", "AVGO", "CRM", "ORCL", "CSCO", "ACN", "NOW", "IBM",
    "AMD", "ADBE", "INTU", "QCOM", "TXN", "AMAT", "PLTR", "ANET", "PANW", "MU",
    "ADI", "LRCX", "KLAC", "APH", "INTC", "CDNS", "CRWD", "MSI", "SNPS", "ADSK",
    "FTNT", "ROP", "WDAY", "NXPI", "FICO", "TEL", "IT", "CTSH", "GLW", "DELL",
    "HPQ", "MCHP", "ANSS", "MPWR", "HPE", "KEYS", "GDDY", "CDW", "TYL", "NTAP"
]

MACRO_TICKERS = {
    '^VIX': 'Volatility Index',
    '^TNX': '10-Year Treasury Yield',
    '^FVX': '5-Year Treasury Yield',
    'CL=F': 'Crude Oil Futures'
}

DATA_FOLDER = "yfinance_tech_stock_data"
MACRO_FOLDER = "macroeconomic_data"
os.makedirs(DATA_FOLDER, exist_ok=True)
os.makedirs(MACRO_FOLDER, exist_ok=True)

def fetch_stock_data(symbol, interval="1h", period="2y"):
    """Fetch and save stock data with enhanced error handling"""
    try:
        print(f"\n📈 Fetching {symbol} ({interval} interval)...")
        df = yf.Ticker(symbol).history(period=period, interval=interval)
        
        if df.empty:
            print(f"🚨 No data for {symbol}")
            return None

        df = df.reset_index().rename(columns={"Datetime": "timestamp"})
        df['symbol'] = symbol  
        

        path = os.path.join(DATA_FOLDER, f"{symbol}_stock.csv")
        df.to_csv(path, index=False)
        print(f"✅ Saved {len(df)} rows to {path}")
        
        return df

    except Exception as e:
        print(f"🚨 Error fetching {symbol}: {str(e)}")
        return None

def fetch_macro_data():
    """Fetch macroeconomic indicators with retry logic"""
    macro_data = {}
    
    for ticker, name in MACRO_TICKERS.items():
        retries = 3
        while retries > 0:
            try:
                print(f"\n🌐 Fetching {name} ({ticker})...")
                df = yf.download(ticker, start=datetime.now()-timedelta(days=730), interval='1h')
                
                if not df.empty:
                    df = df.reset_index().rename(columns={'Date': 'timestamp'})
                    df['ticker'] = ticker
                    macro_data[ticker] = df
                    

                    path = os.path.join(MACRO_FOLDER, f"{ticker.replace('^','')}.csv")
                    df.to_csv(path, index=False)
                    print(f"✅ Saved {ticker} data")
                    

                    print(f"\n📋 Columns for {ticker} ({name}):")
                    print(df.columns.tolist())
                    print(f"\nFirst row for {ticker}:")
                    print(df.head(1))
                    break
                else:
                    print(f"🚨 Empty data for {ticker}")
                    retries -= 1
                    
            except Exception as e:
                print(f"🚨 Error ({retries} retries left): {str(e)}")
                retries -= 1
                time.sleep(5)
    
    return macro_data

def main_data_pipeline():
    """Orchestrate data fetching with rate limiting"""
    stock_dfs = []
    preview_printed = False  
    

    for symbol in TECH_STOCKS:
        df = fetch_stock_data(symbol)
        if df is not None:
            stock_dfs.append(df)
            if not preview_printed:
                print(f"\n📋 Columns for {symbol}:")
                print(df.columns.tolist())
                print(f"\nFirst row for {symbol}:")
                print(df.head(1))
                preview_printed = True
        time.sleep(1.5)
    

    macro_data = fetch_macro_data()
    
    return pd.concat(stock_dfs), macro_data


if __name__ == "__main__":
    stock_data, macro_data = main_data_pipeline()
    print("\n🎉 Data pipeline complete!")
    print(f"Stock data shape: {stock_data.shape}")
    print(f"Macro data keys: {list(macro_data.keys())}")


📈 Fetching AAPL (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\AAPL_stock.csv

📋 Columns for AAPL:
['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'symbol']

First row for AAPL:
                  timestamp        Open    High         Low       Close  \
0 2023-01-31 09:30:00-05:00  142.699997  143.75  142.279999  143.169998   

     Volume  Dividends  Stock Splits symbol  
0  13897777        0.0           0.0   AAPL  

📈 Fetching MSFT (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\MSFT_stock.csv

📈 Fetching NVDA (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\NVDA_stock.csv

📈 Fetching AVGO (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\AVGO_stock.csv

📈 Fetching CRM (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\CRM_stock.csv

📈 Fetching ORCL (1h interval)...
✅ Saved 3494 rows to yfinance_tech_stock_data\ORCL_stock.csv

📈 Fetching CSCO (1h interval)...
✅ Saved 3494 rows 

[*********************100%***********************]  1 of 1 completed


✅ Saved ^VIX data

📋 Columns for ^VIX (Volatility Index):
[('Datetime', ''), ('Close', '^VIX'), ('High', '^VIX'), ('Low', '^VIX'), ('Open', '^VIX'), ('Volume', '^VIX'), ('ticker', '')]

First row for ^VIX:
Price                   Datetime      Close   High        Low   Open Volume  \
Ticker                                 ^VIX   ^VIX       ^VIX   ^VIX   ^VIX   
0      2023-02-02 08:00:00+00:00  17.620001  17.75  17.610001  17.74      0   

Price  ticker  
Ticker         
0        ^VIX  

🌐 Fetching 10-Year Treasury Yield (^TNX)...


[*********************100%***********************]  1 of 1 completed


✅ Saved ^TNX data

📋 Columns for ^TNX (10-Year Treasury Yield):
[('Datetime', ''), ('Close', '^TNX'), ('High', '^TNX'), ('Low', '^TNX'), ('Open', '^TNX'), ('Volume', '^TNX'), ('ticker', '')]

First row for ^TNX:
Price                   Datetime  Close   High    Low   Open Volume ticker
Ticker                             ^TNX   ^TNX   ^TNX   ^TNX   ^TNX       
0      2023-02-02 13:20:00+00:00  3.344  3.378  3.344  3.373      0   ^TNX

🌐 Fetching 5-Year Treasury Yield (^FVX)...


[*********************100%***********************]  1 of 1 completed


✅ Saved ^FVX data

📋 Columns for ^FVX (5-Year Treasury Yield):
[('Datetime', ''), ('Close', '^FVX'), ('High', '^FVX'), ('Low', '^FVX'), ('Open', '^FVX'), ('Volume', '^FVX'), ('ticker', '')]

First row for ^FVX:
Price                   Datetime  Close   High    Low   Open Volume ticker
Ticker                             ^FVX   ^FVX   ^FVX   ^FVX   ^FVX       
0      2023-02-02 13:20:00+00:00  3.423  3.462  3.423  3.454      0   ^FVX

🌐 Fetching Crude Oil Futures (CL=F)...


[*********************100%***********************]  1 of 1 completed


✅ Saved CL=F data

📋 Columns for CL=F (Crude Oil Futures):
[('Datetime', ''), ('Close', 'CL=F'), ('High', 'CL=F'), ('Low', 'CL=F'), ('Open', 'CL=F'), ('Volume', 'CL=F'), ('ticker', '')]

First row for CL=F:
Price                   Datetime      Close       High        Low       Open  \
Ticker                                 CL=F       CL=F       CL=F       CL=F   
0      2023-02-02 04:00:00+00:00  77.220001  77.239998  77.050003  77.099998   

Price  Volume ticker  
Ticker   CL=F         
0           0   CL=F  

🎉 Data pipeline complete!
Stock data shape: (174699, 9)
Macro data keys: ['^VIX', '^TNX', '^FVX', 'CL=F']


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
import logging
import re
from datetime import datetime, timedelta
from typing import List, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed

TECH_STOCKS = [
    "AAPL", "MSFT", "NVDA", "AVGO", "CRM", "ORCL", "CSCO", "ACN", "NOW", "IBM",
    "AMD", "ADBE", "INTU", "QCOM", "TXN", "AMAT", "PLTR", "ANET", "PANW", "MU",
    "ADI", "LRCX", "KLAC", "APH", "INTC", "CDNS", "CRWD", "MSI", "SNPS", "ADSK",
    "FTNT", "ROP", "WDAY", "NXPI", "FICO", "TEL", "IT", "CTSH", "GLW", "DELL",
    "HPQ", "MCHP", "ANSS", "MPWR", "HPE", "KEYS", "GDDY", "CDW", "TYL", "NTAP"
]

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class ProgressTracker:
    def __init__(self, total_months: int):
        self.start_time = time.time()
        self.total_months = total_months
        self.completed_months = 0
        self.total_articles = 0
        self.current_month = None
        
    def update_progress(self):
        elapsed = time.time() - self.start_time
        avg_time = elapsed / (self.completed_months + 1e-6)
        remaining = avg_time * (self.total_months - self.completed_months)
        
        progress = (
            f"\n📅 Current: {self.current_month} | "
            f"Completed: {self.completed_months}/{self.total_months} months | "
            f"Articles: {self.total_articles} | "
            f"Elapsed: {timedelta(seconds=int(elapsed))} | "
            f"ETA: {timedelta(seconds=int(remaining))}"
        )
        sys.stdout.write("\r\033[K" + progress)
        sys.stdout.flush()

class VoxScraper:
    def __init__(self, start_date: datetime, end_date: datetime):
        self.base_url = "https://www.vox.com"
        self.rate_limiter = RateLimiter(calls=3, period=1)
        self.article_buffer = []
        self.all_articles = []  # Stores all articles across months
        self.start_date = start_date
        self.end_date = end_date
        self.total_months = ((end_date.year - start_date.year) * 12 
                            + end_date.month - start_date.month + 1)
        self.progress = ProgressTracker(self.total_months)
        self.financial_terms = re.compile(
            r'\b(stock|market|tech|economy|investment|trading|'
            r'earnings|valuation|merger|acquisition|IPO)\b', 
            re.IGNORECASE
        )

    def fetch_article_content(self, url: str) -> Dict:
        with self.rate_limiter:
            try:
                response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')

                time_tag = soup.find('time')
                article_date = pd.to_datetime(time_tag['datetime']) if time_tag else None

                article_body = soup.find('div', class_='c-entry-content')
                text_content = ' '.join([p.get_text() for p in article_body.find_all('p')]) if article_body else ''

                self.progress.total_articles += 1
                self.progress.update_progress()

                return {
                    'url': url,
                    'timestamp': article_date,
                    'content': text_content,
                    'is_financial': bool(self.financial_terms.search(text_content)),
                    'tickers': self._extract_tickers(text_content)
                }

            except Exception as e:
                logging.error(f"Failed to fetch {url}: {str(e)}")
                return None

    def _extract_tickers(self, text: str) -> List[str]:
        return [ticker for ticker in TECH_STOCKS if re.search(r'\b' + ticker + r'\b', text)]

    def scrape_time_range(self):
        current_date = self.start_date
        while current_date <= self.end_date:
            year = current_date.year
            month = current_date.month
            self.progress.current_month = f"{year}-{month:02d}"
            self.progress.update_progress()
            
            self._scrape_month(year, month)
            current_date += timedelta(days=32)
            self.progress.completed_months += 1
            self.progress.update_progress()

    def _scrape_month(self, year: int, month: int):
        page = 1
        while True:
            url = f"{self.base_url}/archives/{year}/{month}/{page}"
            try:
                article_links = self._get_article_links(url)
                if not article_links:
                    break
                
                with ThreadPoolExecutor(max_workers=4) as executor:
                    futures = [executor.submit(self.fetch_article_content, link) for link in article_links]
                    for future in as_completed(futures):
                        if (result := future.result()) is not None:
                            self.article_buffer.append(result)
                
                page += 1
                time.sleep(0.5)
                
            except Exception as e:
                logging.error(f"Stopping scrape for {year}-{month:02d}: {str(e)}")
                break
                
        self._save_buffer(year, month)

    def _get_article_links(self, url: str) -> List[str]:
        try:
            response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            return [
                self.base_url + a['href'] 
                for a in soup.select('div._1p9ghgz3 a[href^="/"]')
                if not a['href'].startswith("/archives")
            ]
        except Exception as e:
            logging.error(f"Failed to get links from {url}: {str(e)}")
            return []

    def _save_buffer(self, year: int, month: int):
        if not self.article_buffer:
            return
            
        df = pd.DataFrame(self.article_buffer)
        self.all_articles.extend(self.article_buffer) 
        
        df['date_hour'] = df['timestamp'].dt.floor('h')
        df = df.dropna(subset=['date_hour'])
        
        path = f"vox_articles_{year}_{month:02d}.parquet"
        df.to_parquet(path, index=False)
        logging.info(f"\n✅ Saved {len(df)} articles to {path}")
        self.article_buffer.clear()

class RateLimiter:
    def __init__(self, calls: int, period: int):
        self.calls = calls
        self.period = period
        self.timestamps = []
        
    def __enter__(self):
        now = time.time()
        self.timestamps = [t for t in self.timestamps if t > now - self.period]
        
        if len(self.timestamps) >= self.calls:
            sleep_time = self.period - (now - self.timestamps[0])
            time.sleep(sleep_time)
            
        self.timestamps.append(time.time())
        
    def __exit__(self, *args):
        pass

if __name__ == "__main__":
    start_date = datetime(2023, 1, 1)
    end_date = datetime.now()
    
    scraper = VoxScraper(start_date, end_date)
    print("🚀 Starting Vox.com scraper with progress tracking:")
    print(f"⏳ Time range: {start_date.strftime('%Y-%m')} to {end_date.strftime('%Y-%m')}")
    print("📊 Progress will be updated in real-time below:\n")
    
    try:
        scraper.scrape_time_range()

        final_df = pd.DataFrame(scraper.all_articles)
        
        print("\n\n🎉 Final Report:")
        print(f"Total articles collected: {len(final_df)}")
        
        if not final_df.empty:
            stock_articles = final_df[final_df['tickers'].apply(len) > 0]
            print(f"\n📈 Tech Stock Articles Found ({len(stock_articles)}):")
            for idx, row in stock_articles.iterrows():
                print(f"\n📰 Article {idx + 1}:")
                print(f"   URL: {row['url']}")
                print(f"   Date: {row['timestamp'].strftime('%Y-%m-%d') if pd.notnull(row['timestamp']) else 'Unknown'}")
                print(f"   Tickers: {', '.join(row['tickers'])}")
                print(f"   Financial Terms Found: {'Yes' if row['is_financial'] else 'No'}")
            print(f"\n💾 Full data saved to vox_articles_YYYY_MM.parquet files")
        else:
            print("\nNo articles found in the specified date range")
            
        print(f"\nProcessed months: {scraper.progress.completed_months}/{scraper.total_months}")
        
    except KeyboardInterrupt:
        print("\n\n⚠️ Scraping interrupted by user! Partial results saved.")

🚀 Starting Vox.com scraper with progress tracking:
⏳ Time range: 2023-01 to 2025-02
📊 Progress will be updated in real-time below:

[K
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 0 | Elapsed: 0:00:00 | ETA: 7:17:50
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 1 | Elapsed: 0:00:04 | ETA: 1356 days, 9:07:52
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 2 | Elapsed: 0:00:04 | ETA: 1400 days, 0:21:18
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 3 | Elapsed: 0:00:04 | ETA: 1409 days, 8:15:06
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 4 | Elapsed: 0:00:05 | ETA: 1602 days, 16:24:30
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 5 | Elapsed: 0:00:06 | ETA: 1883 days, 21:42:58
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 6 | Elapsed: 0:00:06 | ETA: 1903 days, 3:55:13
[KCurrent: 2023-01 | Completed: 0/26 months | Articles: 7 | Elapsed: 0:00:06 | ETA: 1924 days, 19:55:14
[KCurrent: 2023-01 | Completed: 0/26 mo

2025-02-01 10:11:21,969 - INFO - 
✅ Saved 177 articles to vox_articles_2023_01.parquet


[K
[KCurrent: 2023-01 | Completed: 1/26 months | Articles: 177 | Elapsed: 0:02:16 | ETA: 0:56:53
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 177 | Elapsed: 0:02:16 | ETA: 0:56:53
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 178 | Elapsed: 0:02:20 | ETA: 0:58:20
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 179 | Elapsed: 0:02:20 | ETA: 0:58:26
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 180 | Elapsed: 0:02:20 | ETA: 0:58:38
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 181 | Elapsed: 0:02:21 | ETA: 0:58:47
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 182 | Elapsed: 0:02:21 | ETA: 0:59:04
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 183 | Elapsed: 0:02:22 | ETA: 0:59:15
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 184 | Elapsed: 0:02:22 | ETA: 0:59:18
[KCurrent: 2023-02 | Completed: 1/26 months | Articles: 185 | Elapsed: 0:02:22 | ETA: 0:59:25
[KCurrent: 2023-02 | Completed: 1/26 months |

2025-02-01 10:14:01,118 - INFO - 
✅ Saved 188 articles to vox_articles_2023_02.parquet


[K
[KCurrent: 2023-02 | Completed: 2/26 months | Articles: 365 | Elapsed: 0:04:55 | ETA: 0:59:08
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 365 | Elapsed: 0:04:55 | ETA: 0:59:08
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 366 | Elapsed: 0:04:59 | ETA: 0:59:56
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 367 | Elapsed: 0:05:00 | ETA: 1:00:01
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 368 | Elapsed: 0:05:00 | ETA: 1:00:04
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 369 | Elapsed: 0:05:01 | ETA: 1:00:14
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 370 | Elapsed: 0:05:01 | ETA: 1:00:17
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 371 | Elapsed: 0:05:01 | ETA: 1:00:20
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 372 | Elapsed: 0:05:01 | ETA: 1:00:23
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 373 | Elapsed: 0:05:02 | ETA: 1:00:30
[KCurrent: 2023-03 | Completed: 2/26 months |

2025-02-01 10:15:48,415 - ERROR - Failed to fetch https://www.vox.com/climate/23589785/ev-dc-fast-charging-station-battery-electric-tesla-rivian: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 478 | Elapsed: 0:06:43 | ETA: 1:20:38
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 479 | Elapsed: 0:06:44 | ETA: 1:20:52
📅 Current: 2023-03 | Completed: 2/26 months | Articles: 480 | Elapsed: 0:06:45 | ETA: 1:21:06

2025-02-01 10:15:51,183 - ERROR - Failed to fetch https://www.vox.com/future-perfect/2023/3/18/23644776/covid-origin-raccoon-dog-beijing-wuhan-coronavirus-zoonotic-lab-leak-sars: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 481 | Elapsed: 0:06:45 | ETA: 1:21:10
📅 Current: 2023-03 | Completed: 2/26 months | Articles: 482 | Elapsed: 0:06:47 | ETA: 1:21:30

2025-02-01 10:15:55,176 - ERROR - Failed to fetch https://www.vox.com/policy/2023/3/18/23644339/hate-crimes-report-rise-in-hate-crimes: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 483 | Elapsed: 0:06:54 | ETA: 1:22:53
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 484 | Elapsed: 0:06:54 | ETA: 1:22:55
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 485 | Elapsed: 0:06:54 | ETA: 1:22:58
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 486 | Elapsed: 0:06:56 | ETA: 1:23:13
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 487 | Elapsed: 0:06:56 | ETA: 1:23:14
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 488 | Elapsed: 0:06:56 | ETA: 1:23:22
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 489 | Elapsed: 0:06:58 | ETA: 1:23:45
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 490 | Elapsed: 0:06:59 | ETA: 1:23:57
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 491 | Elapsed: 0:07:01 | ETA: 1:24:16
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 492 | Elapsed: 0:07:01 | ETA: 1:24:23
[KCurrent: 2023-03 | Completed: 2/26 months |

2025-02-01 10:16:29,945 - ERROR - Failed to fetch https://www.vox.com/climate/2023/3/14/23637780/willow-project-biden-oil-drilling-climate-change: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 512 | Elapsed: 0:07:27 | ETA: 1:29:33
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 513 | Elapsed: 0:07:28 | ETA: 1:29:40
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 514 | Elapsed: 0:07:28 | ETA: 1:29:43
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 515 | Elapsed: 0:07:28 | ETA: 1:29:46
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 516 | Elapsed: 0:07:29 | ETA: 1:29:50
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 517 | Elapsed: 0:07:29 | ETA: 1:29:59
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 518 | Elapsed: 0:07:30 | ETA: 1:30:06
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 519 | Elapsed: 0:07:30 | ETA: 1:30:09
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 520 | Elapsed: 0:07:31 | ETA: 1:30:12
[KCurrent: 2023-03 | Completed: 2/26 months | Articles: 521 | Elapsed: 0:07:31 | ETA: 1:30:22
[KCurrent: 2023-03 | Completed: 2/26 months |

2025-02-01 10:17:20,055 - INFO - 
✅ Saved 211 articles to vox_articles_2023_03.parquet


[K
[KCurrent: 2023-03 | Completed: 3/26 months | Articles: 577 | Elapsed: 0:08:14 | ETA: 1:03:12
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 577 | Elapsed: 0:08:14 | ETA: 1:03:12
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 578 | Elapsed: 0:08:18 | ETA: 1:03:42
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 579 | Elapsed: 0:08:19 | ETA: 1:03:49
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 580 | Elapsed: 0:08:20 | ETA: 1:03:53
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 581 | Elapsed: 0:08:20 | ETA: 1:04:00
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 582 | Elapsed: 0:08:21 | ETA: 1:04:04
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 583 | Elapsed: 0:08:22 | ETA: 1:04:12
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 584 | Elapsed: 0:08:22 | ETA: 1:04:15
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 585 | Elapsed: 0:08:23 | ETA: 1:04:21
[KCurrent: 2023-04 | Completed: 3/26 months |

2025-02-01 10:17:33,458 - ERROR - Failed to fetch https://www.vox.com/tv/23702839/succession-season-4-episode-6-kendall-waystar-investors-meeting: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 592 | Elapsed: 0:08:32 | ETA: 1:05:30
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 593 | Elapsed: 0:08:32 | ETA: 1:05:30
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 594 | Elapsed: 0:08:33 | ETA: 1:05:37
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 595 | Elapsed: 0:08:33 | ETA: 1:05:39
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 596 | Elapsed: 0:08:34 | ETA: 1:05:41
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 597 | Elapsed: 0:08:34 | ETA: 1:05:48
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 598 | Elapsed: 0:08:35 | ETA: 1:05:48
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 599 | Elapsed: 0:08:35 | ETA: 1:05:49
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 600 | Elapsed: 0:08:35 | ETA: 1:05:52
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 601 | Elapsed: 0:08:36 | ETA: 1:05:58
[KCurrent: 2023-04 | Completed: 3/26 months |

2025-02-01 10:19:58,164 - ERROR - Failed to fetch https://www.vox.com/policy/2023/4/5/23668755/industrial-policy-biden-chips: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)
2025-02-01 10:19:59,063 - ERROR - Failed to fetch https://www.vox.com/future-perfect/23669586/goat-girl-4-h-shasta-county-seizure: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 785 | Elapsed: 0:10:58 | ETA: 1:24:04
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 786 | Elapsed: 0:10:58 | ETA: 1:24:11
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 787 | Elapsed: 0:10:58 | ETA: 1:24:12
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 788 | Elapsed: 0:10:59 | ETA: 1:24:16
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 789 | Elapsed: 0:10:59 | ETA: 1:24:16
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 790 | Elapsed: 0:11:00 | ETA: 1:24:20
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 791 | Elapsed: 0:11:00 | ETA: 1:24:22
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 792 | Elapsed: 0:11:01 | ETA: 1:24:32
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 793 | Elapsed: 0:11:01 | ETA: 1:24:32
[KCurrent: 2023-04 | Completed: 3/26 months | Articles: 794 | Elapsed: 0:11:02 | ETA: 1:24:36
[KCurrent: 2023-04 | Completed: 3/26 months |

2025-02-01 10:20:19,175 - INFO - 
✅ Saved 224 articles to vox_articles_2023_04.parquet


[K
[KCurrent: 2023-04 | Completed: 4/26 months | Articles: 803 | Elapsed: 0:11:13 | ETA: 1:01:45
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 803 | Elapsed: 0:11:13 | ETA: 1:01:45
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 804 | Elapsed: 0:11:17 | ETA: 1:02:06
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 805 | Elapsed: 0:11:17 | ETA: 1:02:08
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 806 | Elapsed: 0:11:18 | ETA: 1:02:12
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 807 | Elapsed: 0:11:19 | ETA: 1:02:15
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 808 | Elapsed: 0:11:19 | ETA: 1:02:16
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 809 | Elapsed: 0:11:19 | ETA: 1:02:19
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 810 | Elapsed: 0:11:21 | ETA: 1:02:29
[KCurrent: 2023-05 | Completed: 4/26 months | Articles: 811 | Elapsed: 0:11:23 | ETA: 1:02:36
[KCurrent: 2023-05 | Completed: 4/26 months |

2025-02-01 10:23:34,801 - INFO - 
✅ Saved 247 articles to vox_articles_2023_05.parquet


[K
[KCurrent: 2023-05 | Completed: 5/26 months | Articles: 1051 | Elapsed: 0:14:29 | ETA: 1:00:51
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1051 | Elapsed: 0:14:29 | ETA: 1:00:51
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1052 | Elapsed: 0:14:32 | ETA: 1:01:05
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1053 | Elapsed: 0:14:32 | ETA: 1:01:05
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1054 | Elapsed: 0:14:32 | ETA: 1:01:06
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1055 | Elapsed: 0:14:33 | ETA: 1:01:09
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1056 | Elapsed: 0:14:33 | ETA: 1:01:10
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1057 | Elapsed: 0:14:34 | ETA: 1:01:11
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1058 | Elapsed: 0:14:34 | ETA: 1:01:13
[KCurrent: 2023-06 | Completed: 5/26 months | Articles: 1059 | Elapsed: 0:14:35 | ETA: 1:01:16
[KCurrent: 2023-06 | Completed: 5/2

2025-02-01 10:26:13,773 - INFO - 
✅ Saved 216 articles to vox_articles_2023_06.parquet


[K
[KCurrent: 2023-06 | Completed: 6/26 months | Articles: 1267 | Elapsed: 0:17:08 | ETA: 0:57:07
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1267 | Elapsed: 0:17:08 | ETA: 0:57:07
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1268 | Elapsed: 0:17:11 | ETA: 0:57:19
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1269 | Elapsed: 0:17:11 | ETA: 0:57:19
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1270 | Elapsed: 0:17:11 | ETA: 0:57:19
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1271 | Elapsed: 0:17:13 | ETA: 0:57:24
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1272 | Elapsed: 0:17:13 | ETA: 0:57:24
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1273 | Elapsed: 0:17:13 | ETA: 0:57:25
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1274 | Elapsed: 0:17:14 | ETA: 0:57:26
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1275 | Elapsed: 0:17:15 | ETA: 0:57:30
[KCurrent: 2023-07 | Completed: 6/2

2025-02-01 10:26:53,737 - ERROR - Failed to fetch https://www.vox.com/climate/23771835/biden-ira-climate-fossil-fuel-energy-investment: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)
2025-02-01 10:26:58,969 - ERROR - Failed to fetch https://www.vox.com/2023/7/25/23807309/us-doj-sues-texas-floating-border-barrier-greg-abbott-migrant-crossing: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1311 | Elapsed: 0:17:58 | ETA: 0:59:53
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1312 | Elapsed: 0:17:58 | ETA: 0:59:54
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1313 | Elapsed: 0:17:59 | ETA: 0:59:58
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1314 | Elapsed: 0:17:59 | ETA: 0:59:58
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1315 | Elapsed: 0:18:00 | ETA: 1:00:00
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1316 | Elapsed: 0:18:00 | ETA: 1:00:00
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1317 | Elapsed: 0:18:01 | ETA: 1:00:03
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1318 | Elapsed: 0:18:01 | ETA: 1:00:06
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1319 | Elapsed: 0:18:01 | ETA: 1:00:06
[KCurrent: 2023-07 | Completed: 6/26 months | Articles: 1320 | Elapsed: 0:18:01 | ETA: 1:00:06
[KCurrent: 2023-07 | Completed: 6/2

2025-02-01 10:28:53,088 - INFO - 
✅ Saved 184 articles to vox_articles_2023_07.parquet


[K
[KCurrent: 2023-07 | Completed: 7/26 months | Articles: 1453 | Elapsed: 0:19:47 | ETA: 0:53:43
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1453 | Elapsed: 0:19:47 | ETA: 0:53:43
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1454 | Elapsed: 0:19:51 | ETA: 0:53:54
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1455 | Elapsed: 0:19:52 | ETA: 0:53:56
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1456 | Elapsed: 0:19:52 | ETA: 0:53:57
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1457 | Elapsed: 0:19:53 | ETA: 0:53:59
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1458 | Elapsed: 0:19:53 | ETA: 0:54:00
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1459 | Elapsed: 0:19:54 | ETA: 0:54:01
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1460 | Elapsed: 0:19:54 | ETA: 0:54:03
[KCurrent: 2023-08 | Completed: 7/26 months | Articles: 1461 | Elapsed: 0:19:55 | ETA: 0:54:03
[KCurrent: 2023-08 | Completed: 7/2

2025-02-01 10:31:22,343 - INFO - 
✅ Saved 205 articles to vox_articles_2023_08.parquet


[K
[KCurrent: 2023-08 | Completed: 8/26 months | Articles: 1658 | Elapsed: 0:22:16 | ETA: 0:50:08
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1658 | Elapsed: 0:22:16 | ETA: 0:50:08
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1659 | Elapsed: 0:22:20 | ETA: 0:50:17
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1660 | Elapsed: 0:22:21 | ETA: 0:50:17
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1661 | Elapsed: 0:22:21 | ETA: 0:50:17
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1662 | Elapsed: 0:22:21 | ETA: 0:50:19
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1663 | Elapsed: 0:22:22 | ETA: 0:50:21
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1664 | Elapsed: 0:22:23 | ETA: 0:50:22
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1665 | Elapsed: 0:22:23 | ETA: 0:50:22
[KCurrent: 2023-09 | Completed: 8/26 months | Articles: 1666 | Elapsed: 0:22:23 | ETA: 0:50:22
[KCurrent: 2023-09 | Completed: 8/2

2025-02-01 10:33:45,314 - INFO - 
✅ Saved 200 articles to vox_articles_2023_09.parquet


[K
[KCurrent: 2023-09 | Completed: 9/26 months | Articles: 1858 | Elapsed: 0:24:39 | ETA: 0:46:35
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1858 | Elapsed: 0:24:39 | ETA: 0:46:35
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1859 | Elapsed: 0:24:42 | ETA: 0:46:39
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1860 | Elapsed: 0:24:42 | ETA: 0:46:40
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1861 | Elapsed: 0:24:42 | ETA: 0:46:40
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1862 | Elapsed: 0:24:43 | ETA: 0:46:42
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1863 | Elapsed: 0:24:44 | ETA: 0:46:43
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1864 | Elapsed: 0:24:44 | ETA: 0:46:43
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1865 | Elapsed: 0:24:44 | ETA: 0:46:44
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1866 | Elapsed: 0:24:45 | ETA: 0:46:45
[KCurrent: 2023-10 | Completed: 9/2

2025-02-01 10:34:42,990 - ERROR - Failed to fetch https://www.vox.com/politics/2023/10/19/23923926/house-speaker-jordan-mchenry-coalition-government: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1918 | Elapsed: 0:25:41 | ETA: 0:48:31
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1919 | Elapsed: 0:25:41 | ETA: 0:48:31
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1920 | Elapsed: 0:25:41 | ETA: 0:48:32
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1921 | Elapsed: 0:25:42 | ETA: 0:48:34
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1922 | Elapsed: 0:25:42 | ETA: 0:48:34
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1923 | Elapsed: 0:25:43 | ETA: 0:48:36
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1924 | Elapsed: 0:25:44 | ETA: 0:48:37
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1925 | Elapsed: 0:25:45 | ETA: 0:48:38
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1926 | Elapsed: 0:25:45 | ETA: 0:48:38
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1927 | Elapsed: 0:25:45 | ETA: 0:48:38
[KCurrent: 2023-10 | Completed: 9/2

2025-02-01 10:35:31,278 - ERROR - Failed to fetch https://www.vox.com/science/23911440/seti-explained-extra-terrestrial-intelligence-science-true-story-congressional-tesitmony-nazca-mummy: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1977 | Elapsed: 0:26:31 | ETA: 0:50:05
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1978 | Elapsed: 0:26:31 | ETA: 0:50:06
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1979 | Elapsed: 0:26:32 | ETA: 0:50:08
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1980 | Elapsed: 0:26:33 | ETA: 0:50:10
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1981 | Elapsed: 0:26:34 | ETA: 0:50:11
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1982 | Elapsed: 0:26:35 | ETA: 0:50:13
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1983 | Elapsed: 0:26:35 | ETA: 0:50:14
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1984 | Elapsed: 0:26:36 | ETA: 0:50:15
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1985 | Elapsed: 0:26:37 | ETA: 0:50:17
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1986 | Elapsed: 0:26:37 | ETA: 0:50:17
[KCurrent: 2023-10 | Completed: 9/2

2025-02-01 10:35:45,992 - ERROR - Failed to fetch https://www.vox.com/future-perfect/23912776/israel-gaza-hamas-war-palestine-charity-peace-altruism-moral-circle: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
📅 Current: 2023-10 | Completed: 9/26 months | Articles: 1989 | Elapsed: 0:26:41 | ETA: 0:50:25

2025-02-01 10:35:47,676 - ERROR - Failed to fetch https://www.vox.com/technology/2023/10/11/23905263/tom-wheeler-techlash-fcc-q-and-a-gilded-age: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1990 | Elapsed: 0:26:46 | ETA: 0:50:33
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1991 | Elapsed: 0:26:46 | ETA: 0:50:34
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1992 | Elapsed: 0:26:48 | ETA: 0:50:37
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1993 | Elapsed: 0:26:48 | ETA: 0:50:38
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1994 | Elapsed: 0:26:48 | ETA: 0:50:38
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1995 | Elapsed: 0:26:49 | ETA: 0:50:40
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1996 | Elapsed: 0:26:49 | ETA: 0:50:40
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1997 | Elapsed: 0:26:50 | ETA: 0:50:42
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1998 | Elapsed: 0:26:50 | ETA: 0:50:42
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 1999 | Elapsed: 0:26:51 | ETA: 0:50:44
[KCurrent: 2023-10 | Completed: 9/2

2025-02-01 10:36:02,901 - ERROR - Failed to fetch https://www.vox.com/future-perfect/23904723/mindfulness-meditation-economy-mcmindfulness-capitalism-stress: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)
2025-02-01 10:36:05,913 - ERROR - Failed to fetch https://www.vox.com/2023/7/4/23778786/malaria-us-florida-texas-maryland-climate-change-travel-resurgence-comeback: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2003 | Elapsed: 0:27:05 | ETA: 0:51:10
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2004 | Elapsed: 0:27:05 | ETA: 0:51:11
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2005 | Elapsed: 0:27:06 | ETA: 0:51:12
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2006 | Elapsed: 0:27:06 | ETA: 0:51:12
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2007 | Elapsed: 0:27:07 | ETA: 0:51:14
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2008 | Elapsed: 0:27:07 | ETA: 0:51:14
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2009 | Elapsed: 0:27:07 | ETA: 0:51:14
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2010 | Elapsed: 0:27:07 | ETA: 0:51:15
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2011 | Elapsed: 0:27:09 | ETA: 0:51:17
[KCurrent: 2023-10 | Completed: 9/26 months | Articles: 2012 | Elapsed: 0:27:09 | ETA: 0:51:17
[KCurrent: 2023-10 | Completed: 9/2

2025-02-01 10:36:37,429 - INFO - 
✅ Saved 176 articles to vox_articles_2023_10.parquet


[K
[KCurrent: 2023-10 | Completed: 10/26 months | Articles: 2034 | Elapsed: 0:27:31 | ETA: 0:44:03
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2034 | Elapsed: 0:27:31 | ETA: 0:44:03
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2035 | Elapsed: 0:27:37 | ETA: 0:44:11
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2036 | Elapsed: 0:27:37 | ETA: 0:44:12
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2037 | Elapsed: 0:27:38 | ETA: 0:44:13
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2038 | Elapsed: 0:27:38 | ETA: 0:44:14
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2039 | Elapsed: 0:27:39 | ETA: 0:44:15
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2040 | Elapsed: 0:27:39 | ETA: 0:44:15
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2041 | Elapsed: 0:27:40 | ETA: 0:44:16
[KCurrent: 2023-11 | Completed: 10/26 months | Articles: 2042 | Elapsed: 0:27:40 | ETA: 0:44:16
[KCurrent: 2023-11 | Comp

2025-02-01 10:38:33,416 - INFO - 
✅ Saved 150 articles to vox_articles_2023_11.parquet


[K
[KCurrent: 2023-11 | Completed: 11/26 months | Articles: 2185 | Elapsed: 0:29:27 | ETA: 0:40:10
📅 Current: 2023-12 | Completed: 11/26 months | Articles: 2185 | Elapsed: 0:29:27 | ETA: 0:40:10

2025-02-01 10:38:44,217 - ERROR - Failed to get links from https://www.vox.com/archives/2023/12/1: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2023-12 | Completed: 12/26 months | Articles: 2185 | Elapsed: 0:29:38 | ETA: 0:34:35
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2185 | Elapsed: 0:29:38 | ETA: 0:34:35
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2186 | Elapsed: 0:29:42 | ETA: 0:34:39
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2187 | Elapsed: 0:29:42 | ETA: 0:34:39
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2188 | Elapsed: 0:29:42 | ETA: 0:34:39
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2189 | Elapsed: 0:29:43 | ETA: 0:34:40
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2190 | Elapsed: 0:29:43 | ETA: 0:34:40
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2191 | Elapsed: 0:29:44 | ETA: 0:34:41
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2192 | Elapsed: 0:29:44 | ETA: 0:34:42
[KCurrent: 2024-01 | Completed: 12/26 months | Articles: 2193 | Elapsed: 0:29:45 | ETA: 0:34:42
[KCurrent: 2024-01 | Comp

2025-02-01 10:40:28,659 - INFO - 
✅ Saved 150 articles to vox_articles_2024_01.parquet


[K
[KCurrent: 2024-01 | Completed: 13/26 months | Articles: 2335 | Elapsed: 0:31:23 | ETA: 0:31:23
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2335 | Elapsed: 0:31:23 | ETA: 0:31:23
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2336 | Elapsed: 0:31:26 | ETA: 0:31:26
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2337 | Elapsed: 0:31:27 | ETA: 0:31:27
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2338 | Elapsed: 0:31:27 | ETA: 0:31:27
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2339 | Elapsed: 0:31:28 | ETA: 0:31:28
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2340 | Elapsed: 0:31:28 | ETA: 0:31:28
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2341 | Elapsed: 0:31:28 | ETA: 0:31:28
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2342 | Elapsed: 0:31:29 | ETA: 0:31:29
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2343 | Elapsed: 0:31:29 | ETA: 0:31:29
[KCurrent: 2024-02 | Comp

2025-02-01 10:41:06,471 - ERROR - Failed to fetch https://www.vox.com/24081504/moon-landing-nasa-odysseus-intuitive-machines-artemis-mars: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2380 | Elapsed: 0:32:06 | ETA: 0:32:06
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2381 | Elapsed: 0:32:06 | ETA: 0:32:06
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2382 | Elapsed: 0:32:07 | ETA: 0:32:07
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2383 | Elapsed: 0:32:08 | ETA: 0:32:08
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2384 | Elapsed: 0:32:08 | ETA: 0:32:08
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2385 | Elapsed: 0:32:08 | ETA: 0:32:08
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2386 | Elapsed: 0:32:09 | ETA: 0:32:09
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2387 | Elapsed: 0:32:10 | ETA: 0:32:10
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2388 | Elapsed: 0:32:10 | ETA: 0:32:10
[KCurrent: 2024-02 | Completed: 13/26 months | Articles: 2389 | Elapsed: 0:32:10 | ETA: 0:32:10
[KCurrent: 2024-02 | Comp

2025-02-01 10:42:45,398 - INFO - 
✅ Saved 164 articles to vox_articles_2024_02.parquet


[K
[KCurrent: 2024-02 | Completed: 14/26 months | Articles: 2500 | Elapsed: 0:33:39 | ETA: 0:28:51
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2500 | Elapsed: 0:33:39 | ETA: 0:28:51
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2501 | Elapsed: 0:33:43 | ETA: 0:28:54
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2502 | Elapsed: 0:33:43 | ETA: 0:28:54
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2503 | Elapsed: 0:33:43 | ETA: 0:28:54
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2504 | Elapsed: 0:33:44 | ETA: 0:28:54
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2505 | Elapsed: 0:33:44 | ETA: 0:28:55
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2506 | Elapsed: 0:33:44 | ETA: 0:28:55
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2507 | Elapsed: 0:33:45 | ETA: 0:28:56
[KCurrent: 2024-03 | Completed: 14/26 months | Articles: 2508 | Elapsed: 0:33:45 | ETA: 0:28:56
[KCurrent: 2024-03 | Comp

2025-02-01 10:44:53,661 - INFO - 
✅ Saved 189 articles to vox_articles_2024_03.parquet


[K
[KCurrent: 2024-03 | Completed: 15/26 months | Articles: 2689 | Elapsed: 0:35:48 | ETA: 0:26:15
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2689 | Elapsed: 0:35:48 | ETA: 0:26:15
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2690 | Elapsed: 0:35:51 | ETA: 0:26:17
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2691 | Elapsed: 0:35:52 | ETA: 0:26:18
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2692 | Elapsed: 0:35:52 | ETA: 0:26:18
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2693 | Elapsed: 0:35:52 | ETA: 0:26:18
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2694 | Elapsed: 0:35:52 | ETA: 0:26:18
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2695 | Elapsed: 0:35:53 | ETA: 0:26:19
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2696 | Elapsed: 0:35:53 | ETA: 0:26:19
[KCurrent: 2024-04 | Completed: 15/26 months | Articles: 2697 | Elapsed: 0:35:53 | ETA: 0:26:19
[KCurrent: 2024-04 | Comp

2025-02-01 10:46:50,997 - INFO - 
✅ Saved 168 articles to vox_articles_2024_04.parquet


[K
[KCurrent: 2024-04 | Completed: 16/26 months | Articles: 2857 | Elapsed: 0:37:45 | ETA: 0:23:35
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2857 | Elapsed: 0:37:45 | ETA: 0:23:35
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2858 | Elapsed: 0:37:50 | ETA: 0:23:38
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2859 | Elapsed: 0:37:50 | ETA: 0:23:38
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2860 | Elapsed: 0:37:50 | ETA: 0:23:39
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2861 | Elapsed: 0:37:51 | ETA: 0:23:39
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2862 | Elapsed: 0:37:51 | ETA: 0:23:39
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2863 | Elapsed: 0:37:51 | ETA: 0:23:39
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2864 | Elapsed: 0:37:52 | ETA: 0:23:40
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 2865 | Elapsed: 0:37:52 | ETA: 0:23:40
[KCurrent: 2024-05 | Comp

2025-02-01 10:48:56,221 - ERROR - Failed to fetch https://www.vox.com/politics/24147613/flight-delay-refund-white-house-rule: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3022 | Elapsed: 0:39:54 | ETA: 0:24:56
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3023 | Elapsed: 0:39:54 | ETA: 0:24:56
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3024 | Elapsed: 0:39:55 | ETA: 0:24:57
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3025 | Elapsed: 0:39:56 | ETA: 0:24:57
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3026 | Elapsed: 0:39:56 | ETA: 0:24:57
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3027 | Elapsed: 0:39:57 | ETA: 0:24:58
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3028 | Elapsed: 0:39:57 | ETA: 0:24:58
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3029 | Elapsed: 0:39:57 | ETA: 0:24:58
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3030 | Elapsed: 0:39:58 | ETA: 0:24:58
[KCurrent: 2024-05 | Completed: 16/26 months | Articles: 3031 | Elapsed: 0:39:59 | ETA: 0:24:59
[KCurrent: 2024-05 | Comp

2025-02-01 10:49:07,823 - INFO - 
✅ Saved 177 articles to vox_articles_2024_05.parquet


[K
[KCurrent: 2024-05 | Completed: 17/26 months | Articles: 3034 | Elapsed: 0:40:02 | ETA: 0:21:11
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3034 | Elapsed: 0:40:02 | ETA: 0:21:11
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3035 | Elapsed: 0:40:08 | ETA: 0:21:14
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3036 | Elapsed: 0:40:08 | ETA: 0:21:14
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3037 | Elapsed: 0:40:08 | ETA: 0:21:15
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3038 | Elapsed: 0:40:09 | ETA: 0:21:15
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3039 | Elapsed: 0:40:09 | ETA: 0:21:15
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3040 | Elapsed: 0:40:09 | ETA: 0:21:15
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3041 | Elapsed: 0:40:10 | ETA: 0:21:16
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3042 | Elapsed: 0:40:11 | ETA: 0:21:16
[KCurrent: 2024-06 | Comp

2025-02-01 10:49:25,507 - ERROR - Failed to fetch https://www.vox.com/politics/357955/biden-age-democrats-nominee-kamala-harris: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3049 | Elapsed: 0:40:24 | ETA: 0:21:23
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3050 | Elapsed: 0:40:24 | ETA: 0:21:23
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3051 | Elapsed: 0:40:24 | ETA: 0:21:23
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3052 | Elapsed: 0:40:25 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3053 | Elapsed: 0:40:25 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3054 | Elapsed: 0:40:25 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3055 | Elapsed: 0:40:26 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3056 | Elapsed: 0:40:26 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3057 | Elapsed: 0:40:27 | ETA: 0:21:24
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3058 | Elapsed: 0:40:27 | ETA: 0:21:24
[KCurrent: 2024-06 | Comp

2025-02-01 10:50:14,741 - ERROR - Failed to fetch https://www.vox.com/future-perfect/355735/why-americans-hate-inflation-and-its-cure: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3108 | Elapsed: 0:41:14 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3109 | Elapsed: 0:41:14 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3110 | Elapsed: 0:41:15 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3111 | Elapsed: 0:41:15 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3112 | Elapsed: 0:41:15 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3113 | Elapsed: 0:41:15 | ETA: 0:21:50
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3114 | Elapsed: 0:41:16 | ETA: 0:21:51
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3115 | Elapsed: 0:41:17 | ETA: 0:21:51
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3116 | Elapsed: 0:41:17 | ETA: 0:21:51
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3117 | Elapsed: 0:41:17 | ETA: 0:21:51
[KCurrent: 2024-06 | Comp

2025-02-01 10:50:42,572 - ERROR - Failed to fetch https://www.vox.com/culture/23356689/house-of-the-dragon-episode-5-alicent-dress-meaning-greens-and-blacks: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3137 | Elapsed: 0:41:41 | ETA: 0:22:04
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3138 | Elapsed: 0:41:41 | ETA: 0:22:04
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3139 | Elapsed: 0:41:42 | ETA: 0:22:04
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3140 | Elapsed: 0:41:42 | ETA: 0:22:04
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3141 | Elapsed: 0:41:43 | ETA: 0:22:05
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3142 | Elapsed: 0:41:43 | ETA: 0:22:05
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3143 | Elapsed: 0:41:43 | ETA: 0:22:05
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3144 | Elapsed: 0:41:44 | ETA: 0:22:05
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3145 | Elapsed: 0:41:44 | ETA: 0:22:05
[KCurrent: 2024-06 | Completed: 17/26 months | Articles: 3146 | Elapsed: 0:41:44 | ETA: 0:22:06
[KCurrent: 2024-06 | Comp

2025-02-01 10:51:49,877 - INFO - 
✅ Saved 198 articles to vox_articles_2024_06.parquet


[K
[KCurrent: 2024-06 | Completed: 18/26 months | Articles: 3232 | Elapsed: 0:42:44 | ETA: 0:18:59
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3232 | Elapsed: 0:42:44 | ETA: 0:18:59
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3233 | Elapsed: 0:42:48 | ETA: 0:19:01
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3234 | Elapsed: 0:42:48 | ETA: 0:19:01
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3235 | Elapsed: 0:42:48 | ETA: 0:19:01
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3236 | Elapsed: 0:42:49 | ETA: 0:19:02
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3237 | Elapsed: 0:42:50 | ETA: 0:19:02
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3238 | Elapsed: 0:42:50 | ETA: 0:19:02
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3239 | Elapsed: 0:42:50 | ETA: 0:19:02
[KCurrent: 2024-07 | Completed: 18/26 months | Articles: 3240 | Elapsed: 0:42:51 | ETA: 0:19:02
[KCurrent: 2024-07 | Comp

2025-02-01 10:54:22,899 - INFO - 
✅ Saved 211 articles to vox_articles_2024_07.parquet


[K
[KCurrent: 2024-07 | Completed: 19/26 months | Articles: 3443 | Elapsed: 0:45:17 | ETA: 0:16:41
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3443 | Elapsed: 0:45:17 | ETA: 0:16:41
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3444 | Elapsed: 0:45:21 | ETA: 0:16:42
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3445 | Elapsed: 0:45:21 | ETA: 0:16:42
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3446 | Elapsed: 0:45:21 | ETA: 0:16:42
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3447 | Elapsed: 0:45:22 | ETA: 0:16:43
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3448 | Elapsed: 0:45:22 | ETA: 0:16:43
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3449 | Elapsed: 0:45:23 | ETA: 0:16:43
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3450 | Elapsed: 0:45:23 | ETA: 0:16:43
[KCurrent: 2024-08 | Completed: 19/26 months | Articles: 3451 | Elapsed: 0:45:24 | ETA: 0:16:43
[KCurrent: 2024-08 | Comp

2025-02-01 10:56:41,750 - INFO - 
✅ Saved 196 articles to vox_articles_2024_08.parquet


[K
[KCurrent: 2024-08 | Completed: 20/26 months | Articles: 3640 | Elapsed: 0:47:36 | ETA: 0:14:16
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3640 | Elapsed: 0:47:36 | ETA: 0:14:16
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3641 | Elapsed: 0:47:39 | ETA: 0:14:17
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3642 | Elapsed: 0:47:40 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3643 | Elapsed: 0:47:40 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3644 | Elapsed: 0:47:41 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3645 | Elapsed: 0:47:41 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3646 | Elapsed: 0:47:42 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3647 | Elapsed: 0:47:42 | ETA: 0:14:18
[KCurrent: 2024-10 | Completed: 20/26 months | Articles: 3648 | Elapsed: 0:47:42 | ETA: 0:14:18
[KCurrent: 2024-10 | Comp

2025-02-01 10:59:19,915 - INFO - 
✅ Saved 210 articles to vox_articles_2024_10.parquet


[K
[KCurrent: 2024-10 | Completed: 21/26 months | Articles: 3851 | Elapsed: 0:50:14 | ETA: 0:11:57
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3851 | Elapsed: 0:50:14 | ETA: 0:11:57
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3852 | Elapsed: 0:50:17 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3853 | Elapsed: 0:50:17 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3854 | Elapsed: 0:50:17 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3855 | Elapsed: 0:50:18 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3856 | Elapsed: 0:50:19 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3857 | Elapsed: 0:50:19 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3858 | Elapsed: 0:50:19 | ETA: 0:11:58
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 3859 | Elapsed: 0:50:19 | ETA: 0:11:59
[KCurrent: 2024-11 | Comp

2025-02-01 11:01:38,669 - ERROR - Failed to fetch https://www.vox.com/2024-elections/382799/ballot-curing-mailed-absentee-ballots-fix-signature: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)
2025-02-01 11:01:43,059 - ERROR - Failed to fetch https://www.vox.com/politics/378912/musk-trump-voting-contest-million-dollars-swing-state-lottery-pennsylvania: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)


[K
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4045 | Elapsed: 0:52:41 | ETA: 0:12:32
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4046 | Elapsed: 0:52:41 | ETA: 0:12:32
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4047 | Elapsed: 0:52:42 | ETA: 0:12:32
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4048 | Elapsed: 0:52:42 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4049 | Elapsed: 0:52:43 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4050 | Elapsed: 0:52:43 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4051 | Elapsed: 0:52:44 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4052 | Elapsed: 0:52:44 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4053 | Elapsed: 0:52:44 | ETA: 0:12:33
[KCurrent: 2024-11 | Completed: 21/26 months | Articles: 4054 | Elapsed: 0:52:44 | ETA: 0:12:33
[KCurrent: 2024-11 | Comp

2025-02-01 11:02:09,712 - ERROR - Failed to fetch https://www.vox.com/2024-elections/381399/electorate-demographics-2024-election-women-republican-black-latino-voter: HTTPSConnectionPool(host='www.vox.com', port=443): Read timed out. (read timeout=10)
2025-02-01 11:02:12,210 - INFO - 
✅ Saved 217 articles to vox_articles_2024_11.parquet


[K
[KCurrent: 2024-11 | Completed: 22/26 months | Articles: 4070 | Elapsed: 0:53:06 | ETA: 0:09:39
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4070 | Elapsed: 0:53:06 | ETA: 0:09:39
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4071 | Elapsed: 0:53:09 | ETA: 0:09:39
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4072 | Elapsed: 0:53:10 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4073 | Elapsed: 0:53:10 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4074 | Elapsed: 0:53:10 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4075 | Elapsed: 0:53:11 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4076 | Elapsed: 0:53:11 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4077 | Elapsed: 0:53:12 | ETA: 0:09:40
[KCurrent: 2024-12 | Completed: 22/26 months | Articles: 4078 | Elapsed: 0:53:12 | ETA: 0:09:40
[KCurrent: 2024-12 | Comp

2025-02-01 11:03:57,256 - INFO - 
✅ Saved 155 articles to vox_articles_2024_12.parquet


[K
[KCurrent: 2024-12 | Completed: 23/26 months | Articles: 4226 | Elapsed: 0:54:51 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4226 | Elapsed: 0:54:51 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4227 | Elapsed: 0:54:53 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4228 | Elapsed: 0:54:54 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4229 | Elapsed: 0:54:54 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4230 | Elapsed: 0:54:54 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4231 | Elapsed: 0:54:55 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4232 | Elapsed: 0:54:55 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4233 | Elapsed: 0:54:56 | ETA: 0:07:09
[KCurrent: 2025-01 | Completed: 23/26 months | Articles: 4234 | Elapsed: 0:54:56 | ETA: 0:07:09
[KCurrent: 2025-01 | Comp

2025-02-01 11:06:12,520 - INFO - 
✅ Saved 194 articles to vox_articles_2025_01.parquet


[K
📅 Current: 2025-01 | Completed: 24/26 months | Articles: 4420 | Elapsed: 0:57:07 | ETA: 0:04:45

🎉 Final Report:
Total articles collected: 4420

📈 Tech Stock Articles Found (0):

💾 Full data saved to vox_articles_YYYY_MM.parquet files

Processed months: 24/26
