In [1]:
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os, logging

from eventor_api import EventorAPI

# Emit INFO-level records so the per-month progress messages are visible.
logging.basicConfig(level=logging.INFO)

# Load variables from a local .env file (if any) into the process environment,
# so the API key never has to be hard-coded in the notebook.
load_dotenv()

# os.getenv returns None when the variable is missing. Surface that here,
# because request errors further down are caught and only logged per month.
if not os.getenv('API_KEY'):
    logging.warning("API_KEY is not set; check the environment or the .env file")

api = EventorAPI(os.getenv('API_KEY'))

In [4]:
def fetch_entries_by_year(api: "EventorAPI", start_year: int, end_year: int, output_dir: str = 'data') -> None:
    """
    Fetch entries month by month and save them to one CSV file per year.

    For each year, ``api.get_entries`` is called once per calendar month and
    the results are converted with ``api.entries_to_dataframe``. The monthly
    frames are combined, de-duplicated on ``EntryId`` and written to
    ``<output_dir>/entries_<year>.csv``, replacing any existing file.

    Failure handling:
        * A month whose request raises is logged and skipped; the remaining
          months are still fetched and saved, and a warning lists the months
          that are missing from the file.
        * If nothing could be fetched for a year *because requests failed*,
          no file is written, so a previously saved CSV is not replaced by an
          empty one.
        * If every request succeeded but returned no entries, an empty CSV is
          written to mark the year as processed.

    Args:
        api: EventorAPI instance (anything providing ``get_entries`` and
            ``entries_to_dataframe``)
        start_year: Starting year (inclusive)
        end_year: Ending year (inclusive)
        output_dir: Directory to save CSV files (created if missing)

    Returns:
        None. Results are written to disk and progress is reported via logging.
    """
    from calendar import monthrange
    from pathlib import Path

    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    for year in range(start_year, end_year + 1):
        output_file = os.path.join(output_dir, f'entries_{year}.csv')
        temp_file = os.path.join(output_dir, f'entries_{year}.temp.csv')

        logging.info(f"Processing year {year}")

        # Collect the monthly frames and concatenate once at the end instead
        # of re-copying a growing DataFrame on every iteration.
        monthly_frames = []
        failed_months = []

        for month in range(1, 13):
            _, last_day = monthrange(year, month)
            start_date = datetime(year, month, 1)
            end_date = datetime(year, month, last_day)
            month_label = start_date.strftime('%B %Y')

            logging.info(f"Fetching entries for {month_label}")

            try:
                entries_data = api.get_entries(
                    from_event_date=start_date.strftime('%Y-%m-%d'),
                    to_event_date=end_date.strftime('%Y-%m-%d')
                )

                if entries_data is not None:
                    entries_df = api.entries_to_dataframe(entries_data)
                    if entries_df is not None and not entries_df.empty:
                        monthly_frames.append(entries_df)

            except Exception as e:
                # Best effort: keep going with the other months, but remember
                # the failure so the year is not mistaken for a complete one.
                logging.error(f"Error fetching data for {month_label}: {e}")
                failed_months.append(month_label)

        if failed_months:
            logging.warning(
                f"{len(failed_months)} month(s) could not be fetched for {year}: "
                f"{', '.join(failed_months)}"
            )

        if monthly_frames:
            yearly_entries = pd.concat(monthly_frames, ignore_index=True)

            # The same entry can be returned by more than one monthly query.
            if 'EntryId' in yearly_entries.columns:
                yearly_entries = yearly_entries.drop_duplicates(subset=['EntryId'])
            else:
                logging.warning(f"No 'EntryId' column for {year}; skipping de-duplication")

            try:
                _write_csv_via_temp_file(yearly_entries, output_file, temp_file)
                logging.info(f"Saved {len(yearly_entries)} entries for {year} to {output_file}")
            except Exception as e:
                logging.error(f"Error saving file for {year}: {e}")

        elif failed_months:
            # Nothing was fetched and at least one request failed: this is an
            # outage, not an empty year, so do not clobber existing data.
            logging.error(
                f"No entries fetched for {year} due to request errors; "
                f"leaving {output_file} untouched"
            )

        else:
            logging.warning(f"No entries found for {year}")
            # Create empty file to indicate year was processed
            try:
                _write_csv_via_temp_file(pd.DataFrame(), output_file, temp_file)
            except Exception as e:
                logging.error(f"Error saving file for {year}: {e}")


def _write_csv_via_temp_file(frame, output_file, temp_file):
    """Write ``frame`` to ``temp_file``, then move it over ``output_file``.

    Writing to a sibling temp file first means a failed or interrupted write
    never leaves a half-written ``output_file``. The temp file is removed if
    anything goes wrong, and the exception is re-raised for the caller to log.
    """
    try:
        frame.to_csv(temp_file, index=False)
        os.replace(temp_file, output_file)
    except Exception:
        if os.path.exists(temp_file):
            os.remove(temp_file)
        raise



In [6]:
# Example usage: fetch entries for every year from 2014 through 2024
# (both bounds are inclusive) into the function's default 'data' directory.
# This makes one get_entries call per calendar month and rewrites each
# yearly CSV, so re-running the cell repeats the full download.
start_year = 2014
end_year = 2024
fetch_entries_by_year(api, start_year, end_year)

INFO:root:Processing year 2014
INFO:root:Fetching entries for January 2014
INFO:root:Fetching entries for February 2014
INFO:root:Fetching entries for March 2014
INFO:root:Fetching entries for April 2014
INFO:root:Fetching entries for May 2014
INFO:root:Fetching entries for June 2014
INFO:root:Fetching entries for July 2014
INFO:root:Fetching entries for August 2014
INFO:root:Fetching entries for September 2014
INFO:root:Fetching entries for October 2014
INFO:root:Fetching entries for November 2014
INFO:root:Fetching entries for December 2014
INFO:root:Saved 44030 entries for 2014 to data/entries_2014.csv
INFO:root:Processing year 2015
INFO:root:Fetching entries for January 2015
INFO:root:Fetching entries for February 2015
INFO:root:Fetching entries for March 2015
INFO:root:Fetching entries for April 2015
INFO:root:Fetching entries for May 2015
INFO:root:Fetching entries for June 2015
INFO:root:Fetching entries for July 2015
INFO:root:Fetching entries for August 2015
INFO:root:Fetching