# In [None]:
import requests
import time
import json
import pandas as pd
from io import StringIO

# --- CONFIGURATION ---

# URL template for one page of results. The leading literal is a placeholder
# for the real endpoint; the query parameters that follow request the `rates`
# and `date` fields for USD, newest first, between 2020-01-01 and 2025-10-21,
# 100 records per page. `{page_num}` is filled in with str.format().
BASE_API_URL_TEMPLATE = "The Website You Want to Scape it" + "&".join(
    [
        "sort=date:desc",
        "fields[]=rates",
        "fields[]=date",
        "filters[currency][$eq]=USD",
        "filters[date][$gte]=2020-01-01T00:00:00.000Z",
        "filters[date][$lte]=2025-10-21T20:59:59.999Z",
        "pagination[page]={page_num}",
        "pagination[pageSize]=100",
    ]
)

# Request headers sent with every call. Every value below is a placeholder:
# replace them with the full set of working headers (including the
# Authorization token) before running the script.
HEADERS = {
    # Credentials and client identification.
    "Authorization": "Bearer YOUR_ACTUAL_HIDDEN_API_KEY_HERE",
    "User-Agent": "Your USER AGENT",
    "Referer": "The Website You Want to Scape it (Check the Robots.txt & POS )",
    # Content negotiation.
    "Accept": "application/json",
    "Accept-Encoding": "Go Through ",
    "Accept-Language": "en-US,en;q=0.9",
    "Origin": "The Website You Want to Scape it",
    "Content-Type": "application/json",
    # Fetch-metadata headers.
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
}


def fetch_gold_data(page_num, timeout=30):
    """Fetch one page of results and return the decoded JSON body.

    Args:
        page_num: Page number substituted into the ``{page_num}`` placeholder
            of ``BASE_API_URL_TEMPLATE``.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the scraper indefinitely.

    Returns:
        The parsed JSON response, or ``None`` if the request failed, timed
        out, returned an HTTP error status, or the body could not be decoded.

    Note:
        The function always sleeps for 7 seconds before returning, on success
        and on failure alike, to rate-limit requests to the server.
    """
    url = BASE_API_URL_TEMPLATE.format(page_num=page_num)

    try:
        print(f"‚û°Ô∏è Fetching page {page_num}...")
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        # Turn 4xx/5xx responses into HTTPError so they are reported below.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"‚ùå HTTP Error on page {page_num}: {err}")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: connection errors, timeouts and
        # JSON decoding failures are all reported and mapped to None.
        print(f"‚ùå An error occurred on page {page_num}: {e}")
        return None
    finally:
        # ETHICAL DELAY
        print("‚è∏Ô∏è Pausing for 7 seconds...")
        time.sleep(7)


if __name__ == "__main__":
    # Script entry point: download every page, then clean the accumulated
    # records and write them to a CSV file.

    all_records = []
    page = 1

    while True:
        data = fetch_gold_data(page)

        # fetch_gold_data returns None on any request/parse failure. A payload
        # without a 'data' key is treated the same way, so a failure is
        # reported as such instead of being mistaken for the end of the data.
        if not isinstance(data, dict) or 'data' not in data:
            print(f"üèÅ Request failed or page {page} returned no data. End of process.")
            break

        records_on_page = data['data']

        # Stop condition: if the server returns an empty list
        if not records_on_page:
            print(f"üèÅ Page {page} returned no data. End of historical records.")
            break

        print(f"‚úÖ Success! Retrieved {len(records_on_page)} records on page {page}.")
        all_records.extend(records_on_page)
        page += 1

    print(f"\n‚ú® Scraping Complete! Total records retrieved: {len(all_records)}")

    # --- FINAL DATA PROCESSING AND SAVE ---
    if all_records:
        df_final = pd.DataFrame(all_records)

        # 1. Date cleaning and validation: unparseable dates become NaT and
        #    those rows are dropped.
        df_final['date'] = pd.to_datetime(df_final['date'], errors='coerce')
        df_final_cleaned = df_final.dropna(subset=['date']).copy()

        # 2. Extract the '24k' entry from each 'rates' value. Values that are
        #    not dictionaries, or that lack the key, yield None and the row
        #    is dropped.
        df_final_cleaned['price'] = df_final_cleaned['rates'].apply(
            lambda x: x.get('24k') if isinstance(x, dict) else None
        )
        df_final_cleaned = df_final_cleaned.dropna(subset=['price'])

        # 3. Save final cleaned data
        df_output = df_final_cleaned[['date', 'price']].rename(columns={'price': 'price_24k_usd'})

        output_filename = 'gold_sa_historical_data_2020_onwards.csv'
        df_output.to_csv(output_filename, index=False)
        print(f"Data saved to {output_filename}. Total cleaned records: {len(df_output)}")

    else:
        print("No records were successfully retrieved to save.")