# Setup and Core Functions

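This notebook performs the initial setup for the Prime Reduction Estimates for S(T) project: it configures the environment, imports the core functions from the `src` package, defines a few helper functions, loads the data, and runs a quick smoke test of the pieces the experiments depend on.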

## Contents
1. Setup: imports, path configuration, and helper functions
2. Data loading and caching: Riemann zeros and the prime cache
3. Smoke tests of the core functions

In [None]:
# @title 1.1 Imports and Setup

# Standard library
import os
import pickle
import sys
import time
import warnings

# Third-party libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import kstest, chisquare, linregress
from scipy.ndimage import gaussian_filter1d
from mpmath import mp

# Make the project importable.
# The custom modules below are imported as `src.<subpackage>`, so the directory
# that CONTAINS `src/` must be on sys.path; '../src' on its own only exposes the
# sub-packages under their bare names. '../src' is kept for backward compatibility.
# NOTE(review): assumes the notebook's working directory sits one level below the
# project root - confirm against the repository layout.
# The membership test keeps re-running this cell from adding duplicate entries.
for _extra_path in ('..', '../src'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

# Import custom modules
from src.core.numerical_utils import kahan_sum, kahan_sum_complex
from src.core.prime_cache import PrimeCache, segmented_sieve
from src.core.s_t_functions import S_direct, S_RS, S_euler, smooth_RVM, analyze_error
from src.utils.paths import PathConfig, check_prerequisites

# Silence only deprecation chatter. A blanket filterwarnings('ignore') would also
# hide NumPy RuntimeWarnings (overflow, invalid value, ...) that point at real
# numerical problems in the computations this notebook prepares.
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# High precision for reference calculations
mp.dps = 50
mp_pi = mp.pi

# Plotting defaults
plt.rcParams.update({
    'figure.dpi': 150,
    'savefig.dpi': 300,
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'legend.fontsize': 10,
    'figure.titlesize': 14,
})
sns.set_palette("husl")

# Banner with the versions that matter for reproducing the results
print("="*80)
print("PRIME REDUCTION ESTIMATES FOR S(T)")
print("Setup and Core Functions Loading...")
print("="*80)
print()
print(f"Python version: {sys.version}")
print(f"NumPy version: {np.__version__}")
print(f"mpmath precision: {mp.dps} digits")
print()

In [None]:
# @title 1.2 Path Configuration

# Build the project path configuration, then make sure its directories exist
paths = PathConfig()
paths.ensure_dirs()

# Report every configured path, one entry per line
print("\nConfiguration:")
print("-" * 40)
for label, location in paths.paths.items():
    print(f"  {label:10s}: {location}")

# Report the two data files that the loading cells further down read
print("\nData files:")
print("-" * 40)
print(f"  Zeros file: {paths.zeros_file}")
print(f"  Prime cache: {paths.prime_cache_file}")

In [None]:
# @title 1.3 Additional Helper Functions

def load_zeros(file_path: str, max_zeros: int = 10_000_000) -> np.ndarray:
    """
    Load and preprocess Riemann zeros from file.

    The file is parsed with ``np.loadtxt``; the values are then sorted in
    ascending order and only strictly positive entries are kept. Progress
    and summary lines are printed along the way.

    Parameters:
    -----------
    file_path : str or path-like
        Path to zeros file
    max_zeros : int
        Maximum number of rows to read from the file (applied before the
        positivity filter, so fewer values may be returned)

    Returns:
    --------
    np.ndarray : Sorted positive zeros (empty if the file has no positive values)
    """
    print(f"Loading zeros from {file_path}")

    # ndmin=1 keeps the result at least 1-D: for a file holding a single value
    # np.loadtxt would otherwise return a 0-d array and len() below would raise.
    zeros = np.loadtxt(file_path, max_rows=max_zeros, ndmin=1)

    # Preprocess: remember the raw count, then sort and drop non-positive values
    original_count = len(zeros)
    zeros = np.sort(zeros)
    zeros = zeros[zeros > 0]  # Keep only positive

    print(f"  Loaded: {original_count:,} zeros")
    print(f"  After filtering: {len(zeros):,} zeros")
    if len(zeros) > 0:
        print(f"  Range: [{zeros[0]:.2f}, {zeros[-1]:.2f}]")
    else:
        # Indexing zeros[0] on an empty array would raise IndexError
        print("  Range: n/a (no positive zeros found)")

    return zeros


def save_cache(data, file_path: str):
    """
    Save data to cache file.

    The object is serialized with ``pickle``. Missing parent directories of
    ``file_path`` are created first.

    Parameters:
    -----------
    data : any
        Data to save (must be picklable)
    file_path : str
        Output file path
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError - only create directories when there is one to create.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, 'wb') as f:
        pickle.dump(data, f)
    print(f"Saved to cache: {file_path}")


def load_cache(file_path: str):
    """
    Read an object back from a pickle cache file.

    Note that the file is deserialized with ``pickle.load``, so it should only
    be pointed at files from a trusted source.

    Parameters:
    -----------
    file_path : str
        Cache file path

    Returns:
    --------
    any : The object stored in the file
    """
    with open(file_path, 'rb') as cache_handle:
        cached_object = pickle.load(cache_handle)
    return cached_object


# Confirmation line so the cell output shows that the helper definitions in this cell were executed
print("Helper functions defined successfully.")

In [None]:
# @title 2.1 Data Loading - Riemann Zeros

ZEROS_CACHE_PATH = paths.cache_dir / "zeros.npy"

if not ZEROS_CACHE_PATH.exists():
    # No binary cache yet: parse the source text file, then write the cache
    source_file = paths.zeros_file

    # Stop with a clear message when the source file is missing
    if not source_file.exists():
        print(f"⚠ ERROR: Zeros file not found at {source_file}")
        print("\nPlease download the zeros file and place it in the correct location.")
        print("Source: Andrew Odlyzko's zero tables")
        raise FileNotFoundError(f"Zeros file not found: {source_file}")

    zeros = load_zeros(source_file)

    # Store the preprocessed array so the next run takes the cached branch
    print("\nSaving to cache...")
    np.save(ZEROS_CACHE_PATH, zeros)
    print(f"✓ Cached to: {ZEROS_CACHE_PATH}")
else:
    # Cache present: read the array directly and report how long it took
    print(f"Found cached zeros: {ZEROS_CACHE_PATH}")
    print("Loading from cache...")
    start = time.time()
    zeros = np.load(ZEROS_CACHE_PATH)
    elapsed = time.time() - start
    print(f"✓ Loaded {len(zeros):,} zeros from cache in {elapsed:.2f}s")
    print(f"  Range: [{zeros[0]:.2f}, {zeros[-1]:.2f}]")

print(f"\nZeros ready for experiments: {len(zeros):,} zeros")

In [None]:
# @title 2.2 Data Loading - Prime Cache

# Upper limit handed to the prime cache, and the cache object itself
MAX_PRIME = 1_000_000_000  # 1 billion
prime_cache = PrimeCache(max_prime=MAX_PRIME, cache_file=str(paths.prime_cache_file))

if not paths.prime_cache_file.exists():
    # Nothing on disk yet; only inform the user here
    print("Prime cache not found. Will generate on demand.")
    print("This may take several minutes for the first generation.")
else:
    # Cache file present: fetch a small range and show its first entries
    print(f"Found prime cache: {paths.prime_cache_file}")
    primes_up_to_1M = prime_cache.get_primes_up_to(1_000_000)
    print(f"✓ Prime cache ready (sample: {primes_up_to_1M[:5]}...)")

print(f"\nPrime cache configured for primes up to {MAX_PRIME:,}")

In [None]:
# @title 3.1 Test Core Functions

print("Testing core functions...")
print("-" * 40)

# Test parameters
T_test = 10000.0
P_max_test = 1_000_000

print(f"Test parameters: T = {T_test}, P_max = {P_max_test:,}")
print()

# Tests 1 and 2 need the `zeros` array from cell 2.1. If that cell was not run,
# say so explicitly instead of skipping silently.
zeros_available = 'zeros' in locals()

if zeros_available:
    # Test S_direct
    print("1. Testing S_direct...")
    start = time.perf_counter()  # monotonic clock, meant for measuring intervals
    s_direct = S_direct(T_test, zeros)
    elapsed = time.perf_counter() - start
    print(f"   S_direct({T_test}) = {s_direct:.6f} (took {elapsed:.4f}s)")

    # Test S_RS
    print("\n2. Testing S_RS (Riemann-Siegel)...")
    start = time.perf_counter()
    s_rs = S_RS(T_test, zeros)
    elapsed = time.perf_counter() - start
    print(f"   S_RS({T_test}) = {s_rs:.6f} (took {elapsed:.4f}s)")
else:
    print("1-2. Skipping S_direct / S_RS: `zeros` is not defined (run cell 2.1 first)")

# Test S_euler
print("\n3. Testing S_euler (Euler product)...")
start = time.perf_counter()
s_euler = S_euler(T_test, P_max_test, prime_cache)
elapsed = time.perf_counter() - start
print(f"   S_euler({T_test}, {P_max_test:,}) = {s_euler:.6f} (took {elapsed:.4f}s)")

# Test prime cache
print("\n4. Testing prime cache...")
start = time.perf_counter()
primes_subset = prime_cache.get_primes_up_to(100_000)
elapsed = time.perf_counter() - start
print(f"   Retrieved {len(primes_subset):,} primes up to 100,000 (took {elapsed:.4f}s)")
print(f"   π(100_000) = {len(primes_subset)}")
# There are exactly 9,592 primes below 10^5; this is the one result in this
# cell with a known exact value, so check it rather than only printing it.
assert len(primes_subset) == 9592, (
    f"Prime cache returned {len(primes_subset)} primes up to 100,000; expected 9592"
)

# Test numerical utilities
print("\n5. Testing numerical utilities...")
test_array = np.array([1.0, -1.0, 1e-15, -1e-15, 1e-16])
normal_sum = np.sum(test_array)
kahan_sum_result = kahan_sum(test_array)
print(f"   Normal sum: {normal_sum:.16f}")
print(f"   Kahan sum: {kahan_sum_result:.16f}")
print(f"   Difference: {abs(normal_sum - kahan_sum_result):.2e}")

# Only claim full success when nothing was skipped
if zeros_available:
    print("\n✓ All tests completed successfully!")
else:
    print("\n⚠ Tests completed, but S_direct / S_RS were skipped (zeros not loaded).")

## Summary

This notebook has:
1. ✅ Set up the environment and imported all necessary modules
2. ✅ Configured paths and ensured directories exist
3. ✅ Loaded Riemann zeros (from cache or source file)
4. ✅ Initialized the prime cache system
5. ✅ Smoke-tested the core functions (`S_direct`, `S_RS`, `S_euler`), the prime cache, and Kahan summation

The setup is now complete and ready for the main experiments.

### Next Steps
Run `02_main_experiments.ipynb` to perform the optimal truncation search and method comparison experiments.