In [2]:
# Parallelized with dictionary structure and separate data files

from sage.databases.cremona import CremonaDatabase
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from joblib import Parallel, delayed
import gc
import multiprocessing
import os

# Create output directory (no error if it already exists)
output_dir = "Curve Database (Conductor < 100 000)/"
os.makedirs(output_dir, exist_ok=True)
print(f"Output directory: {output_dir}")

# Get number of available cores; used below as the joblib worker count
n_cores = multiprocessing.cpu_count()
print(f"Using {n_cores} CPU cores")

# Database of elliptic curves over the rationals
db = CremonaDatabase()

# Define conductor range.  The range is half-open: it is consumed through
# range(cmin, cmax) below, so cmin is included and cmax itself is excluded.
cmax = 100000
cmin = 1

# Extract elliptic curves with conductor in [cmin, cmax) and rank 0, 1, 2, 3.
# The message uses half-open notation so that it matches what is queried.
print(f"Extracting curves in conductor range [{cmin}, {cmax})...")
rank_zero_curves = []
rank_one_curves = []
rank_two_curves = []
rank_three_curves = []

# rank -> destination list (one lookup instead of an if/elif ladder)
curves_by_rank = {
    0: rank_zero_curves,
    1: rank_one_curves,
    2: rank_two_curves,
    3: rank_three_curves,
}
unclassified_count = 0  # curves whose rank is outside 0..3

for E in tqdm(db.iter(range(cmin, cmax)), desc="Loading curves"):
    bucket = curves_by_rank.get(int(E.rank()))
    # Compare against None: an empty bucket is falsy but still a valid target.
    if bucket is None:
        unclassified_count += 1
    else:
        bucket.append(E)

print(f"Found {len(rank_zero_curves)} rank 0 curves")
print(f"Found {len(rank_one_curves)} rank 1 curves")
print(f"Found {len(rank_two_curves)} rank 2 curves")
print(f"Found {len(rank_three_curves)} rank 3 curves")
if unclassified_count:
    # Previously such curves were dropped without any trace.
    print(f"Warning: ignored {unclassified_count} curves with rank outside 0-3")

# Extract representatives for different isogeny classes using LABELS
print("Extracting isogeny class representatives...")

def get_isogeny_label(E):
    """Return the isogeny-class part of a curve's Cremona label.

    The label comes from ``E.cremona_label()``; every trailing digit is
    stripped, e.g. ``'11a1'`` becomes ``'11a'``.
    """
    label = E.cremona_label()
    # Move an end marker left across the trailing run of digits.
    end = len(label)
    while end > 0 and label[end - 1].isdigit():
        end -= 1
    return label[:end]

def extract_isogeny_reps(curve_list, desc):
    """Keep the first curve encountered from each isogeny class.

    curve_list -- iterable of curves accepted by get_isogeny_label
    desc       -- caption shown on the tqdm progress bar
    Returns the kept curves as a list, in their original order.
    """
    kept = []
    classes_seen = set()

    for curve in tqdm(curve_list, desc=desc):
        class_label = get_isogeny_label(curve)
        if class_label in classes_seen:
            continue  # this class already has its representative
        classes_seen.add(class_label)
        kept.append(curve)

    return kept

# One representative per isogeny class, computed separately for each rank list
rk0_isogeny_reps, rk1_isogeny_reps, rk2_isogeny_reps, rk3_isogeny_reps = [
    extract_isogeny_reps(curves, caption)
    for curves, caption in (
        (rank_zero_curves, "Rank 0 isogeny classes"),
        (rank_one_curves, "Rank 1 isogeny classes"),
        (rank_two_curves, "Rank 2 isogeny classes"),
        (rank_three_curves, "Rank 3 isogeny classes"),
    )
]

print(f"Found {len(rk0_isogeny_reps)} rank 0 isogeny classes")
print(f"Found {len(rk1_isogeny_reps)} rank 1 isogeny classes")
print(f"Found {len(rk2_isogeny_reps)} rank 2 isogeny classes")
print(f"Found {len(rk3_isogeny_reps)} rank 3 isogeny classes")

# Single work list: rank 0 first, then rank 1, 2 and 3
all_curves = [
    *rk0_isogeny_reps,
    *rk1_isogeny_reps,
    *rk2_isogeny_reps,
    *rk3_isogeny_reps,
]
print(f"Total curves to process: {len(all_curves)}")

# Primes at which a_p is evaluated
N = 1000  # Number of primes to compute
print(f"Computing first {N} primes...")
primes_list = list(primes_first_n(N))
print(f"Prime range: {primes_list[0]} to {primes_list[-1]}")

# Function to compute curve data
def compute_curve_data(E, primes):
    """Collect label, conductor, isogeny class, rank and a_p values of a curve.

    E      -- curve providing cremona_label(), conductor(), rank() and ap(p)
    primes -- iterable of primes; E.ap(p) is evaluated for each, in order
    Returns a dict with keys 'label', 'conductor', 'isogeny_class', 'rank'
    and 'ap_list' (Python ints).  If any step raises, a message is printed
    and None is returned instead.
    """
    try:
        record = {
            'label': E.cremona_label(),
            'conductor': int(E.conductor()),
            'isogeny_class': get_isogeny_label(E),
            'rank': int(E.rank()),
        }
        # int() turns each coefficient into a plain Python integer
        record['ap_list'] = [int(E.ap(p)) for p in primes]
        return record
    except Exception as e:
        print(f"  Error processing {E.cremona_label()}: {e}")
        return None

# Process all curves in parallel: one joblib worker process per core
print("Computing ap values for all curves in parallel...")
run_in_parallel = Parallel(n_jobs=n_cores, backend='multiprocessing')
results = run_in_parallel(
    delayed(compute_curve_data)(E, primes_list)
    for E in tqdm(all_curves, desc="Processing curves")
)

# Build the main database dictionary, keyed by curve label.
# compute_curve_data returns None for a failed curve; those are only counted.
print("Building database dictionary...")
curve_database = {
    entry['label']: entry for entry in results if entry is not None
}
skipped_count = len([entry for entry in results if entry is None])

print(f"Successfully processed {len(curve_database)} curves")
print(f"Skipped {skipped_count} curves due to errors")

# Save the main database
banner = "=" * 60
print("\n" + banner)
print("SAVING FILES")
print(banner)

database_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.sobj")
print(f"Saving complete database to {database_file}...")
save(curve_database, database_file)

# Second copy of the same dictionary, written with the standard pickle module
import pickle
pickle_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.pkl")
with open(pickle_file, 'wb') as pickle_out:
    pickle.dump(curve_database, pickle_out)
print(f"Also saved as pickle to {pickle_file}")

# Separate curves by rank for analysis (one list of records per rank 0..3)
print("\nOrganizing by rank for analysis...")
rk0_data, rk1_data, rk2_data, rk3_data = [
    [rec for rec in curve_database.values() if rec['rank'] == wanted_rank]
    for wanted_rank in range(4)
]

print(f"Rank 0: {len(rk0_data)} curves")
print(f"Rank 1: {len(rk1_data)} curves")
print(f"Rank 2: {len(rk2_data)} curves")
print(f"Rank 3: {len(rk3_data)} curves")

# ========== SAVE SEPARATE DATA FILES ==========
print("\nSaving separate data files...")

# The primes list is stored once; the a_p matrices are indexed against it
primes_file = os.path.join(output_dir, f"primes_list_N{N}.sobj")
save(primes_list, primes_file)
print(f"  Saved primes list: {primes_file}")

# Function to save rank-specific data
def save_rank_data(rank_data, rank_num):
    """Write the per-rank data files into output_dir.

    rank_data -- list of record dicts with keys 'label', 'conductor',
                 'isogeny_class' and 'ap_list'
    rank_num  -- rank number used in the file names
    Writes four .sobj files (labels, conductors, isogeny classes, a_p matrix)
    plus a .npy copy of the int32 a_p matrix, then prints a confirmation.
    """
    stem = os.path.join(output_dir, f"rank{rank_num}_c{cmin}_to_{cmax}")

    # File-name suffix -> payload, in the order the files are written
    columns = {
        'labels': [rec['label'] for rec in rank_data],
        'conductors': [rec['conductor'] for rec in rank_data],
        'isogeny_classes': [rec['isogeny_class'] for rec in rank_data],
        'ap_matrix': np.array([rec['ap_list'] for rec in rank_data], dtype=np.int32),
    }
    for suffix, payload in columns.items():
        save(payload, f"{stem}_{suffix}.sobj")

    # Also save ap_matrix as numpy file for easier loading
    np.save(f"{stem}_ap_matrix.npy", columns['ap_matrix'])

    print(f"  Saved rank {rank_num} data: {len(rank_data)} curves")

# Per-rank files are written only for ranks that actually occur
for rank_value, rank_records in enumerate((rk0_data, rk1_data, rk2_data, rk3_data)):
    if rank_records:
        save_rank_data(rank_records, rank_value)

# Save all data combined (separate from dictionary)
print("\nSaving combined separate data files...")
records = list(curve_database.values())
all_labels = [rec['label'] for rec in records]
all_conductors = [rec['conductor'] for rec in records]
all_isogeny_classes = [rec['isogeny_class'] for rec in records]
all_ranks = [rec['rank'] for rec in records]
all_ap_matrix = np.array([rec['ap_list'] for rec in records], dtype=np.int32)

for payload, filename in (
    (all_labels, f"all_labels_c{cmin}_to_{cmax}.sobj"),
    (all_conductors, f"all_conductors_c{cmin}_to_{cmax}.sobj"),
    (all_isogeny_classes, f"all_isogeny_classes_c{cmin}_to_{cmax}.sobj"),
    (all_ranks, f"all_ranks_c{cmin}_to_{cmax}.sobj"),
    (all_ap_matrix, f"all_ap_matrix_c{cmin}_to_{cmax}.sobj"),
):
    save(payload, os.path.join(output_dir, filename))
np.save(os.path.join(output_dir, f"all_ap_matrix_c{cmin}_to_{cmax}.npy"), all_ap_matrix)

print(f"  Saved combined data for all {len(curve_database)} curves")

# ========== COMPUTE AND SAVE AVERAGES ==========
print("\nComputing average ap values...")

def compute_average_aps(curve_data_list):
    """Average the 'ap_list' entries of the given records, column by column.

    Returns a float64 array with one mean per position of 'ap_list'.
    For an empty input the result is np.zeros(N), with N the module-level
    number of primes.
    """
    if len(curve_data_list) > 0:
        rows = [record['ap_list'] for record in curve_data_list]
        return np.array(rows, dtype=np.float64).mean(axis=0)
    return np.zeros(N)

# Column-wise mean a_p for each rank
rk0_avg_aps, rk1_avg_aps, rk2_avg_aps, rk3_avg_aps = [
    compute_average_aps(group)
    for group in (rk0_data, rk1_data, rk2_data, rk3_data)
]

# Compute overall average across ALL curves
overall_avg_aps = all_ap_matrix.mean(axis=0)

# Averages, the primes they refer to, and the group sizes, in one dictionary
averages_dict = {
    'primes': primes_list,
    'rank_0_average': list(rk0_avg_aps),
    'rank_1_average': list(rk1_avg_aps),
    'rank_2_average': list(rk2_avg_aps),
    'rank_3_average': list(rk3_avg_aps),
    'overall_average': list(overall_avg_aps),
    'rank_0_count': len(rk0_data),
    'rank_1_count': len(rk1_data),
    'rank_2_count': len(rk2_data),
    'rank_3_count': len(rk3_data),
    'total_count': len(curve_database)
}

averages_file = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.sobj")
print(f"Saving average ap's to {averages_file}...")
save(averages_dict, averages_file)

# Also save as pickle
averages_pickle = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.pkl")
with open(averages_pickle, 'wb') as pickle_out:
    pickle.dump(averages_dict, pickle_out)

# Save individual average arrays
for averages, filename in (
    (rk0_avg_aps, f"rank0_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk1_avg_aps, f"rank1_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk2_avg_aps, f"rank2_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk3_avg_aps, f"rank3_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (overall_avg_aps, f"overall_avg_aps_c{cmin}_to_{cmax}.sobj"),
):
    save(averages, os.path.join(output_dir, filename))

# ========== CREATE PLOTS ==========
print("\nCreating plots...")


def _save_scatter_figure(series, title, filename, legend=True):
    """Draw one scatter figure of average a_p against prime index and save it.

    series   -- list of (values, scatter keyword dict) pairs, drawn in order;
                each ``values`` must have N entries
    title    -- figure title
    filename -- PNG file name, created inside output_dir
    legend   -- whether to draw a legend
    Returns the path of the written file.
    """
    plt.figure(figsize=(14, 7))
    for values, scatter_kwargs in series:
        plt.scatter(range(N), values, **scatter_kwargs)
    plt.axhline(y=0, color='gray', linestyle='--', alpha=0.3)
    if legend:
        plt.legend(fontsize=12)
    plt.xlabel(r"Prime index $i$", fontsize=12)
    plt.ylabel(r"Average $a_{p_i}$", fontsize=12)
    plt.title(title, fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    target = os.path.join(output_dir, filename)
    plt.savefig(target, dpi=150)
    print(f"  Saved plot: {target}")
    plt.close()  # release the figure before the next one is created
    return target


# Titles use the half-open interval [cmin, cmax): the data were collected with
# range(cmin, cmax), which never queries conductor cmax.

# Plot 1: Rank 0 vs Rank 1
plot1_file = _save_scatter_figure(
    [
        (rk0_avg_aps, dict(label=f"Rank 0 (n={len(rk0_data)})", alpha=0.6, s=15)),
        (rk1_avg_aps, dict(label=f"Rank 1 (n={len(rk1_data)})", alpha=0.6, s=15)),
    ],
    fr"Average $a_p$ vs prime index over conductors $c \in [{cmin}, {cmax})$",
    f"ap_averages_rank0_vs_rank1_c{cmin}_to_{cmax}.png",
)

# Plot 2: All ranks (ranks 2 and 3 only when they have data)
rank_series = [
    (rk0_avg_aps, dict(label=f"Rank 0 (n={len(rk0_data)})", alpha=0.5, s=12)),
    (rk1_avg_aps, dict(label=f"Rank 1 (n={len(rk1_data)})", alpha=0.5, s=12)),
]
if len(rk2_data) > 0:
    rank_series.append(
        (rk2_avg_aps, dict(label=f"Rank 2 (n={len(rk2_data)})", alpha=0.5, s=12))
    )
if len(rk3_data) > 0:
    rank_series.append(
        (rk3_avg_aps, dict(label=f"Rank 3 (n={len(rk3_data)})", alpha=0.5, s=12))
    )
plot2_file = _save_scatter_figure(
    rank_series,
    fr"Average $a_p$ by rank over conductors $c \in [{cmin}, {cmax})$",
    f"ap_averages_all_ranks_c{cmin}_to_{cmax}.png",
)

# Plot 3: Overall average (single unlabeled series, so no legend)
plot3_file = _save_scatter_figure(
    [(overall_avg_aps, dict(alpha=0.6, s=15, color='purple'))],
    fr"Overall average $a_p$ over all {len(curve_database)} curves with $c \in [{cmin}, {cmax})$",
    f"ap_average_overall_c{cmin}_to_{cmax}.png",
    legend=False,
)

print("\nAll plots saved!")

# ========== PRINT SUMMARY ==========
# The conductor range is reported as half-open because the curves were
# collected with range(cmin, cmax), which excludes cmax.
summary_lines = (
    "\n" + "="*60,
    "SUMMARY",
    "="*60,
    f"Conductor range: [{cmin}, {cmax})",
    f"Number of primes: {N} (up to {primes_list[-1]})",
    "\nCurves processed by rank:",
    f"  Rank 0: {len(rk0_data)} curves",
    f"  Rank 1: {len(rk1_data)} curves",
    f"  Rank 2: {len(rk2_data)} curves",
    f"  Rank 3: {len(rk3_data)} curves",
    f"  Total:  {len(curve_database)} curves",
    f"\nAll files saved in: {output_dir}",
    "\nMain database files:",
    f"  - curve_database_c{cmin}_to_{cmax}.sobj",
    f"  - curve_database_c{cmin}_to_{cmax}.pkl",
    "\nSeparate data files by rank:",
    f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_{{labels,conductors,isogeny_classes,ap_matrix}}.sobj",
    f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_ap_matrix.npy",
    "\nCombined separate data files:",
    f"  - all_{{labels,conductors,isogeny_classes,ranks,ap_matrix}}_c{cmin}_to_{cmax}.sobj",
    f"  - all_ap_matrix_c{cmin}_to_{cmax}.npy",
    "\nAverage files:",
    f"  - average_aps_c{cmin}_to_{cmax}.sobj/.pkl",
    f"  - rank{{0,1,2,3,overall}}_avg_aps_c{cmin}_to_{cmax}.sobj",
    "\nPlots:",
    "  - 3 PNG files",
    "="*60,
)
for summary_line in summary_lines:
    print(summary_line)

Output directory: Curve Database (Conductor < 100 000)/
Using 28 CPU cores
Extracting curves in conductor range [1, 100000]...


Loading curves: 657396it [22:35, 485.01it/s] 


Found 267565 rank 0 curves
Found 332314 rank 1 curves
Found 56975 rank 2 curves
Found 542 rank 3 curves
Extracting isogeny class representatives...


Rank 0 isogeny classes: 100%|███████████████████████████████████████████████| 267565/267565 [00:00<00:00, 272801.38it/s]
Rank 1 isogeny classes: 100%|███████████████████████████████████████████████| 332314/332314 [00:01<00:00, 316788.85it/s]
Rank 2 isogeny classes: 100%|█████████████████████████████████████████████████| 56975/56975 [00:00<00:00, 345753.75it/s]
Rank 3 isogeny classes: 100%|█████████████████████████████████████████████████████| 542/542 [00:00<00:00, 291974.41it/s]


Found 168760 rank 0 isogeny classes
Found 222439 rank 1 isogeny classes
Found 45496 rank 2 isogeny classes
Found 531 rank 3 isogeny classes
Total curves to process: 437226
Computing first 1000 primes...
Prime range: 2 to 7919
Computing ap values for all curves in parallel...


Processing curves: 100%|███████████████████████████████████████████████████████| 437226/437226 [15:23<00:00, 473.66it/s]


Building database dictionary...
Successfully processed 437226 curves
Skipped 0 curves due to errors

SAVING FILES
Saving complete database to Curve Database (Conductor < 100 000)/curve_database_c1_to_100000.sobj...
Also saved as pickle to Curve Database (Conductor < 100 000)/curve_database_c1_to_100000.pkl

Organizing by rank for analysis...
Rank 0: 168760 curves
Rank 1: 222439 curves
Rank 2: 45496 curves
Rank 3: 531 curves

Saving separate data files...
  Saved primes list: Curve Database (Conductor < 100 000)/primes_list_N1000.sobj
  Saved rank 0 data: 168760 curves
  Saved rank 1 data: 222439 curves
  Saved rank 2 data: 45496 curves
  Saved rank 3 data: 531 curves

Saving combined separate data files...
  Saved combined data for all 437226 curves

Computing average ap values...
Saving average ap's to Curve Database (Conductor < 100 000)/average_aps_c1_to_100000.sobj...

Creating plots...
  Saved plot: Curve Database (Conductor < 100 000)/ap_averages_rank0_vs_rank1_c1_to_100000.png
 

In [1]:
# Parallelized with dictionary structure indexed by isogeny class

from sage.databases.cremona import CremonaDatabase
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from joblib import Parallel, delayed
import gc
import multiprocessing
import os

# Create output directory (no error if it already exists)
output_dir = "Curve Database (Conductor < 100 000)/"
os.makedirs(output_dir, exist_ok=True)
print(f"Output directory: {output_dir}")

# Get number of available cores; used below as the joblib worker count
n_cores = multiprocessing.cpu_count()
print(f"Using {n_cores} CPU cores")

# Database of elliptic curves over the rationals
db = CremonaDatabase()

# Define conductor range.  The range is half-open: it is consumed through
# range(cmin, cmax) below, so cmin is included and cmax itself is excluded.
cmax = 100000
cmin = 1

# Extract elliptic curves with conductor in [cmin, cmax) and rank 0, 1, 2, 3.
# The message uses half-open notation so that it matches what is queried.
print(f"Extracting curves in conductor range [{cmin}, {cmax})...")
rank_zero_curves = []
rank_one_curves = []
rank_two_curves = []
rank_three_curves = []

# rank -> destination list (one lookup instead of an if/elif ladder)
curves_by_rank = {
    0: rank_zero_curves,
    1: rank_one_curves,
    2: rank_two_curves,
    3: rank_three_curves,
}
unclassified_count = 0  # curves whose rank is outside 0..3

for E in tqdm(db.iter(range(cmin, cmax)), desc="Loading curves"):
    bucket = curves_by_rank.get(int(E.rank()))
    # Compare against None: an empty bucket is falsy but still a valid target.
    if bucket is None:
        unclassified_count += 1
    else:
        bucket.append(E)

print(f"Found {len(rank_zero_curves)} rank 0 curves")
print(f"Found {len(rank_one_curves)} rank 1 curves")
print(f"Found {len(rank_two_curves)} rank 2 curves")
print(f"Found {len(rank_three_curves)} rank 3 curves")
if unclassified_count:
    # Previously such curves were dropped without any trace.
    print(f"Warning: ignored {unclassified_count} curves with rank outside 0-3")

# Extract representatives for different isogeny classes using LABELS
print("Extracting isogeny class representatives...")

def get_isogeny_class(E):
    """Return the Cremona label of E without its trailing digits.

    Example: a curve labelled ``'11a1'`` yields ``'11a'``.
    """
    label = E.cremona_label()
    keep = len(label)
    # Scan from the right; stop at the first character that is not a digit.
    for ch in reversed(label):
        if not ch.isdigit():
            break
        keep -= 1
    return label[:keep]

def extract_isogeny_reps(curve_list, desc):
    """Keep the first curve encountered from each isogeny class.

    curve_list -- iterable of curves accepted by get_isogeny_class
    desc       -- caption shown on the tqdm progress bar
    Returns the kept curves as a list, in their original order.
    """
    kept = []
    classes_seen = set()

    for curve in tqdm(curve_list, desc=desc):
        class_label = get_isogeny_class(curve)
        if class_label in classes_seen:
            continue  # this class already has its representative
        classes_seen.add(class_label)
        kept.append(curve)

    return kept

# One representative per isogeny class, computed separately for each rank list
rk0_isogeny_reps, rk1_isogeny_reps, rk2_isogeny_reps, rk3_isogeny_reps = [
    extract_isogeny_reps(curves, caption)
    for curves, caption in (
        (rank_zero_curves, "Rank 0 isogeny classes"),
        (rank_one_curves, "Rank 1 isogeny classes"),
        (rank_two_curves, "Rank 2 isogeny classes"),
        (rank_three_curves, "Rank 3 isogeny classes"),
    )
]

print(f"Found {len(rk0_isogeny_reps)} rank 0 isogeny classes")
print(f"Found {len(rk1_isogeny_reps)} rank 1 isogeny classes")
print(f"Found {len(rk2_isogeny_reps)} rank 2 isogeny classes")
print(f"Found {len(rk3_isogeny_reps)} rank 3 isogeny classes")

# Single work list: rank 0 first, then rank 1, 2 and 3
all_curves = [
    *rk0_isogeny_reps,
    *rk1_isogeny_reps,
    *rk2_isogeny_reps,
    *rk3_isogeny_reps,
]
print(f"Total isogeny classes to process: {len(all_curves)}")

# Primes at which a_p is evaluated
N = 1000  # Number of primes to compute
print(f"Computing first {N} primes...")
primes_list = list(primes_first_n(N))
print(f"Prime range: {primes_list[0]} to {primes_list[-1]}")

# Function to compute curve data
def compute_curve_data(E, primes):
    """Compute the database record of one isogeny-class representative.

    E      -- curve providing cremona_label(), conductor(), rank(), aplist(n)
    primes -- non-empty ascending list of primes; only its last element is
              used, as the upper bound passed to E.aplist.  Per the Sage
              documentation aplist(n) covers every prime p <= n, so
              'ap_list' lines up with ``primes`` only when ``primes``
              contains all primes up to its last element.
    Returns a dict with keys 'isogeny_class', 'conductor', 'rank' and
    'ap_list', all holding plain Python values.  If any step raises, a
    message is printed and None is returned instead.
    """
    try:
        isogeny_class = get_isogeny_class(E)
        conductor = E.conductor()
        rank = E.rank()

        # Convert to Python ints, like 'conductor' and 'rank' below, so the
        # pickled database does not carry Sage Integer objects.
        ap_list = [int(ap) for ap in E.aplist(primes[-1])]

        return {
            'isogeny_class': isogeny_class,
            'conductor': int(conductor),
            'rank': int(rank),
            'ap_list': ap_list
        }
    except Exception as e:
        print(f"  Error processing {get_isogeny_class(E)}: {e}")
        return None

# Process all curves in parallel: one joblib worker process per core
print("Computing ap values for all isogeny classes in parallel...")
run_in_parallel = Parallel(n_jobs=n_cores, backend='multiprocessing')
results = run_in_parallel(
    delayed(compute_curve_data)(E, primes_list)
    for E in tqdm(all_curves, desc="Processing isogeny classes")
)

# Build the main database dictionary indexed by isogeny class.
# compute_curve_data returns None for a failed curve; those are only counted.
print("Building database dictionary...")
curve_database = {
    entry['isogeny_class']: entry for entry in results if entry is not None
}
skipped_count = len([entry for entry in results if entry is None])

print(f"Successfully processed {len(curve_database)} isogeny classes")
print(f"Skipped {skipped_count} isogeny classes due to errors")

# Save the main database
banner = "=" * 60
print("\n" + banner)
print("SAVING FILES")
print(banner)

database_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.sobj")
print(f"Saving complete database to {database_file}...")
save(curve_database, database_file)

# Second copy of the same dictionary, written with the standard pickle module
import pickle
pickle_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.pkl")
with open(pickle_file, 'wb') as pickle_out:
    pickle.dump(curve_database, pickle_out)
print(f"Also saved as pickle to {pickle_file}")

# Separate curves by rank for analysis (one list of records per rank 0..3)
print("\nOrganizing by rank for analysis...")
rk0_data, rk1_data, rk2_data, rk3_data = [
    [rec for rec in curve_database.values() if rec['rank'] == wanted_rank]
    for wanted_rank in range(4)
]

print(f"Rank 0: {len(rk0_data)} isogeny classes")
print(f"Rank 1: {len(rk1_data)} isogeny classes")
print(f"Rank 2: {len(rk2_data)} isogeny classes")
print(f"Rank 3: {len(rk3_data)} isogeny classes")

# ========== SAVE SEPARATE DATA FILES ==========
print("\nSaving separate data files...")

# The primes list is stored once; the a_p matrices are indexed against it
primes_file = os.path.join(output_dir, f"primes_list_N{N}.sobj")
save(primes_list, primes_file)
print(f"  Saved primes list: {primes_file}")

# Function to save rank-specific data
def save_rank_data(rank_data, rank_num):
    """Write the per-rank data files into output_dir.

    rank_data -- list of record dicts with keys 'isogeny_class', 'conductor'
                 and 'ap_list'
    rank_num  -- rank number used in the file names
    Writes three .sobj files (isogeny classes, conductors, a_p matrix) plus a
    .npy copy of the int32 a_p matrix, then prints a confirmation.
    """
    stem = os.path.join(output_dir, f"rank{rank_num}_c{cmin}_to_{cmax}")

    # File-name suffix -> payload, in the order the files are written
    columns = {
        'isogeny_classes': [rec['isogeny_class'] for rec in rank_data],
        'conductors': [rec['conductor'] for rec in rank_data],
        'ap_matrix': np.array([rec['ap_list'] for rec in rank_data], dtype=np.int32),
    }
    for suffix, payload in columns.items():
        save(payload, f"{stem}_{suffix}.sobj")

    # Also save ap_matrix as numpy file for easier loading
    np.save(f"{stem}_ap_matrix.npy", columns['ap_matrix'])

    print(f"  Saved rank {rank_num} data: {len(rank_data)} isogeny classes")

# Per-rank files are written only for ranks that actually occur
for rank_value, rank_records in enumerate((rk0_data, rk1_data, rk2_data, rk3_data)):
    if rank_records:
        save_rank_data(rank_records, rank_value)

# Save all data combined (separate from dictionary)
print("\nSaving combined separate data files...")
records = list(curve_database.values())
all_isogeny_classes = [rec['isogeny_class'] for rec in records]
all_conductors = [rec['conductor'] for rec in records]
all_ranks = [rec['rank'] for rec in records]
all_ap_matrix = np.array([rec['ap_list'] for rec in records], dtype=np.int32)

for payload, filename in (
    (all_isogeny_classes, f"all_isogeny_classes_c{cmin}_to_{cmax}.sobj"),
    (all_conductors, f"all_conductors_c{cmin}_to_{cmax}.sobj"),
    (all_ranks, f"all_ranks_c{cmin}_to_{cmax}.sobj"),
    (all_ap_matrix, f"all_ap_matrix_c{cmin}_to_{cmax}.sobj"),
):
    save(payload, os.path.join(output_dir, filename))
np.save(os.path.join(output_dir, f"all_ap_matrix_c{cmin}_to_{cmax}.npy"), all_ap_matrix)

print(f"  Saved combined data for all {len(curve_database)} isogeny classes")

# ========== COMPUTE AND SAVE AVERAGES ==========
print("\nComputing average ap values...")

def compute_average_aps(curve_data_list):
    """Average the 'ap_list' entries of the given records, column by column.

    Returns a float64 array with one mean per position of 'ap_list'.
    For an empty input the result is np.zeros(N), with N the module-level
    number of primes.
    """
    if len(curve_data_list) > 0:
        rows = [record['ap_list'] for record in curve_data_list]
        return np.array(rows, dtype=np.float64).mean(axis=0)
    return np.zeros(N)

# Column-wise mean a_p for each rank
rk0_avg_aps, rk1_avg_aps, rk2_avg_aps, rk3_avg_aps = [
    compute_average_aps(group)
    for group in (rk0_data, rk1_data, rk2_data, rk3_data)
]

# Compute overall average across ALL isogeny classes
overall_avg_aps = all_ap_matrix.mean(axis=0)

# Averages, the primes they refer to, and the group sizes, in one dictionary
averages_dict = {
    'primes': primes_list,
    'rank_0_average': list(rk0_avg_aps),
    'rank_1_average': list(rk1_avg_aps),
    'rank_2_average': list(rk2_avg_aps),
    'rank_3_average': list(rk3_avg_aps),
    'overall_average': list(overall_avg_aps),
    'rank_0_count': len(rk0_data),
    'rank_1_count': len(rk1_data),
    'rank_2_count': len(rk2_data),
    'rank_3_count': len(rk3_data),
    'total_count': len(curve_database)
}

averages_file = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.sobj")
print(f"Saving average ap's to {averages_file}...")
save(averages_dict, averages_file)

# Also save as pickle
averages_pickle = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.pkl")
with open(averages_pickle, 'wb') as pickle_out:
    pickle.dump(averages_dict, pickle_out)

# Save individual average arrays
for averages, filename in (
    (rk0_avg_aps, f"rank0_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk1_avg_aps, f"rank1_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk2_avg_aps, f"rank2_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk3_avg_aps, f"rank3_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (overall_avg_aps, f"overall_avg_aps_c{cmin}_to_{cmax}.sobj"),
):
    save(averages, os.path.join(output_dir, filename))

# ========== CREATE PLOTS ==========
print("\nCreating plots...")


def _save_scatter_figure(series, title, filename, legend=True):
    """Draw one scatter figure of average a_p against prime index and save it.

    series   -- list of (values, scatter keyword dict) pairs, drawn in order;
                each ``values`` must have N entries
    title    -- figure title
    filename -- PNG file name, created inside output_dir
    legend   -- whether to draw a legend
    Returns the path of the written file.
    """
    plt.figure(figsize=(14, 7))
    for values, scatter_kwargs in series:
        plt.scatter(range(N), values, **scatter_kwargs)
    plt.axhline(y=0, color='gray', linestyle='--', alpha=0.3)
    if legend:
        plt.legend(fontsize=12)
    plt.xlabel(r"Prime index $i$", fontsize=12)
    plt.ylabel(r"Average $a_{p_i}$", fontsize=12)
    plt.title(title, fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    target = os.path.join(output_dir, filename)
    plt.savefig(target, dpi=150)
    print(f"  Saved plot: {target}")
    plt.close()  # release the figure before the next one is created
    return target


# Titles use the half-open interval [cmin, cmax): the data were collected with
# range(cmin, cmax), which never queries conductor cmax.

# Plot 1: Rank 0 vs Rank 1
plot1_file = _save_scatter_figure(
    [
        (rk0_avg_aps, dict(label=f"Rank 0 (n={len(rk0_data)})", alpha=0.6, s=15)),
        (rk1_avg_aps, dict(label=f"Rank 1 (n={len(rk1_data)})", alpha=0.6, s=15)),
    ],
    fr"Average $a_p$ vs prime index over conductors $c \in [{cmin}, {cmax})$",
    f"ap_averages_rank0_vs_rank1_c{cmin}_to_{cmax}.png",
)

# Plot 2: All ranks (ranks 2 and 3 only when they have data)
rank_series = [
    (rk0_avg_aps, dict(label=f"Rank 0 (n={len(rk0_data)})", alpha=0.5, s=12)),
    (rk1_avg_aps, dict(label=f"Rank 1 (n={len(rk1_data)})", alpha=0.5, s=12)),
]
if len(rk2_data) > 0:
    rank_series.append(
        (rk2_avg_aps, dict(label=f"Rank 2 (n={len(rk2_data)})", alpha=0.5, s=12))
    )
if len(rk3_data) > 0:
    rank_series.append(
        (rk3_avg_aps, dict(label=f"Rank 3 (n={len(rk3_data)})", alpha=0.5, s=12))
    )
plot2_file = _save_scatter_figure(
    rank_series,
    fr"Average $a_p$ by rank over conductors $c \in [{cmin}, {cmax})$",
    f"ap_averages_all_ranks_c{cmin}_to_{cmax}.png",
)

# Plot 3: Overall average (single unlabeled series, so no legend)
plot3_file = _save_scatter_figure(
    [(overall_avg_aps, dict(alpha=0.6, s=15, color='purple'))],
    fr"Overall average $a_p$ over all {len(curve_database)} isogeny classes with $c \in [{cmin}, {cmax})$",
    f"ap_average_overall_c{cmin}_to_{cmax}.png",
    legend=False,
)

print("\nAll plots saved!")

# ========== PRINT SUMMARY ==========
# Two corrections to the printed text:
#  * the conductor range is half-open, since the curves were collected with
#    range(cmin, cmax), which excludes cmax;
#  * the usage example strips trailing digits instead of slicing '[:3]',
#    which only worked for labels whose class prefix is 3 characters long
#    (e.g. '389a1'[:3] gives '389', not '389a').
summary_lines = (
    "\n" + "="*60,
    "SUMMARY",
    "="*60,
    f"Conductor range: [{cmin}, {cmax})",
    f"Number of primes: {N} (up to {primes_list[-1]})",
    "\nIsogeny classes processed by rank:",
    f"  Rank 0: {len(rk0_data)} isogeny classes",
    f"  Rank 1: {len(rk1_data)} isogeny classes",
    f"  Rank 2: {len(rk2_data)} isogeny classes",
    f"  Rank 3: {len(rk3_data)} isogeny classes",
    f"  Total:  {len(curve_database)} isogeny classes",
    f"\nAll files saved in: {output_dir}",
    "\nMain database files:",
    f"  - curve_database_c{cmin}_to_{cmax}.sobj (indexed by isogeny class)",
    f"  - curve_database_c{cmin}_to_{cmax}.pkl",
    "\nDatabase usage example:",
    f"  curve_db = load('curve_database_c{cmin}_to_{cmax}.sobj')",
    "  # For curve label '11a1', extract isogeny class '11a':",
    "  isogeny_class = '11a1'.rstrip('0123456789')  # or use get_isogeny_class()",
    "  ap_list = curve_db[isogeny_class]['ap_list']",
    "\nSeparate data files by rank:",
    f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_{{isogeny_classes,conductors,ap_matrix}}.sobj",
    f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_ap_matrix.npy",
    "\nCombined separate data files:",
    f"  - all_{{isogeny_classes,conductors,ranks,ap_matrix}}_c{cmin}_to_{cmax}.sobj",
    f"  - all_ap_matrix_c{cmin}_to_{cmax}.npy",
    "\nAverage files:",
    f"  - average_aps_c{cmin}_to_{cmax}.sobj/.pkl",
    f"  - rank{{0,1,2,3,overall}}_avg_aps_c{cmin}_to_{cmax}.sobj",
    "\nPlots:",
    "  - 3 PNG files",
    "="*60,
)
for summary_line in summary_lines:
    print(summary_line)

Output directory: Curve Database (Conductor < 100 000)/
Using 28 CPU cores
Extracting curves in conductor range [1, 100000]...


Loading curves: 657396it [28:13, 388.20it/s] 


Found 267565 rank 0 curves
Found 332314 rank 1 curves
Found 56975 rank 2 curves
Found 542 rank 3 curves
Extracting isogeny class representatives...


Rank 0 isogeny classes: 100%|███████████████████████████████████████████████| 267565/267565 [00:00<00:00, 282223.83it/s]
Rank 1 isogeny classes: 100%|███████████████████████████████████████████████| 332314/332314 [00:00<00:00, 332758.92it/s]
Rank 2 isogeny classes: 100%|█████████████████████████████████████████████████| 56975/56975 [00:00<00:00, 373104.54it/s]
Rank 3 isogeny classes: 100%|█████████████████████████████████████████████████████| 542/542 [00:00<00:00, 319959.57it/s]


Found 168760 rank 0 isogeny classes
Found 222439 rank 1 isogeny classes
Found 45496 rank 2 isogeny classes
Found 531 rank 3 isogeny classes
Total isogeny classes to process: 437226
Computing first 1000 primes...
Prime range: 2 to 7919
Computing ap values for all isogeny classes in parallel...


Processing isogeny classes: 100%|██████████████████████████████████████████████| 437226/437226 [15:02<00:00, 484.29it/s]


Building database dictionary...
Successfully processed 437226 isogeny classes
Skipped 0 isogeny classes due to errors

SAVING FILES
Saving complete database to Curve Database (Conductor < 100 000)/curve_database_c1_to_100000.sobj...
Also saved as pickle to Curve Database (Conductor < 100 000)/curve_database_c1_to_100000.pkl

Organizing by rank for analysis...
Rank 0: 168760 isogeny classes
Rank 1: 222439 isogeny classes
Rank 2: 45496 isogeny classes
Rank 3: 531 isogeny classes

Saving separate data files...
  Saved primes list: Curve Database (Conductor < 100 000)/primes_list_N1000.sobj
  Saved rank 0 data: 168760 isogeny classes
  Saved rank 1 data: 222439 isogeny classes
  Saved rank 2 data: 45496 isogeny classes
  Saved rank 3 data: 531 isogeny classes

Saving combined separate data files...
  Saved combined data for all 437226 isogeny classes

Computing average ap values...
Saving average ap's to Curve Database (Conductor < 100 000)/average_aps_c1_to_100000.sobj...

Creating plots.

In [None]:
# Parallelized with dictionary structure indexed by isogeny class

from sage.databases.cremona import CremonaDatabase
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from joblib import Parallel, delayed
import gc
import multiprocessing
import os

# Create output directory
# (exist_ok=True lets the cell be re-run without failing when the folder is already there)
output_dir = "Curve Database (Conductor < 100 000)/"
os.makedirs(output_dir, exist_ok=True)
print(f"Output directory: {output_dir}")

# Get number of available cores
# (passed as n_jobs to the joblib Parallel calls further down)
n_cores = multiprocessing.cpu_count()
print(f"Using {n_cores} CPU cores")

# Database of elliptic curves over the rationals
# NOTE: load_curves_for_conductor opens its own CremonaDatabase per call,
# so this module-level handle is not what the workers read from.
db = CremonaDatabase()

# Define conductor range
# The conductors actually visited are range(cmin, cmax), i.e. cmax itself is excluded.
cmax = 100000
cmin = 1

# Parallelized curve loading by conductor
def load_curves_for_conductor(c):
    """Load every curve of conductor ``c`` and bucket the curves by rank.

    Meant to be called from a joblib worker: it opens its own
    ``CremonaDatabase`` handle on every call instead of relying on a
    handle created in the parent process.

    Args:
        c: Conductor to look up.

    Returns:
        A 4-tuple ``(rk0, rk1, rk2, rk3)`` of lists containing the curves
        of rank 0, 1, 2 and 3 respectively. Curves of any other rank are
        dropped. If the lookup fails part-way through, the curves gathered
        up to that point are still returned.
    """
    db_local = CremonaDatabase()  # Fresh handle for this call
    by_rank = ([], [], [], [])

    try:
        for E in db_local.iter([c]):
            r = E.rank()
            if 0 <= r <= 3:
                by_rank[int(r)].append(E)
    except Exception as e:
        # Stay best-effort (one bad conductor must not abort the whole run),
        # but report the failure instead of hiding it, and do not swallow
        # KeyboardInterrupt / SystemExit the way a bare ``except`` would.
        print(f"  Error loading conductor {c}: {e}")

    return by_rank

# Collect the curves of rank 0-3 for every conductor in range(cmin, cmax)
# (note that range() stops before cmax)
print(f"Extracting curves in conductor range [{cmin}, {cmax}] in parallel...")
conductor_list = list(range(cmin, cmax))

# One job per conductor, spread over all cores
load_job = delayed(load_curves_for_conductor)
results = Parallel(n_jobs=n_cores, backend='multiprocessing')(
    load_job(conductor)
    for conductor in tqdm(conductor_list, desc="Loading curves by conductor")
)

# Flatten the per-conductor 4-tuples into one list per rank
print("Merging results...")
rank_zero_curves, rank_one_curves, rank_two_curves, rank_three_curves = [], [], [], []
merged_buckets = (rank_zero_curves, rank_one_curves, rank_two_curves, rank_three_curves)

for per_conductor in results:
    for bucket, found in zip(merged_buckets, per_conductor):
        bucket.extend(found)

print(f"Found {len(rank_zero_curves)} rank 0 curves")
print(f"Found {len(rank_one_curves)} rank 1 curves")
print(f"Found {len(rank_two_curves)} rank 2 curves")
print(f"Found {len(rank_three_curves)} rank 3 curves")

# Next step: keep one curve per isogeny class, identified through its LABEL
print("Extracting isogeny class representatives...")

def get_isogeny_class(E):
    """Return the isogeny-class part of E's Cremona label.

    The class label is the curve label with its trailing run of digits
    removed, e.g. '11a1' -> '11a'.
    """
    full_label = E.cremona_label()
    # Walk the cut position left while the character just before it is a digit
    cut = len(full_label)
    while cut > 0 and full_label[cut - 1].isdigit():
        cut -= 1
    return full_label[:cut]

def extract_isogeny_reps(curve_list, desc):
    """Keep the first curve seen for each isogeny class.

    Curves are visited in list order (with a tqdm progress bar labelled
    ``desc``); the result preserves the order in which classes first appear.
    """
    first_by_class = {}

    for curve in tqdm(curve_list, desc=desc):
        # setdefault only stores the curve when the class is new
        first_by_class.setdefault(get_isogeny_class(curve), curve)

    return list(first_by_class.values())

# One representative per isogeny class, rank by rank
rk0_isogeny_reps, rk1_isogeny_reps, rk2_isogeny_reps, rk3_isogeny_reps = [
    extract_isogeny_reps(rank_curves, bar_label)
    for rank_curves, bar_label in (
        (rank_zero_curves, "Rank 0 isogeny classes"),
        (rank_one_curves, "Rank 1 isogeny classes"),
        (rank_two_curves, "Rank 2 isogeny classes"),
        (rank_three_curves, "Rank 3 isogeny classes"),
    )
]

print(f"Found {len(rk0_isogeny_reps)} rank 0 isogeny classes")
print(f"Found {len(rk1_isogeny_reps)} rank 1 isogeny classes")
print(f"Found {len(rk2_isogeny_reps)} rank 2 isogeny classes")
print(f"Found {len(rk3_isogeny_reps)} rank 3 isogeny classes")

# Single work list: rank 0 first, then rank 1, 2 and 3
all_curves = [
    *rk0_isogeny_reps,
    *rk1_isogeny_reps,
    *rk2_isogeny_reps,
    *rk3_isogeny_reps,
]
print(f"Total isogeny classes to process: {len(all_curves)}")

# The first N primes are the p at which a_p is evaluated
N = 1000  # Number of primes to compute
print(f"Computing first {N} primes...")
primes_list = list(primes_first_n(N))
print(f"Prime range: {primes_list[0]} to {primes_list[-1]}")

# Function to compute curve data
def compute_curve_data(E, primes):
    """Collect label, conductor, rank and a_p values for one curve.

    Args:
        E: Curve object providing ``cremona_label()``, ``conductor()``,
            ``rank()`` and ``ap(p)``.
        primes: Iterable of primes at which ``E.ap`` is evaluated, in the
            order the values should appear in the result.

    Returns:
        A dict with keys ``'isogeny_class'`` (str), ``'conductor'`` (int),
        ``'rank'`` (int) and ``'ap_list'`` (list of int), or ``None`` if
        any step raised. Failures are reported with a printed message.
    """
    # Placeholder so the error message below can always be built, even when
    # the label lookup itself is what failed.
    isogeny_class = "<unknown>"
    try:
        isogeny_class = get_isogeny_class(E)
        conductor = int(E.conductor())
        rank = int(E.rank())
        # Convert to plain Python ints so the result pickles portably
        ap_list = [int(E.ap(p)) for p in primes]
    except Exception as e:
        print(f"  Error processing {isogeny_class}: {e}")
        return None

    return {
        'isogeny_class': isogeny_class,
        'conductor': conductor,
        'rank': rank,
        'ap_list': ap_list
    }

# Fan the per-curve a_p computation out over all cores
print("Computing ap values for all isogeny classes in parallel...")
curve_job = delayed(compute_curve_data)
results = Parallel(n_jobs=n_cores, backend='multiprocessing')(
    curve_job(curve, primes_list)
    for curve in tqdm(all_curves, desc="Processing isogeny classes")
)

# Index the successful results by isogeny class; count the failures (None)
print("Building database dictionary...")
curve_database = {}
skipped_count = 0

for entry in results:
    if entry is None:
        skipped_count += 1
        continue
    curve_database[entry['isogeny_class']] = entry

print(f"Successfully processed {len(curve_database)} isogeny classes")
print(f"Skipped {skipped_count} isogeny classes due to errors")

# Save the main database
# (curve_database maps isogeny-class label -> dict with 'isogeny_class',
#  'conductor', 'rank' and 'ap_list', as built from compute_curve_data results)
print("\n" + "="*60)
print("SAVING FILES")
print("="*60)

database_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.sobj")
print(f"Saving complete database to {database_file}...")
# `save` is not imported in this cell -- presumably the Sage global; confirm when running outside Sage
save(curve_database, database_file)

# Also save as a more portable format (pickle)
# NOTE: this mid-cell import is also what the later pickle.dump of the averages relies on.
import pickle
pickle_file = os.path.join(output_dir, f"curve_database_c{cmin}_to_{cmax}.pkl")
with open(pickle_file, 'wb') as f:
    pickle.dump(curve_database, f)
print(f"Also saved as pickle to {pickle_file}")

# Split the database records into one list per rank (0 through 3)
print("\nOrganizing by rank for analysis...")
all_records = list(curve_database.values())
rk0_data, rk1_data, rk2_data, rk3_data = (
    [record for record in all_records if record['rank'] == wanted_rank]
    for wanted_rank in (0, 1, 2, 3)
)

print(f"Rank 0: {len(rk0_data)} isogeny classes")
print(f"Rank 1: {len(rk1_data)} isogeny classes")
print(f"Rank 2: {len(rk2_data)} isogeny classes")
print(f"Rank 3: {len(rk3_data)} isogeny classes")

# ========== SAVE SEPARATE DATA FILES ==========
print("\nSaving separate data files...")

# Save primes list
# (the file name records N, the number of primes, so runs with a different N do not collide)
primes_file = os.path.join(output_dir, f"primes_list_N{N}.sobj")
save(primes_list, primes_file)
print(f"  Saved primes list: {primes_file}")

# Function to save rank-specific data
def save_rank_data(rank_data, rank_num):
    """Write the per-rank data files for one rank into ``output_dir``.

    From the record dicts in ``rank_data`` this saves the isogeny-class
    labels, the conductors and the int32 a_p matrix (one row per record)
    as .sobj files, plus the a_p matrix once more as a .npy file. File
    names start with ``rank{rank_num}_c{cmin}_to_{cmax}``.
    """
    prefix = f"rank{rank_num}_c{cmin}_to_{cmax}"

    # Pull the three columns out of the records
    isogeny_classes = [record['isogeny_class'] for record in rank_data]
    conductors = [record['conductor'] for record in rank_data]
    ap_rows = [record['ap_list'] for record in rank_data]
    ap_matrix = np.array(ap_rows, dtype=np.int32)

    # .sobj outputs, written in this order
    sobj_outputs = (
        (isogeny_classes, f"{prefix}_isogeny_classes.sobj"),
        (conductors, f"{prefix}_conductors.sobj"),
        (ap_matrix, f"{prefix}_ap_matrix.sobj"),
    )
    for payload, file_name in sobj_outputs:
        save(payload, os.path.join(output_dir, file_name))

    # NumPy copy of the matrix for loading without Sage
    np.save(os.path.join(output_dir, f"{prefix}_ap_matrix.npy"), ap_matrix)

    print(f"  Saved rank {rank_num} data: {len(rank_data)} isogeny classes")

# Write the per-rank files, skipping ranks that have no records
for rank_records, rank_number in (
    (rk0_data, 0),
    (rk1_data, 1),
    (rk2_data, 2),
    (rk3_data, 3),
):
    if len(rank_records) > 0:
        save_rank_data(rank_records, rank_number)

# Column-wise copies of the whole database (independent of the dictionary)
print("\nSaving combined separate data files...")
all_isogeny_classes, all_conductors, all_ranks, combined_ap_rows = [], [], [], []
for db_record in curve_database.values():
    all_isogeny_classes.append(db_record['isogeny_class'])
    all_conductors.append(db_record['conductor'])
    all_ranks.append(db_record['rank'])
    combined_ap_rows.append(db_record['ap_list'])
all_ap_matrix = np.array(combined_ap_rows, dtype=np.int32)

for combined_payload, combined_name in (
    (all_isogeny_classes, f"all_isogeny_classes_c{cmin}_to_{cmax}.sobj"),
    (all_conductors, f"all_conductors_c{cmin}_to_{cmax}.sobj"),
    (all_ranks, f"all_ranks_c{cmin}_to_{cmax}.sobj"),
    (all_ap_matrix, f"all_ap_matrix_c{cmin}_to_{cmax}.sobj"),
):
    save(combined_payload, os.path.join(output_dir, combined_name))
np.save(os.path.join(output_dir, f"all_ap_matrix_c{cmin}_to_{cmax}.npy"), all_ap_matrix)

print(f"  Saved combined data for all {len(curve_database)} isogeny classes")

# ========== COMPUTE AND SAVE AVERAGES ==========
print("\nComputing average ap values...")

def compute_average_aps(curve_data_list):
    """Average the 'ap_list' entries of the given records, position by position.

    Returns a float64 array with one mean per prime position. For an empty
    input it returns an all-zero array of length N (the module-level prime
    count).
    """
    if len(curve_data_list) > 0:
        ap_rows = [record['ap_list'] for record in curve_data_list]
        # Rows are records, columns are primes; average down the columns
        return np.array(ap_rows, dtype=np.float64).mean(axis=0)
    return np.zeros(N)

# Per-rank averages, in rank order
rk0_avg_aps, rk1_avg_aps, rk2_avg_aps, rk3_avg_aps = (
    compute_average_aps(rank_records)
    for rank_records in (rk0_data, rk1_data, rk2_data, rk3_data)
)

# Average over ALL isogeny classes, regardless of rank
overall_avg_aps = all_ap_matrix.mean(axis=0)

# Bundle the averages and the record counts into one dictionary
averages_dict = {
    'primes': primes_list,
    'rank_0_average': list(rk0_avg_aps),
    'rank_1_average': list(rk1_avg_aps),
    'rank_2_average': list(rk2_avg_aps),
    'rank_3_average': list(rk3_avg_aps),
    'overall_average': list(overall_avg_aps),
    'rank_0_count': len(rk0_data),
    'rank_1_count': len(rk1_data),
    'rank_2_count': len(rk2_data),
    'rank_3_count': len(rk3_data),
    'total_count': len(curve_database)
}

averages_file = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.sobj")
print(f"Saving average ap's to {averages_file}...")
save(averages_dict, averages_file)

# Same dictionary again as a pickle
averages_pickle = os.path.join(output_dir, f"average_aps_c{cmin}_to_{cmax}.pkl")
with open(averages_pickle, 'wb') as pickle_out:
    pickle.dump(averages_dict, pickle_out)

# Each average array also gets its own .sobj file
for avg_array, avg_file_name in (
    (rk0_avg_aps, f"rank0_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk1_avg_aps, f"rank1_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk2_avg_aps, f"rank2_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (rk3_avg_aps, f"rank3_avg_aps_c{cmin}_to_{cmax}.sobj"),
    (overall_avg_aps, f"overall_avg_aps_c{cmin}_to_{cmax}.sobj"),
):
    save(avg_array, os.path.join(output_dir, avg_file_name))

# ========== CREATE PLOTS ==========
print("\nCreating plots...")


def _finish_ap_plot(title_text, png_path, with_legend=True):
    """Apply the shared decorations to the current figure, save it and close it."""
    plt.axhline(y=0, color='gray', linestyle='--', alpha=0.3)
    if with_legend:
        plt.legend(fontsize=12)
    plt.xlabel(r"Prime index $i$", fontsize=12)
    plt.ylabel(r"Average $a_{p_i}$", fontsize=12)
    plt.title(title_text, fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(png_path, dpi=150)
    print(f"  Saved plot: {png_path}")
    plt.close()


# x-axis shared by every scatter: the index of the prime, 0 .. N-1
prime_index = range(N)

# Plot 1: Rank 0 vs Rank 1
plot1_file = os.path.join(output_dir, f"ap_averages_rank0_vs_rank1_c{cmin}_to_{cmax}.png")
plt.figure(figsize=(14, 7))
for rank_avg, legend_text in (
    (rk0_avg_aps, f"Rank 0 (n={len(rk0_data)})"),
    (rk1_avg_aps, f"Rank 1 (n={len(rk1_data)})"),
):
    plt.scatter(prime_index, rank_avg, label=legend_text, alpha=0.6, s=15)
_finish_ap_plot(
    fr"Average $a_p$ vs prime index over conductors $c \in [{cmin}, {cmax}]$",
    plot1_file,
)

# Plot 2: All ranks (ranks 2 and 3 are drawn only when they have data)
plot2_file = os.path.join(output_dir, f"ap_averages_all_ranks_c{cmin}_to_{cmax}.png")
plt.figure(figsize=(14, 7))
for rank_records, rank_avg, legend_text, always_drawn in (
    (rk0_data, rk0_avg_aps, f"Rank 0 (n={len(rk0_data)})", True),
    (rk1_data, rk1_avg_aps, f"Rank 1 (n={len(rk1_data)})", True),
    (rk2_data, rk2_avg_aps, f"Rank 2 (n={len(rk2_data)})", False),
    (rk3_data, rk3_avg_aps, f"Rank 3 (n={len(rk3_data)})", False),
):
    if always_drawn or len(rank_records) > 0:
        plt.scatter(prime_index, rank_avg, label=legend_text, alpha=0.5, s=12)
_finish_ap_plot(
    fr"Average $a_p$ by rank over conductors $c \in [{cmin}, {cmax}]$",
    plot2_file,
)

# Plot 3: Overall average (single series, so no legend)
plot3_file = os.path.join(output_dir, f"ap_average_overall_c{cmin}_to_{cmax}.png")
plt.figure(figsize=(14, 7))
plt.scatter(prime_index, overall_avg_aps, alpha=0.6, s=15, color='purple')
_finish_ap_plot(
    fr"Overall average $a_p$ over all {len(curve_database)} isogeny classes with $c \in [{cmin}, {cmax}]$",
    plot3_file,
    with_legend=False,
)

print("\nAll plots saved!")

# ========== PRINT SUMMARY ==========
# Human-readable recap of what was computed and which files were written.
print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"Conductor range: [{cmin}, {cmax}]")
print(f"Number of primes: {N} (up to {primes_list[-1]})")
print(f"\nIsogeny classes processed by rank:")
print(f"  Rank 0: {len(rk0_data)} isogeny classes")
print(f"  Rank 1: {len(rk1_data)} isogeny classes")
print(f"  Rank 2: {len(rk2_data)} isogeny classes")
print(f"  Rank 3: {len(rk3_data)} isogeny classes")
print(f"  Total:  {len(curve_database)} isogeny classes")
print(f"\nAll files saved in: {output_dir}")
print(f"\nMain database files:")
print(f"  - curve_database_c{cmin}_to_{cmax}.sobj (indexed by isogeny class)")
print(f"  - curve_database_c{cmin}_to_{cmax}.pkl")
print(f"\nDatabase usage example:")
print(f"  curve_db = load('curve_database_c{cmin}_to_{cmax}.sobj')")
print(f"  # For curve label '11a1', extract isogeny class '11a':")
# Strip the trailing digits rather than slicing a fixed width: a fixed slice
# such as [:3] is only right for labels shaped exactly like '11a1'.
print("  isogeny_class = '11a1'.rstrip('0123456789')  # or use get_isogeny_class()")
print(f"  ap_list = curve_db[isogeny_class]['ap_list']")
print(f"\nSeparate data files by rank:")
# Doubled braces print literal { } in the file-name patterns below
print(f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_{{isogeny_classes,conductors,ap_matrix}}.sobj")
print(f"  - rank{{0,1,2,3}}_c{cmin}_to_{cmax}_ap_matrix.npy")
print(f"\nCombined separate data files:")
print(f"  - all_{{isogeny_classes,conductors,ranks,ap_matrix}}_c{cmin}_to_{cmax}.sobj")
print(f"  - all_ap_matrix_c{cmin}_to_{cmax}.npy")
print(f"\nAverage files:")
print(f"  - average_aps_c{cmin}_to_{cmax}.sobj/.pkl")
print(f"  - rank{{0,1,2,3,overall}}_avg_aps_c{cmin}_to_{cmax}.sobj")
print(f"\nPlots:")
print(f"  - 3 PNG files")
print("="*60)

Output directory: Curve Database (Conductor < 100 000)/
Using 28 CPU cores
Extracting curves in conductor range [1, 100000] in parallel...


Loading curves by conductor:  31%|██████████████▊                                 | 30800/99999 [08:34<20:45, 55.55it/s]