In [None]:
import gc
import math
import os
import pickle

import matplotlib.pyplot as plt
from sympy import primerange, log

def generate_primes_in_chunk(start, end):
    """
    Return an iterator over the primes p with start <= p < end.

    Thin wrapper around sympy's ``primerange`` so that callers can pull
    primes one at a time instead of holding a whole range in memory.
    """
    prime_iter = primerange(start, end)
    return prime_iter

def compute_primes_in_chunks(limit, chunk_size=5000000, sub_chunk_size=1000000, start_from=2):
    """
    Compute primes, twin primes, and fixed-gap prime pairs in chunks.

    The interval [start_from, limit) is walked in chunks of ``chunk_size``,
    each split into sub-chunks of ``sub_chunk_size``. After every sub-chunk
    the results accumulated so far by this call are pickled to
    ``progress_chunk_<sub-chunk start>.pkl`` in the current directory.

    Args:
        limit: Exclusive upper bound; only primes below it are considered,
            including the larger member of every recorded pair.
        chunk_size: Width of an outer chunk (one progress line is printed
            per outer chunk).
        sub_chunk_size: Width of the inner sub-chunks that are sieved and
            checkpointed one at a time.
        start_from: First integer to examine. Defaults to 2; pass a larger
            value to continue after a previously processed range.

    Returns:
        A tuple ``(all_primes, twin_primes, primes_class_1, primes_class_5,
        gap_counts)`` where ``twin_primes`` is a list of ``(p, p + 2)``
        tuples, the two class lists hold the primes congruent to 1 and 5
        modulo 6, and ``gap_counts`` maps each gap in (8, 10, 12, 14) to a
        list of ``(p, p + gap)`` tuples.

    Notes:
        * Only pairs whose smaller member is congruent to 5 modulo 6 are
          recorded as twin primes, so (3, 5) is not included.
        * ``gap_counts[g]`` holds every pair with both ``p`` and ``p + g``
          prime; the two are not required to be consecutive primes.
        * Every checkpoint stores the cumulative results of this call only.
          Results for integers below ``start_from`` are not part of it.
    """
    gaps = (8, 10, 12, 14)
    # Largest offset that is ever tested; it also covers the twin offset 2.
    lookahead = max(gaps)

    all_primes = []
    twin_primes = []
    gap_counts = {gap: [] for gap in gaps}
    primes_class_1 = []
    primes_class_5 = []

    for start in range(start_from, limit, chunk_size):
        end = min(start + chunk_size, limit)
        print(f"Processing chunk from {start} to {end}")

        for i in range(start, end, sub_chunk_size):
            sub_end = min(i + sub_chunk_size, end)

            # Sieve slightly past the sub-chunk so that a pair (p, p + g)
            # with p inside this sub-chunk and p + g in the next one is
            # still detected. Capping at `limit` keeps both members below
            # the requested bound.
            window_end = min(sub_end + lookahead, limit)
            window_primes = list(generate_primes_in_chunk(i, window_end))
            window_set = set(window_primes)

            # Primes that actually belong to this sub-chunk; the look-ahead
            # primes are picked up again by the next sub-chunk.
            chunk_primes = [p for p in window_primes if p < sub_end]
            all_primes.extend(chunk_primes)

            twin_primes.extend(
                (p, p + 2)
                for p in chunk_primes
                if p % 6 == 5 and (p + 2) in window_set
            )
            primes_class_1.extend(p for p in chunk_primes if p % 6 == 1)
            primes_class_5.extend(p for p in chunk_primes if p % 6 == 5)

            for gap in gaps:
                gap_counts[gap].extend(
                    (p, p + gap) for p in chunk_primes if (p + gap) in window_set
                )

            # Save progress after each sub-chunk
            with open(f'progress_chunk_{i}.pkl', 'wb') as f:
                pickle.dump({
                    'all_primes': all_primes,
                    'twin_primes': twin_primes,
                    'primes_class_1': primes_class_1,
                    'primes_class_5': primes_class_5,
                    'gap_counts': gap_counts
                }, f)
            gc.collect()  # Clean up memory

    return all_primes, twin_primes, primes_class_1, primes_class_5, gap_counts

def _load_latest_progress(directory='.'):
    """
    Load the checkpoint with the highest chunk number found in *directory*.

    Returns the unpickled progress dict, or None when there is no usable
    checkpoint (no matching file, unreadable or truncated file, or a
    payload that lacks one of the expected keys).
    """
    prefix, suffix = 'progress_chunk_', '.pkl'
    chunk_ids = []
    for name in os.listdir(directory):
        if name.startswith(prefix) and name.endswith(suffix):
            stem = name[len(prefix):-len(suffix)]
            # Ignore stray files whose chunk id is not a plain integer.
            if stem.isdigit():
                chunk_ids.append(int(stem))
    if not chunk_ids:
        return None

    path = os.path.join(directory, f'{prefix}{max(chunk_ids)}{suffix}')
    try:
        with open(path, 'rb') as f:
            # NOTE: unpickling can execute arbitrary code. Only resume from
            # checkpoint files that were written by this script.
            progress = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError):
        # A run interrupted in the middle of a dump leaves a truncated file.
        return None

    required = ('all_primes', 'twin_primes', 'primes_class_1',
                'primes_class_5', 'gap_counts')
    if not isinstance(progress, dict) or any(key not in progress for key in required):
        return None
    return progress


def advanced_prime_analysis(limit, chunk_size=5000000, sub_chunk_size=1000000):
    """
    Main analysis function, resuming from saved progress or starting fresh.

    Loads the newest ``progress_chunk_*.pkl`` checkpoint from the current
    directory if one is usable, continues the computation after the last
    prime stored in it, and otherwise computes everything from scratch.
    It then prints twin-prime density figures, the sizes of the residue
    classes 1 and 5 modulo 6 with a chi-square statistic for an equal split,
    and the number of prime pairs at gaps 8, 10, 12 and 14, and finally
    plots those pair counts.

    Args:
        limit: Exclusive upper bound of the analysed range; must be >= 2.
        chunk_size: Forwarded to ``compute_primes_in_chunks``.
        sub_chunk_size: Forwarded to ``compute_primes_in_chunks``.

    Raises:
        ValueError: If ``limit`` is smaller than 2 (the density formula
            divides by ``log(limit)``).
    """
    if limit < 2:
        raise ValueError("limit must be at least 2")

    gaps = [8, 10, 12, 14]

    progress = _load_latest_progress()
    if progress is None:
        # No usable checkpoint: start from scratch
        all_primes, twin_primes, primes_class_1, primes_class_5, gap_counts = compute_primes_in_chunks(limit, chunk_size, sub_chunk_size)
    else:
        all_primes = progress['all_primes']
        twin_primes = progress['twin_primes']
        primes_class_1 = progress['primes_class_1']
        primes_class_5 = progress['primes_class_5']
        gap_counts = progress['gap_counts']

        # Find the last prime processed to resume from there
        last_prime = max(all_primes) if all_primes else 1
        next_start = last_prime + 1

        # Continue computation from where we left off
        new_all_primes, new_twin_primes, new_primes_class_1, new_primes_class_5, new_gap_counts = compute_primes_in_chunks(limit, chunk_size, sub_chunk_size, start_from=next_start)

        # Combine results
        all_primes.extend(new_all_primes)
        twin_primes.extend(new_twin_primes)
        primes_class_1.extend(new_primes_class_1)
        primes_class_5.extend(new_primes_class_5)
        for gap in gaps:
            gap_counts[gap].extend(new_gap_counts[gap])

    # Density Analysis
    # Hardy-Littlewood: pi_2(x) ~ 2*C2 * x / (ln x)^2, with the twin prime
    # constant C2 ~ 0.6601618, so C below is 2*C2.
    C = 1.3203236
    log_limit = math.log(limit)
    # Both figures are expressed per integer below `limit` so that they are
    # directly comparable: predicted count / limit versus observed count / limit.
    theoretical_density_twins = C / log_limit ** 2
    empirical_density_twins = len(twin_primes) / limit

    print(f"Empirical Density of Twin Primes: {empirical_density_twins}")
    print(f"Theoretical Density of Twin Primes: {theoretical_density_twins}")
    print(f"Ratio (Empirical/Theoretical): {empirical_density_twins / theoretical_density_twins}")

    print(f"\nNumber of primes in class 1: {len(primes_class_1)}")
    print(f"Number of primes in class 5: {len(primes_class_5)}")

    # Exploration of Other Gaps
    for gap in gaps:
        print(f"\nPrimes with gap {gap} up to {limit}: {gap_counts[gap][:10]}...")
        print(f"Number of prime pairs with gap {gap}: {len(gap_counts[gap])}")

    # Statistical Analysis
    total_primes = len(primes_class_1) + len(primes_class_5)
    if total_primes:
        expected = total_primes / 2
        chi2 = ((len(primes_class_1) - expected) ** 2 / expected) + ((len(primes_class_5) - expected) ** 2 / expected)
    else:
        # Neither class contains a prime (very small limit): there is no
        # deviation to measure and the formula would divide by zero.
        chi2 = 0.0

    print(f"\nChi-square Statistic: {chi2}")
    print("Note: For chi2 < 3.84 (1 df, p > 0.05), distribution is consistent with equal split")
    print(f"Conclusion: {'Consistent with equal distribution' if chi2 < 3.84 else 'Possible deviation'}")

    # Plotting
    plt.figure(figsize=(10, 6))
    plt.plot(gaps, [len(gap_counts[gap]) for gap in gaps], 'o-')
    plt.title('Number of Prime Pairs vs. Gap Size')
    plt.xlabel('Gap Size')
    plt.ylabel('Number of Pairs')
    plt.grid(True)
    plt.show()

# Upper bound for the prime search (10^10)
limit = 10 ** 10

# Run the analysis only when this file is executed as a script
if __name__ == "__main__":
    import os  # used by advanced_prime_analysis to list checkpoint files
    advanced_prime_analysis(limit)

Processing chunk from 2 to 5000002
Processing chunk from 5000002 to 10000002
Processing chunk from 10000002 to 15000002
Processing chunk from 15000002 to 20000002
Processing chunk from 20000002 to 25000002
Processing chunk from 25000002 to 30000002
Processing chunk from 30000002 to 35000002
Processing chunk from 35000002 to 40000002
Processing chunk from 40000002 to 45000002
Processing chunk from 45000002 to 50000002
Processing chunk from 50000002 to 55000002
Processing chunk from 55000002 to 60000002
Processing chunk from 60000002 to 65000002
Processing chunk from 65000002 to 70000002
Processing chunk from 70000002 to 75000002
Processing chunk from 75000002 to 80000002
Processing chunk from 80000002 to 85000002
Processing chunk from 85000002 to 90000002
Processing chunk from 90000002 to 95000002
Processing chunk from 95000002 to 100000002
Processing chunk from 100000002 to 105000002
Processing chunk from 105000002 to 110000002
Processing chunk from 110000002 to 115000002
Processing ch