In [None]:
import numpy as np
from typing import List, Tuple
import json
import time
from datetime import datetime
import os
#import sys
#sys.path.append("YOUR DIRECTORY HERE IF NEEDED")
import CSV_to_list

def get_char_freq(phrase: str) -> np.ndarray:
    """Count the characters of ``phrase`` into a fixed 29-slot frequency array.

    The phrase is lower-cased first. Slots 0-25 hold the counts of the ASCII
    letters ``a``-``z``, slot 26 counts spaces, slot 27 apostrophes and
    slot 28 hyphens. Every other character (digits, other punctuation,
    accented or non-Latin letters) is ignored.

    Args:
        phrase: Text to count.

    Returns:
        A ``np.int16`` array of length 29 with the per-character counts.
    """
    # Slots used by the non-letter characters that are counted.
    extra_slots = {" ": 26, "'": 27, "-": 28}
    first_letter = ord("a")

    freq = np.zeros(29, dtype=np.int16)
    for char in phrase.lower():
        # str.isalpha() is also true for letters such as 'é' or 'µ', whose
        # code points would index past the 26 letter slots, so the range is
        # tested explicitly instead.
        if "a" <= char <= "z":
            freq[ord(char) - first_letter] += 1
        elif char in extra_slots:
            freq[extra_slots[char]] += 1
    return freq

def preprocess_wordbank(wordbank: List[str], target_freq: np.ndarray) -> List[Tuple[str, np.ndarray]]:
    """Filter the word bank down to usable candidates and cache their counts.

    A word is kept when every character slot it uses is also used by the
    target. Only presence is checked here, not how many times the character
    occurs. The survivors are returned as ``(word, frequency_array)`` pairs
    ordered by total counted characters, largest first; ties keep their
    word-bank order.

    Args:
        wordbank: Candidate words.
        target_freq: Frequency array of the target phrase, as produced by
            ``get_char_freq``.

    Returns:
        The filtered, sorted list of ``(word, frequency_array)`` pairs.
    """
    allowed_slots = set(np.flatnonzero(target_freq > 0).tolist())

    candidates = []
    for entry in wordbank:
        counts = get_char_freq(entry)
        used_slots = np.flatnonzero(counts > 0).tolist()
        if allowed_slots.issuperset(used_slots):
            candidates.append((entry, counts))

    # list.sort is stable even with reverse=True, so equal-length entries
    # stay in their original relative order.
    candidates.sort(key=lambda pair: pair[1].sum(), reverse=True)
    return candidates

class AnagramFinder:
    """Backtracking search for multi-word anagrams, with JSON checkpoints.

    One checkpoint file per target phrase is kept in ``checkpoint_dir``. A
    checkpoint stores the matches found so far and the number of candidates
    examined; it does not store the position of the search, so a resumed run
    starts again from the first word while keeping the saved matches and
    search count.
    """

    def __init__(self, checkpoint_dir="checkpoints"):
        """Create the checkpoint directory if needed and start the save timer."""
        self.checkpoint_dir = checkpoint_dir
        self.last_save_time = time.time()
        self.save_interval = 300  # Save every 5 minutes
        os.makedirs(checkpoint_dir, exist_ok=True)

    def _checkpoint_path(self, target: str) -> str:
        """Return the checkpoint file path used for ``target``."""
        safe_name = target.replace(' ', '_')
        # A path separator inside the target would point outside (or into a
        # missing sub-directory of) the checkpoint directory.
        for separator in (os.sep, os.altsep):
            if separator:
                safe_name = safe_name.replace(separator, '_')
        return os.path.join(self.checkpoint_dir, f"checkpoint_{safe_name}.json")

    def load_checkpoint(self, target: str) -> Tuple[List[List[str]], int]:
        """Load previous results and search count from checkpoint.

        Returns:
            ``(results, search_count)`` from the checkpoint file, or
            ``([], 0)`` when no checkpoint exists or it cannot be read.
        """
        checkpoint_file = self._checkpoint_path(target)
        try:
            with open(checkpoint_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return data['results'], data['search_count']
        except FileNotFoundError:
            return [], 0
        except (ValueError, KeyError, TypeError) as err:
            # A truncated or hand-edited file should not abort the whole run;
            # start this target from scratch instead.
            print(f"\nIgnoring unreadable checkpoint '{checkpoint_file}': {err}")
            return [], 0

    def save_checkpoint(self, target: str, results: List[List[str]], search_count: int,
                        force: bool = False):
        """Save current results and search count to checkpoint.

        Args:
            target: Target phrase the results belong to.
            results: Matches found so far.
            search_count: Number of candidates examined so far.
            force: When false (the default) the call does nothing unless at
                least ``save_interval`` seconds have passed since the last
                save. When true the checkpoint is always written.
        """
        current_time = time.time()
        if not force and current_time - self.last_save_time < self.save_interval:
            return

        checkpoint_file = self._checkpoint_path(target)
        # Write to a temporary file and rename it so that an interrupted
        # write cannot leave a half-written checkpoint behind.
        temp_file = checkpoint_file + ".tmp"
        with open(temp_file, 'w', encoding='utf-8') as f:
            json.dump({
                'target': target,
                'results': results,
                'search_count': search_count,
                'timestamp': datetime.now().isoformat()
            }, f, indent=2)
        os.replace(temp_file, checkpoint_file)

        self.last_save_time = current_time
        print(f"\nCheckpoint saved for target '{target}' at {datetime.now().isoformat()}")
        print(f"Current results count: {len(results)}")
        print(f"Searches performed: {search_count}")

    def find_phrase_anagrams(self, target: str, wordbank: List[str], 
                            max_searches: int = 1000, max_matches: int = 3) -> List[List[str]]:
        """Find anagrams with checkpointing and match limiting.

        Words are combined in word-bank order (after preprocessing) and each
        word is used at most once per combination. Every word after the first
        also consumes one space from the target, so a match uses exactly the
        target's letters and exactly its number of spaces.

        Args:
            target: Phrase to find anagrams of.
            wordbank: Candidate words.
            max_searches: The search stops once more than this many
                candidates have been examined (including the count restored
                from a checkpoint).
            max_matches: The search stops once this many matches are known.

        Returns:
            Up to ``max_matches`` matches, each a list of words.
        """
        target_freq = get_char_freq(target)
        results, search_count = self.load_checkpoint(target)
        
        if len(results) >= max_matches:
            print(f"Already found {len(results)} matches for '{target}' in previous run")
            return results[:max_matches]
        
        processed_bank = preprocess_wordbank(wordbank, target_freq)
        space_freq = get_char_freq(" ")
        
        def recursive_search(current_words: List[str], remaining_freq: np.ndarray, 
                            start_idx: int, spaces_needed: int) -> None:
            """Extend ``current_words`` with words from ``start_idx`` onwards."""
            nonlocal search_count
            
            # Budget exhausted: enough matches or too many candidates tried.
            if len(results) >= max_matches or search_count > max_searches:
                return
                
            # Some character (or space) has been over-used: dead end.
            if np.any(remaining_freq < 0) or spaces_needed < 0:
                return
                
            if np.all(remaining_freq == 0) and spaces_needed == 0:
                # The search restarts from the first word on every run, so a
                # resumed run reaches matches that were already loaded from
                # the checkpoint; duplicate word-bank entries produce repeats
                # too. Record each combination once only.
                if current_words not in results:
                    results.append(current_words.copy())
                    print(f"\nNew match found for '{target}': {' '.join(current_words)}")
                    self.save_checkpoint(target, results, search_count)
                return
                
            for i in range(start_idx, len(processed_bank)):
                word, word_freq = processed_bank[i]
                search_count += 1
                
                # Skip words with more characters than remain once the
                # still-required spaces are set aside.
                if np.sum(word_freq) > np.sum(remaining_freq) - spaces_needed:
                    continue
                    
                new_remaining = remaining_freq - word_freq
                if len(current_words) > 0:
                    # Joining to a previous word costs one space.
                    new_remaining = new_remaining - space_freq
                    new_spaces = spaces_needed - 1
                else:
                    new_spaces = spaces_needed
                    
                current_words.append(word)
                recursive_search(current_words, new_remaining, i + 1, new_spaces)
                current_words.pop()
                
                # Periodic (interval-limited) save while the search runs.
                self.save_checkpoint(target, results, search_count)
        
        initial_spaces = target.count(' ')
        print(f"\nStarting search for '{target}'...")
        print(f"Continuing from previous search count: {search_count}")
        print(f"Previous matches found: {len(results)}")
        
        recursive_search([], target_freq, 0, initial_spaces)
        # Always persist the final state; the interval-limited save would
        # skip it for any run shorter than ``save_interval``.
        self.save_checkpoint(target, results, search_count, force=True)
        
        return results[:max_matches]

def find_multiple_anagrams(targets: List[str], wordbank: List[str], max_matches: int = 3, max_searches: int = 1000) -> List[Tuple[str, List[List[str]]]]:
    """Run the anagram search for each target phrase in turn.

    A single ``AnagramFinder`` (default checkpoint directory) is shared by
    all targets.

    Args:
        targets: Phrases to find anagrams of.
        wordbank: Candidate words, used for every target.
        max_matches: Passed through to ``find_phrase_anagrams``.
        max_searches: Passed through to ``find_phrase_anagrams``.

    Returns:
        One ``(target, matches)`` pair per target, in input order.
    """
    finder = AnagramFinder()
    return [
        (
            phrase,
            finder.find_phrase_anagrams(
                phrase,
                wordbank,
                max_matches=max_matches,
                max_searches=max_searches,
            ),
        )
        for phrase in targets
    ]

def __main__():
    """Load the target and source word lists, search, and print the matches.

    Both lists are read with ``CSV_to_list.csv_to_list``; the meaning of its
    third argument is not visible here (NOTE(review): confirm against the
    helper). Up to 50 matches are requested per target with a budget of
    1,000,000 examined candidates.
    """
    target_bank = CSV_to_list.csv_to_list("Math Anagram Targets.csv", 'utf-8', True)
    source_bank = CSV_to_list.csv_to_list("Math Anagram Sources.csv", 'utf-8', True)

    report = find_multiple_anagrams(
        target_bank,
        source_bank,
        max_matches=50,
        max_searches=1000000,
    )

    for phrase, found in report:
        print(f"\nTarget: {phrase}")
        if found:
            for words in found:
                print(f"Match: {' '.join(words)}")
        else:
            print("No matches found")

# Run the search only when this file is executed directly (script or notebook
# cell), not as a side effect of importing it from another module.
if __name__ == "__main__":
    __main__()

Available columns: ['Word']
Applied remove_accents()
Allegedly wrote to csv
Available columns: ['Word']
Applied remove_accents()
Allegedly wrote to csv

Starting search for 'Word'...
Continuing from previous search count: 0
Previous matches found: 0

New match found for 'Word': Word

New match found for 'Word': word

Starting search for 'Irrational function'...
Continuing from previous search count: 0
Previous matches found: 0

New match found for 'Irrational function': irrational function

New match found for 'Irrational function': irrational function

Starting search for 'taylor expansion'...
Continuing from previous search count: 0
Previous matches found: 0

New match found for 'taylor expansion': explanations Roy

New match found for 'taylor expansion': explorations any

New match found for 'taylor expansion': explorations nay

New match found for 'taylor expansion': explanation rosy

New match found for 'taylor expansion': explanatory ions

New match found for 'taylor expansion': 