In [None]:
:dep csv = "1.2"
:dep serde = { version = "1.0", features = ["derive"] }

use csv::ReaderBuilder;
use std::error::Error;
use std::path::Path;
use std::collections::HashSet;
use std::collections::HashMap;


fn string_to_bin(s: &str) -> Option<u32> {
    let mut bitmask = 0;
    for c in s.chars() {
        if ('a'..='z').contains(&c) {
            bitmask |= 1 << (c as u32 - 'a' as u32);
        } else {
            return None;
        }
    }
    Some(bitmask)
}

let letter_frequency: HashMap<char, u64> = HashMap::from([
    ('e', 349588141984), ('t', 247577342738), ('a', 243662684512), ('o', 228025627088),
    ('i', 223353030415), ('n', 207910712159), ('s', 207080253606), ('r', 201896673641),
    ('l', 130649920346), ('c', 113913698859), ('d', 107605388542), ('h', 106367962556),
    ('u', 86950627146),  ('m', 84155576549),  ('p', 77553040250),  ('g', 63045208347),
    ('f', 61328927423),  ('y', 52941043438),  ('b', 49798922187),  ('w', 44294405401),
    ('v', 34402346309),  ('k', 24380950863),  ('x', 9151143994),   ('j', 7637833834),
    ('q', 4218467887),   ('z', 4192477980),
    ]);
    
    let mut sorted_letter_frequency: Vec<(char, u64)> = letter_frequency.into_iter().collect();
    sorted_letter_frequency.sort_by(|a, b| b.1.cmp(&a.1));
    
let word_length: usize = 7;


let desired_letters: String = sorted_letter_frequency
    .into_iter()
    .take(word_length * 3)
    .map(|p| p.0)
    .collect::<Vec<char>>()
    .iter()
    .collect();

let dl_bin: u32 = string_to_bin(&desired_letters).unwrap_or(0);

println!("{} {}", desired_letters, dl_bin);


// let file_path = "henrik/unigram_freq.csv";
let file_path = "unigram_freq.csv";
let mut rdr = ReaderBuilder::new()
    .has_headers(true)
    .from_path(file_path)?;

let old_word_collection: Vec<String> = rdr
    .records()
    .filter_map(|result| {
        result.ok().and_then(|record| {
            record.get(0).map(|word| word.to_string())
        })
    })
    .filter(|word| word.len() == word_length)
    .filter(|word| word.chars().collect::<HashSet<char>>().len() == word_length)
    .collect();

println!("word collection length: {}", old_word_collection.len());

let mut bin_lookup: HashMap<u32, Vec<String>> = HashMap::new();
let mut word_collection: HashSet<u32> = HashSet::new();

for s in old_word_collection {
    match string_to_bin(&s) {
        Some(s_bin) if (s_bin & dl_bin) == s_bin => {
            bin_lookup.entry(s_bin).or_default().push(s.clone());
            word_collection.insert(s_bin);
        }
        _ => {} // Ignore None or values that don't match the condition
    }
}

println!("reduced word collection length: {}", word_collection.len());
let wc: Vec<u32> = word_collection.into_iter().collect();

let mut result: Vec<(u32, u32, u32)> = Vec::new();
for ai in 0..wc.len() {
    for bi in (ai + 1)..wc.len() {
        if (wc[ai] & wc[bi]) == 0 {
            for ci in (bi + 1)..wc.len() {
                let combined_letters = wc[ai] | wc[bi] | wc[ci];
                if combined_letters == dl_bin {
                    result.push((wc[ai], wc[bi], wc[ci]));
                }
            }
        }
    }
}
println!("result: {}", result.len());

let mut options: Vec<(String, String, String)> = Vec::new();
for (a, b, c) in &result {
    if let (Some(w1_list), Some(w2_list), Some(w3_list)) = (bin_lookup.get(a), bin_lookup.get(b), bin_lookup.get(c)){
        for w1 in w1_list {
            for w2 in w2_list {
                for w3 in w3_list {
                    options.push((w1.clone(), w2.clone(), w3.clone()));
                }
            }
        }
    }
}

println!("options: {}", options.len());

for x in options {
    println!("{} {} {}", x.0, x.1, x.2);
}


etaoinsrlcdhumpgfybwv 25098751
word collection length: 15926
reduced word collection length: 7986
result: 101
options: 122
goldwyn recvbuf maphist
goldwyn recvbuf mishpat
wiscvpn flagthe modbury
wiscvpn glubyte fordham
wiscvpn fromthe ladybug
wiscvpn motherf ladybug
wiscvpn theform ladybug
wiscvpn myburgh floated
wiscvpn harmful dogbyte
wiscvpn bugform deathly
wiscvpn guylhem batdorf
wiscvpn humbled fogarty
wiscvpn godfrey lambuth
vanport debugfs ymchwil
gymuned stwflbp archivo
weights novapdf crumbly
vhtdocs bumping frawley
vhtdocs amplify werbung
vhtdocs amplify newburg
vhtdocs bawling perfumy
plowing mchardy setvbuf
catfish pvrblog ymwneud
humbird playnow fgetcsv
browned flighty vcampus
busting wolfdev phrmacy
busting wolfdev pharmcy
tubings wolfdev phrmacy
tubings wolfdev pharmcy
mclarty pdfview bushong
brought pdfview malsync
baldwyn superfi hgtvcom
smedvig hanbury tcpflow
calston pdfview myburgh
pdfview borscht lumagny
pdfview hultman cyborgs
pdfview absynth gurlcom
pdfview gorsu

()

In [74]:
println!("{}", 5)

5


()