# Generate set of non-isomorphic strings 

In [44]:
import string
import pandas as pd
from collections import defaultdict
import pickle

In [45]:
# Shared module-level state used by the cells below.
# flat_dataset: string length n -> list of every generated string of length n.
flat_dataset = defaultdict(list)
# dataset: (n, k) -> set of generated strings of length n using k letters.
dataset = defaultdict(set)
# count_table: empty placeholder, replaced by update_count_table().
count_table = pd.DataFrame()

In [46]:
def generate_strings(n, k):
    """Return the canonical strings of length ``n`` that use exactly ``k`` letters.

    Every string is built over the first ``k`` lowercase ASCII letters and its
    letters make their first appearance in alphabetical order ("a" before "b"
    before "c", ...), so each string is one representative of a class of
    mutually isomorphic strings. The construction follows the recurrence
    ``S(n, k) = k * S(n-1, k) + S(n-1, k-1)``:

    * append any of the ``k`` letters to a string from ``S(n-1, k)``;
    * append the ``k``-th letter to a string from ``S(n-1, k-1)``.

    Results, including the ``k == 1`` and ``n == k`` base cases, are stored
    in the module-level ``dataset`` mapping under the key ``(n, k)`` so that
    the saved database is complete.

    Args:
        n: Length of the strings.
        k: Number of distinct letters each string must contain.

    Returns:
        A set of strings. For ``1 <= k <= n`` this is the very object held in
        ``dataset``; mutating it changes the cache. Impossible combinations
        (``n < k`` or negative ``k``) give an empty set, and ``(0, 0)`` gives
        ``{""}``.

    Raises:
        ValueError: If ``k`` exceeds the 26 available lowercase letters
            (and ``n >= k``).
    """
    # More distinct letters than positions, or a negative letter count.
    if n < k or k < 0:
        return set()
    # Zero letters: only the empty string qualifies, and only for length 0.
    if k == 0:
        return {""} if n == 0 else set()

    alphabet = string.ascii_lowercase
    if k > len(alphabet):
        raise ValueError(
            f"k={k} exceeds the {len(alphabet)} available lowercase letters"
        )

    key = (n, k)
    # ``.get`` avoids creating a defaultdict entry. An empty cached set is
    # treated as missing because every valid (n, k) has at least one string;
    # such empty entries appear when someone merely looks up dataset[(n, k)].
    cached = dataset.get(key)
    if cached:
        return cached

    letters = alphabet[:k]
    if k == 1:
        result = {"a" * n}
    elif n == k:
        result = {letters}
    else:
        # Extend strings that already use all k letters with any of them.
        result = {
            prefix + letter
            for prefix in generate_strings(n - 1, k)
            for letter in letters
        }
        # Extend strings using only k-1 letters with the new k-th letter.
        newest = letters[-1]
        result.update(
            prefix + newest for prefix in generate_strings(n - 1, k - 1)
        )

    # Store only the finished set so an interrupted call cannot leave a
    # partial entry behind.
    dataset[key] = result
    return result

In [47]:
def update_dataset(N):
    """Populate ``dataset`` and ``flat_dataset`` for every length up to ``N``.

    For each length ``n`` in ``1..N`` this calls ``generate_strings(n, k)``
    for ``k`` in ``1..n`` and stores all resulting strings in
    ``flat_dataset[n]``.

    Each ``flat_dataset[n]`` list is rebuilt from scratch, so calling this
    function again (for example with a larger ``N``) does not append
    duplicates. Within each ``k`` the strings are sorted, which makes the
    list order reproducible instead of following set iteration order.

    Args:
        N: Largest string length to generate (inclusive).
    """
    for n in range(1, N + 1):
        strings_of_length_n = []
        for k in range(1, n + 1):
            strings_of_length_n.extend(sorted(generate_strings(n, k)))
        # Replace rather than extend, so repeated runs stay free of duplicates.
        flat_dataset[n] = strings_of_length_n

In [48]:
def update_count_table(N):
    """Rebuild the global ``count_table`` with string counts up to size ``N``.

    The table is an ``N`` x ``N`` DataFrame whose rows are labelled by the
    length ``n`` and whose columns are labelled by the letter count ``k``
    (both ``1..N``). Entries with ``k > n`` stay 0. The remaining entries
    follow the recurrence of the Stirling numbers of the second kind:
    ``S(n, 1) = S(n, n) = 1`` and
    ``S(n, k) = k * S(n-1, k) + S(n-1, k-1)`` otherwise.

    Args:
        N: Largest length and letter count to tabulate (inclusive).
    """
    global count_table
    sizes = range(1, N + 1)
    table = pd.DataFrame(0, index=sizes, columns=sizes)
    count_table = table
    for n in sizes:
        for k in range(1, n + 1):
            if k in (1, n):
                # Single-letter strings and all-distinct strings are unique.
                entry = 1
            else:
                # The row above is already complete, so both terms are final.
                entry = k * table.at[n - 1, k] + table.at[n - 1, k - 1]
            table.at[n, k] = entry


In [49]:
# Run only to expand the dataset
# N is the largest string length (and letter count) to cover.
N = 10
# Generate the strings themselves ...
update_dataset(N)
# ... and, independently, the table of expected counts.
update_count_table(N)

In [50]:
# # Print the count table for verification
# print("Count Table:")
# print(count_table)

# total_strings = count_table.sum().sum()
# print(f"Total number of strings up to ({N}, {N}): {total_strings}")

In [51]:
# Save the datasets to files: the (n, k)-keyed sets first, then the
# per-length flat lists. Each file is closed before the next one is opened.
for path, payload in (('NISdb.pkl', dataset), ('NISdb_flat.pkl', flat_dataset)):
    with open(path, 'wb') as f:
        pickle.dump(payload, f)
    

In [52]:
# # Verification
# with open('NISdb.pkl', 'rb') as f:
#     loaded_dataset = pickle.load(f)
# with open('NISdb_flat.pkl', 'rb') as f:
#     loaded_flat_dataset = pickle.load(f)
# print(f"Loaded dataset: {loaded_dataset}")
# print(f"Loaded flat dataset: {loaded_flat_dataset}")

In [53]:
# # Example usage
# Generate (or fetch from the in-memory cache) the strings for a given n and k
# n = 4
# k = 2
# result = generate_strings(n, k)
# print(f"Strings of length {n} with {k} distinct letters: {result}")

