<a href="https://colab.research.google.com/github/Chamauta/Examples/blob/master/letterbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the word list read later in this
# notebook and the output file written later both live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# pip install pandas

In [3]:
import pandas as pd
import re

# Word list stored on the mounted Google Drive; the file's header row
# provides the column name 'Words'.
file_path = '/content/drive/MyDrive/Colab Notebooks/english_words.txt'

# keep_default_na=False: by default pandas converts entries such as "null",
# "nan" or "na" into missing values (NaN), which would silently drop real
# English words from the list. With this flag every entry stays a string.
df = pd.read_csv(file_path, keep_default_na=False)
print(df)

             Words
0            aahed
1           aahing
2             aahs
3            aalii
4           aaliis
...            ...
367515      zythum
367516     zyzomys
367517  zyzzogeton
367518     zyzzyva
367519    zyzzyvas

[367520 rows x 1 columns]


In [4]:
# enter the 12 letters for 'input_string'
input_string = 'xsnipujtdera'

# Cut the input into consecutive chunks of 3 characters
letter_groups = [
    input_string[start:start + 3]
    for start in range(0, len(input_string), 3)
]

# Expand every chunk into a list of its individual letters (a list of lists)
designated_letters = [list(chunk) for chunk in letter_groups]

print(designated_letters)

[['x', 's', 'n'], ['i', 'p', 'u'], ['j', 't', 'd'], ['e', 'r', 'a']]


In [5]:
# Accumulator for the dictionary words that pass the letter-group checks;
# it is filled by the filtering loop further down the notebook.
filtered_words = []

In [6]:
# pre-check: filter out any non-letter word
#df = df[df['Words'].apply(lambda x: re.match('^[a-zA-Z]*$', str(x)) is not None)]

In [7]:
# Build the list of dictionary words that can be spelled with the designated
# letter groups. The list is rebuilt from scratch in this cell, so re-running
# the cell does not append a second copy of every match.

# Map each designated letter to the index(es) of the group(s) containing it,
# so membership and same-group checks become dictionary lookups instead of
# repeated scans over every group.
letter_to_groups = {}
for group_index, group in enumerate(designated_letters):
    for letter in group:
        letter_to_groups.setdefault(letter, set()).add(group_index)


def is_letterbox_word(word):
    """Return True if `word` obeys the letter-group rules.

    A word qualifies when it is non-empty, every one of its letters belongs
    to some group in `designated_letters`, and no two consecutive letters
    share a group.

    Args:
      word: The candidate string.

    Returns:
      True if the word satisfies all three conditions, otherwise False.
    """
    if not word:
        # An empty entry would pass both checks vacuously and later break
        # code that indexes word[0] / word[-1].
        return False
    if any(letter not in letter_to_groups for letter in word):
        return False
    # Consecutive letters must not have any group in common.
    return all(
        letter_to_groups[current].isdisjoint(letter_to_groups[following])
        for current, following in zip(word, word[1:])
    )


# Iterate through each word in column 'Words' of the df dataframe defined
# above; non-string cells (e.g. NaN for missing values) are skipped.
filtered_words = [
    word
    for word in df['Words']
    if isinstance(word, str) and is_letterbox_word(word)
]

In [8]:
# Function to count unique designated letters in a word
def count_unique_designated_letters(word):
    """Return how many distinct letters from `designated_letters` occur in `word`."""
    letters_present = {
        letter
        for subset in designated_letters
        for letter in subset
        if letter in word
    }
    return len(letters_present)

In [9]:
# Rank the filtered words in place: words using the most distinct designated
# letters come first (ties keep their existing relative order)
filtered_words.sort(key=count_unique_designated_letters, reverse=True)

In [10]:
# Save the ranked word list to a text file called my_words.txt, one word per line
# https://www.pythontutorial.net/python-basics/python-write-text-file/
# https://www.geeksforgeeks.org/how-to-open-and-close-a-file-in-python/
output_path = '/content/drive/MyDrive/Colab Notebooks/my_words.txt'
with open(output_path, 'w') as f:  # mode 'w': all contents will be overwritten
    joined_words = '\n'.join(filtered_words)
    f.write(joined_words)
# see content folder in left pane

In [11]:
# Print a two-column table: each filtered word and the number of distinct
# designated letters it uses. That number is not the word's length, so the
# column is labelled "unique" and right-aligned to sit over the values.
print(f'{"word":<15}{"unique":>7}')
print("=" * 22)  # same width as a table row (15 + 7 characters)

for word in filtered_words:
    count = count_unique_designated_letters(word)
    print(f'{word:<15}{count:>7}')

word           length 
jurisprudent        10
unextirpated        10
unpasteurised       10
untapestried        10
adiapneustia         9
denaturise           9
denaturised          9
dextrinase           9
exindusiate          9
pasteurian           9
pasteurised          9
pedantries           9
pedestrian           9
pedestrianate        9
pedestrianise        9
pedestrianised       9
pseudaxine           9
surprinted           9
trisinuated          9
truandise            9
unexpeditated        9
adnexitis            8
antesunrise          8
antispadix           8
aptitudes            8
audients             8
austrine             8
depaints             8
desaurin             8
desaurine            8
dextrinate           8
dextrines            8
dispatriated         8
dundasite            8
expatriated          8
expatriates          8
expedientist         8
expedients           8
expenditrix          8
extirpated           8
extirpates           8
inaptitude           8
independist

In [12]:
def count_first_letter(words):
    """Counts the number of times a letter begins a word in a list of words.

    Empty strings have no first letter and are skipped instead of raising
    IndexError.

    Args:
      words: A list of strings.

    Returns:
      A dictionary mapping letters to the number of times they begin a word.
      Keys appear in the order in which each letter was first seen.
    """
    letter_counts = {}
    for word in words:
        if not word:
            # Nothing to index in an empty string.
            continue
        first_letter = word[0]
        letter_counts[first_letter] = letter_counts.get(first_letter, 0) + 1
    return letter_counts


In [13]:
# Tally how many of the filtered words start with each letter
beginLetterCount = count_first_letter(filtered_words)

In [14]:
# Order the (letter, count) pairs from most to least frequent, then rebuild a
# dict from them (dicts keep insertion order, so the ranking is preserved)
sorted_beginLetterCount = sorted(
    beginLetterCount.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
beginningLetters = dict(sorted_beginLetterCount)

In [15]:
# Display the starting-letter counts, most frequent first
beginningLetters

{'s': 516,
 'd': 396,
 't': 345,
 'a': 336,
 'p': 300,
 'u': 208,
 'e': 191,
 'i': 179,
 'n': 154,
 'j': 108,
 'r': 65,
 'x': 6}

In [16]:
def count_last_letter(words):
    """Counts the number of times a letter ends a word in a list of words.

    Empty strings have no last letter and are skipped instead of raising
    IndexError.

    Args:
      words: A list of strings.

    Returns:
      A dictionary mapping letters to the number of times they end a word.
      Keys appear in the order in which each letter was first seen.
    """
    letter_counts = {}
    for word in words:
        if not word:
            # Nothing to index in an empty string.
            continue
        last_letter = word[-1]
        letter_counts[last_letter] = letter_counts.get(last_letter, 0) + 1
    return letter_counts

In [17]:
# Tally how many of the filtered words end with each letter
lastLetterCount = count_last_letter(filtered_words)

In [18]:
# Order the (letter, count) pairs from most to least frequent, then rebuild a
# dict from them (dicts keep insertion order, so the ranking is preserved)
sorted_lastLetterCount = sorted(
    lastLetterCount.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
lastLetters = dict(sorted_lastLetterCount)

In [19]:
# Display the ending-letter counts, most frequent first
lastLetters

{'s': 759,
 'e': 538,
 'd': 405,
 't': 385,
 'n': 253,
 'a': 233,
 'i': 90,
 'x': 47,
 'r': 40,
 'u': 36,
 'p': 18}