In [None]:
import random

def build_markov_chain(text, order):
    """Create a Markov chain dictionary from input text 📖✨"""
    words = text.split()
    markov_chain = {}

    for i in range(len(words) - order):
        key = tuple(words[i:i+order])
        next_word = words[i+order]
        
        # the key value pairs are for example like this
        # key : "next word"
        if key not in markov_chain:
            markov_chain[key] = []
        markov_chain[key].append(next_word)
        # print("this is the next word", next_word)

    return markov_chain

# print(build_markov_chain(text="hello world hello world it is amazing", order=2))

## What is happening above?
1. A dictionary is made called markov_chain
2. A for loop is run in which key value pairs are assigned to each word becomes a tuple (could be more than 1 depending on the order value e.g value = 2 would be 2 words in the tuple key)
3. If checks to see if the key value pair already exists, in which case it would append the value on the right side of the existing key. (Remember all keys have to be unique in a dictionary)
4. The right side of the key value pair is the next word from the string. 
5. The completed array is then the whole string broken down into Keys and values.

In [9]:
def generate_markov_text(chain, length=20, seed_word=None):
    """Generate a mystical prophecy using Markov magic! 🔮"""
    key = random.choice(list(chain.keys())) if not seed_word else seed_word
    generated_words = list(key)

    for _ in range(length):
        if key in chain:
            next_word = random.choice(chain[key])
            generated_words.append(next_word)
            key = tuple(generated_words[-len(key):])
        else:
            break

    return ' '.join(generated_words)

# 📜 Behold, the sacred text of our AI prophecy:
sacred_text = """
The moon 🌙 whispers to the sea 🌊, while the stars 🌟 dance in the sky.
The wizard speaks in riddles, and the trees 🌲 sing the songs of time.
Magic flows like a river, and shadows follow the wind. 🍃✨
"""

# 🧙‍♂️ Markov’s Spellbook
markov_chain = build_markov_chain(sacred_text, order=2)
generated_prophecy = generate_markov_text(markov_chain, length=15)

print("🔮 **Markov's Prophecy:**")
print(generated_prophecy + " ✨")

🔮 **Markov's Prophecy:**
🌙 whispers to the sea 🌊, while the stars 🌟 dance in the sky. The wizard speaks ✨


## What happened here?
1. A function is defined where a random key is picked from the Markov chain (unless a seed_word is given).
2. This key is added to the generated_words list, converting it from a tuple into a list format.
3. A loop runs for the desired number of repetitions, where:
    • The function checks if the current key exists in the Markov chain.
    • If it does, a random next word is picked from the list of possible next words.
    • This word is added to generated_words, and the key is updated by shifting forward (keeping only the last order number of words).
4. The result is a sentence where words are ordered in a way that commonly appears in the input text (while still allowing for random variations).
5. Repetition of the same keys is possible, but only when the Markov chain allows for it naturally. The function doesn't enforce repetition but follows the probabilities from the training text.

In [2]:
!pip install torch torchvision torchaudio


Collecting torchvision
  Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio
  Downloading torchaudio-2.2.2-cp311-cp311-macosx_10_13_x86_64.whl.metadata (6.4 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Using cached pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl.metadata (9.1 kB)
Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl (1.7 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m0m
[?25hDownloading torchaudio-2.2.2-cp311-cp311-macosx_10_13_x86_64.whl (3.4 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hUsing cached pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl (3.2 MB)
Installing collected packages: pillow, torchvision, torchaudio
Successfully inst

In [4]:
!pip install numpy

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import numpy as np
print(np.__version__)


2.2.3


In [7]:
from transformers import cached_path, hf_bucket_url, CONFIG_NAME

# Clear the cache
from transformers import file_utils
file_utils.clear_cache()


ImportError: cannot import name 'cached_path' from 'transformers' (/Users/arnoldm./NLP/venv/lib/python3.11/site-packages/transformers/__init__.py)

In [8]:
!pip install --upgrade numpy
!pip install --upgrade torch

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [9]:
from transformers import pipeline
# import numpy as np

# 🚀 Load the AI wizard (GPT-2 model)
generator = pipeline("text-generation", model="gpt2", device=-1)  # Forces CPU usage


# 🌙 Invoke RNN magic
prompt = "The enchanted forest hides secrets"
rnn_generated_text = generator(prompt, max_length=50, num_return_sequences=1)

print("\n🌀 **RNN's Prophecy:**")
print(rnn_generated_text[0]["generated_text"] + " 🌌")

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


RuntimeError: Numpy is not available