## Assignment 13: Generative AI Essentials

In [8]:
# Import Libraries
import requests
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import ipywidgets as widgets
from IPython.display import display

## Download Text

In [12]:
# Download the text content directly from Project Gutenberg
url = "https://www.gutenberg.org/files/11/11-0.txt"
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx reply so an error page is never used as training text.
response.raise_for_status()
text = response.text

# Preview text
print(text[:500])

*** START OF THE PROJECT GUTENBERG EBOOK 11 ***

[Illustration]




Alice’s Adventures in Wonderland

by Lewis Carroll

THE MILLENNIUM FULCRUM EDITION 3.0

Contents

 CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.   A Caucus-Race and a Long Tale
 CHAPTER IV.    The Rabbit Sends in a Little Bill
 CHAPTER V.     Advice from a Caterpillar
 CHAPTER VI.    Pig and Pepper
 CHAPTER VII.   A Mad Tea-Party
 CHAPTER VIII.  The Queen’s Croquet-Ground
 CHAPTER IX.    The


## Clean the Text

In [14]:
# Trim header/footer from Project Gutenberg
# NOTE: the first "CHAPTER I" match is the table-of-contents entry, so the
# contents list is kept at the start of the trimmed text.
start_idx = text.find("CHAPTER I")
if start_idx == -1:
    # str.find returns -1 on a miss; slicing from -1 would keep only the last character.
    start_idx = 0

# The footer wording is not guaranteed, so try the original marker first and
# then the upper-case "*** END OF ..." banner (same style as the START banner).
end_idx = -1
for footer_marker in ("End of the Project Gutenberg", "*** END OF THE PROJECT GUTENBERG"):
    end_idx = text.find(footer_marker, start_idx)
    if end_idx != -1:
        break
if end_idx == -1:
    # No footer found: keep everything instead of silently dropping the final
    # character (which is what slicing up to -1 would do, on every re-run).
    end_idx = len(text)
text = text[start_idx:end_idx].strip()

# ✅ Reduce cleaned text size here (e.g. first 5000 characters)
text = text[:5000]

# Preview the reduced cleaned text
print(text[:1000])

CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.   A Caucus-Race and a Long Tale
 CHAPTER IV.    The Rabbit Sends in a Little Bill
 CHAPTER V.     Advice from a Caterpillar
 CHAPTER VI.    Pig and Pepper
 CHAPTER VII.   A Mad Tea-Party
 CHAPTER VIII.  The Queen’s Croquet-Ground
 CHAPTER IX.    The Mock Turtle’s Story
 CHAPTER X.     The Lobster Quadrille
 CHAPTER XI.    Who Stole the Tarts?
 CHAPTER XII.   Alice’s Evidence




CHAPTER I.
Down the Rabbit-Hole


Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into
the book her sister was reading, but it had no pictures or
conversations in it, “and what is the use of a book,” thought Alice
“without pictures or conversations?”

So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure of
making a daisy-chain would be worth the trouble of gett

## Preprocess the Text for Character-Level Modeling

In [15]:
# Build the character vocabulary (sorted so the ordering is stable) and the
# two lookup tables that translate between characters and integer ids.
chars = sorted(set(text))
idx_to_char = dict(enumerate(chars))
char_to_idx = {ch: pos for pos, ch in idx_to_char.items()}

In [None]:
# Translate every character of the text into its vocabulary index
encoded = list(map(char_to_idx.__getitem__, text))

In [27]:
# Slide a fixed-size window over the encoded text: each window is one input
# sequence and the character immediately after it is the prediction target.
seq_length = 40
max_samples = 5000

# Only the first `max_samples` windows are kept, so no more than that are built.
window_count = min(max(len(encoded) - seq_length, 0), max_samples)
X = np.array([encoded[start:start + seq_length] for start in range(window_count)])
y = np.array([encoded[start + seq_length] for start in range(window_count)])

In [26]:
# One-hot encode the labels
# Guard on the rank so that re-running this cell does not one-hot encode
# labels that are already one-hot (which would add a spurious extra axis).
if np.ndim(y) == 1:
    y = to_categorical(y, num_classes=len(chars))

## Build and Train the LSTM Model

In [16]:
# Smaller model for faster training
model = Sequential([
    # Small embedding. The deprecated `input_length` argument is omitted:
    # Keras 3 only warns about it, and the sequence length is taken from the data.
    Embedding(input_dim=len(chars), output_dim=16),
    LSTM(64),  # fewer LSTM units
    # One output unit per vocabulary character; softmax yields next-char probabilities.
    Dense(len(chars), activation='softmax')
])

# categorical_crossentropy expects one-hot targets.
model.compile(loss='categorical_crossentropy', optimizer='adam')

Epoch 1/10




[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 3.7710
Epoch 2/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 3.1850
Epoch 3/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 3.1961
Epoch 4/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - loss: 3.1382
Epoch 5/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 2.9998
Epoch 6/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 2.9290
Epoch 7/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 2.8349
Epoch 8/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 2.7333
Epoch 9/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 2.7096
Epoch 10/10
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - loss: 2.6088


<keras.src.callbacks.history.History at 0x7c7bc4065550>

In [None]:
# Fit for a small number of epochs so that iteration stays quick
model.fit(
    x=X,
    y=y,
    batch_size=64,
    epochs=10,
    verbose=1,
)

## Sampling functions with temperature, top-k and top-p

In [32]:
def top_k_top_p_filtering(logits, top_k=0, top_p=1.0):
    """Mask logits outside the top-k / nucleus (top-p) candidate set with -inf.

    Args:
        logits: 1-D array-like of unnormalised log-scores, one per token.
        top_k: Keep only the `top_k` highest-scoring tokens (tokens tied with
            the cut-off score are all kept). 0 or less disables the filter;
            values larger than the number of tokens keep everything.
        top_p: Nucleus mass in (0, 1]. Tokens are taken in descending score
            order until their cumulative probability first exceeds `top_p`;
            the token that crosses the threshold is kept, so at least one
            candidate always survives. 1.0 or more disables the filter.

    Returns:
        A new float64 array of the same length with rejected entries set to
        -inf. The input is not modified.
    """
    # np.array(...) always copies, so the caller's data is never mutated.
    logits = np.array(logits, dtype='float64')

    # Top-k filtering
    if top_k > 0:
        # Clamp: np.partition raises if asked for more elements than exist.
        k = min(top_k, logits.size)
        if 0 < k < logits.size:
            kth_largest = np.partition(logits, -k)[-k]
            logits[logits < kth_largest] = -np.inf

    # Top-p filtering
    if top_p < 1.0 and logits.size:
        order = np.argsort(logits)[::-1]
        ranked = logits[order]
        peak = ranked[0]
        if np.isfinite(peak):
            # Subtract the maximum before exponentiating for numerical stability.
            weights = np.exp(ranked - peak)
            cumulative = np.cumsum(weights) / np.sum(weights)
            over_budget = cumulative > top_p
            if np.any(over_budget):
                crossing = int(np.argmax(over_budget))
                # Drop only what comes AFTER the crossing token; dropping the
                # crossing token too would mask everything whenever the single
                # most likely token already has probability > top_p.
                logits[order[crossing + 1:]] = -np.inf
    return logits

In [35]:
def generate_text(seed, length=300, temperature=1.0, top_k=0, top_p=1.0):
    """Extend `seed` by `length` characters sampled from the trained model.

    At every step the last `seq_length` characters are encoded with
    `char_to_idx`, the model's predicted distribution is rescaled by
    `temperature`, filtered with `top_k_top_p_filtering`, renormalised and
    sampled with `np.random.choice`.

    Args:
        seed: Starting text; it is returned as the prefix of the result.
        length: Number of characters to generate.
        temperature: Divisor applied to the log-probabilities; must be > 0.
        top_k: Forwarded to `top_k_top_p_filtering`.
        top_p: Forwarded to `top_k_top_p_filtering`.

    Returns:
        `seed` followed by the generated characters.

    Raises:
        ValueError: If `temperature` is not strictly positive.
    """
    if temperature <= 0:
        # Dividing by a non-positive temperature yields inf/NaN scores, which
        # would only surface later as an obscure sampling error.
        raise ValueError("temperature must be > 0")

    # Only the trailing window is ever fed to the model, so keep just that.
    context = seed[-seq_length:]
    pieces = [seed]
    for _ in range(length):
        # Characters outside the vocabulary fall back to index 0.
        input_seq = [char_to_idx.get(c, 0) for c in context]
        input_seq = np.array(input_seq).reshape(1, -1)
        prediction = model.predict(input_seq, verbose=0)[0]
        # The small epsilon avoids log(0) for zero-probability characters.
        scaled = np.asarray(np.log(prediction + 1e-9) / temperature, dtype='float64')
        filtered = top_k_top_p_filtering(scaled, top_k=top_k, top_p=top_p)  # Apply filtering
        if not np.any(np.isfinite(filtered)):
            # Every candidate was masked; sample from the unfiltered scores
            # rather than passing NaN probabilities to np.random.choice.
            filtered = scaled
        # Softmax normalization (max subtracted for numerical stability)
        exp_logits = np.exp(filtered - np.max(filtered))
        probs = exp_logits / np.sum(exp_logits)
        next_index = np.random.choice(len(probs), p=probs)  # Sample from filtered probabilities
        next_char = idx_to_char[next_index]
        pieces.append(next_char)
        context = (context + next_char)[-seq_length:]
    return "".join(pieces)

In [36]:
# Sample from the same seed at increasing temperatures to compare the outputs
demo_seed = "alice was beginning to get very "
for position, demo_temperature in enumerate((0.5, 1.0, 1.2)):
    # Every heading except the first is preceded by a blank line.
    lead = "" if position == 0 else "\n"
    print(f"{lead}Temperature {demo_temperature}:\n", generate_text(demo_seed, temperature=demo_temperature))


Temperature 0.5:
 alice was beginning to get very the hoas tin hen an thed athemen oo the the bog an seo tociw se dek there he lort the at then the showalw ware ane an the the se sor she the fon son soc he shen sot it the nren is toung wud ther the the anr toine the shon ig
atit to anr sor as anel he wer wanur gor se the the wor ane nand ithe itof 

Temperature 1.0:
 alice was beginning to get very thinn
lgaronbao
s y yibnend ASin Rsher orasg da asas sarit HIE
o aan —aruuth a_f f a sothi_ert fo the toE EWwiit HThe
 Gfho awaltind  t wol aal, a
e lxbik wte Anse
sot celt I in
PfeHTondlt yavsa PHhor th
Ag A. Cfnl hinl an toy ce,R ppe waRrmh de CuOl! morrEy tha wo tAlul f
  rerul“od  Eowe ffernd bi

Temperature 1.2:
 alice was beginning to get very sheka_n ac Lse d besOaser, mok sheTtufdin Ton“mPang rho thecerersh-oyl’wapiP!Terr
 f) adaVIe o liDd Rnever dhisp’t bxaret ba j nac. ho!g, ar
anullr
P mthe au?uk Mnit the
th fkaws? bgits wlouAk okinseVf ilorMur tinlirP”I venpauomh shur fos
to
goun

## Interactive Widgets for prompt engineering and sampling parameters

In [37]:
# Free-text box holding the seed (prompt) that generation starts from
seed_layout = widgets.Layout(width='80%')
seed_input = widgets.Text(
    description='Seed:',
    value="alice was beginning to get very ",
    layout=seed_layout,
)

In [38]:
# Sampling controls: one slider each for temperature, top-k and top-p
temperature_slider = widgets.FloatSlider(
    description='Temperature:',
    min=0.1,
    max=2.0,
    step=0.1,
    value=1.0,
)

top_k_slider = widgets.IntSlider(
    description='Top-k:',
    min=0,
    max=50,
    step=1,
    value=10,
)

top_p_slider = widgets.FloatSlider(
    description='Top-p:',
    min=0.1,
    max=1.0,
    step=0.05,
    value=1.0,
)

In [40]:
# Button that triggers one round of text generation when clicked
generate_button = widgets.Button(
    description="Generate Text",
)

In [41]:
# Output area
output = widgets.Output()

def on_generate_clicked(b):
    """Click handler: generate text from the current widget values.

    Args:
        b: The button instance passed in by ipywidgets (unused).
    """
    output.clear_output()
    # Anything printed inside this context is routed into the output widget.
    with output:
        generated = generate_text(
            seed_input.value,
            length=300,
            temperature=temperature_slider.value,
            top_k=top_k_slider.value,
            top_p=top_p_slider.value
        )
        print("\nGenerated Text:\n")
        print(generated)

generate_button.on_click(on_generate_clicked)

# Render the controls; without this call the widgets exist but are never shown.
display(widgets.VBox([
    seed_input,
    temperature_slider,
    top_k_slider,
    top_p_slider,
    generate_button,
    output,
]))

## Prompt engineering

In [42]:
## Example prompt engineering calls (wrapped in print for output)

print("\n-- Prompt Engineering Examples --")

# (heading, seed) pairs: each seed opens the generated text in a different way
prompt_examples = [
    ("Start with action/emotion:", "she ran without looking back, heart pounding. "),
    ("Ask a question:", "what is the meaning of the rabbit's message? "),
    ("Inject characters/objects:", "the cat smiled as it vanished slowly, leaving "),
]
for heading, prompt in prompt_examples:
    print("\n" + heading)
    print(generate_text(prompt, length=300))



-- Prompt Engineering Examples --

Start with action/emotion:
she ran without looking back, heart pounding. o meineeft ool boulb ’ain, w sherlet doc mti ishe s ce mey wonr
tawot wow, wl eo Toon! tashith “e os h.h
ther og, iwton wopsDt Sun lortoos nokeikey fad wayo Iorle thenoune HThosthen
ce sitinr thire iRok
otno wdis mowititighu tna aa the I”y nunw-thut uwelenl agn sogerry Ifud af waPghipl coem odl-inur

Ask a question:
what is the meaning of the rabbit's message? vadd eo?vet, “.g nde ante ors whhwts tleAtran thaby bar. wol
sh sarggamor. In ifperltiut. e o,ky Gat” nouep kon Ronank laP wa cs ugow loun woMvergud thancewerpinf inl (am“sherd
sodhd rumed efpud t)wopt at
here yon i_t Wit co Wed il Aunau dod wwan Pa Polone ane gon,

R
ron ouat Vinul cou’g. sot Hherg

Inject characters/objects:
the cat smiled as it vanished slowly, leaving lod no to thekiawnde we theers i
om f
eaiAr annnwe
lt aiw me no waf wogt wog THhawwunde lhec
lbeRy . eg she so thsorn,ht polesados
tIr odrsafd sal sr lor