<a href="https://colab.research.google.com/github/Raissa-hue310/Assignment-13-Generative-AI-Essentials/blob/main/Assignment13_Generative_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 13: Generative AI Essentials
Shakespeare Text Generation - By Raïssa Matho Mekjele

In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import requests

In [5]:
# -----------------------------
# 1. LOAD DATA
# -----------------------------
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
text = requests.get(url).text

print("Sample text:")
print(text[:500])

Sample text:
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor


In [6]:
# -----------------------------
# 2. CHAR-LEVEL TOKENIZATION
# -----------------------------
tokenizer = Tokenizer(char_level=True, filters='')
tokenizer.fit_on_texts([text])

total_chars = len(tokenizer.word_index) + 1
print("Total unique characters:", total_chars)

# Convert full dataset into sequences safely
encoded = tokenizer.texts_to_sequences([text])[0]

Total unique characters: 40


In [7]:
# -----------------------------
# 3. SEQUENCE GENERATION
# -----------------------------
sequence_length = 40

sequences = []
next_chars = []

for i in range(len(encoded) - sequence_length):
    sequences.append(encoded[i:i+sequence_length])
    next_chars.append(encoded[i+sequence_length])

X = np.array(sequences)
y = tf.keras.utils.to_categorical(next_chars, num_classes=total_chars)

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (1115354, 40)
y shape: (1115354, 40)


In [8]:
# -----------------------------
# 4. BUILD MODEL
# -----------------------------
model = Sequential([
    Embedding(total_chars, 64, input_length=sequence_length),
    LSTM(128),
    Dense(total_chars, activation='softmax')
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()



In [9]:
# -----------------------------
# 5. TRAIN MODEL
# -----------------------------
history = model.fit(X, y, epochs=3, batch_size=128)

Epoch 1/3
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1103s[0m 126ms/step - accuracy: 0.3697 - loss: 2.1632
Epoch 2/3
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1089s[0m 125ms/step - accuracy: 0.5122 - loss: 1.6123
Epoch 3/3
[1m8714/8714[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1085s[0m 124ms/step - accuracy: 0.5377 - loss: 1.5115


In [10]:
# -----------------------------
# 6. TEXT GENERATION FUNCTION
# -----------------------------
reverse_index = {v: k for k, v in tokenizer.word_index.items()}

def generate_text(seed, length=300):
    generated = seed

    for _ in range(length):
        encoded_seed = tokenizer.texts_to_sequences([seed[-sequence_length:]])[0]
        encoded_seed = pad_sequences([encoded_seed], maxlen=sequence_length)

        predictions = model.predict(encoded_seed, verbose=0)
        next_index = np.argmax(predictions)
        next_char = reverse_index.get(next_index, "")

        generated += next_char
        seed += next_char

    return generated

In [11]:
# -----------------------------
# 7. GENERATE SAMPLE TEXT
# -----------------------------
seed_text = "ROMEO: My love is like the sun"
generated_sample = generate_text(seed_text, 400)

print("\nGenerated Text:\n")
print(generated_sample)


Generated Text:

ROMEO: My love is like the sun
that i will be the strike and the strike.

prospero:
what shall be the sently to the sun and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the se


In [12]:
# -----------------------------
# 8. APPLICATION DEMO
# -----------------------------
def style_rewriter(input_text):
    seed = input_text[:40]
    return generate_text(seed, 250)

demo_output = style_rewriter("I dream of building a better future for all.")
print("\nDemo Application Output:\n")
print(demo_output)


Demo Application Output:

I dream of building a better future for the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle and the sentle
