# ASSIGNMENT 13
OLUYEMI OLUWOYE

https://github.com/OLUYEMI84/cola-GITHUB-demo-OO.git

In [21]:
import requests

# Use a reliable URL from Gutenberg
url = "https://www.gutenberg.org/cache/epub/11/pg11.txt"
response = requests.get(url)
text = response.text




In [23]:
# Confirm download
print(f"Downloaded text length: {len(text)}")
print("First 500 characters:")
print(text[:500])

Downloaded text length: 167674
First 500 characters:
﻿The Project Gutenberg eBook of Alice's Adventures in Wonderland
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
befo


In [27]:
# Remove header/footer

start = text.find("CHAPTER I")
if start != -1:
    text = text[start:]
else:
    print("Start marker not found. Using full text.")


In [28]:
# Show a preview
print(text[:1000])

CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.   A Caucus-Race and a Long Tale
 CHAPTER IV.    The Rabbit Sends in a Little Bill
 CHAPTER V.     Advice from a Caterpillar
 CHAPTER VI.    Pig and Pepper
 CHAPTER VII.   A Mad Tea-Party
 CHAPTER VIII.  The Queen’s Croquet-Ground
 CHAPTER IX.    The Mock Turtle’s Story
 CHAPTER X.     The Lobster Quadrille
 CHAPTER XI.    Who Stole the Tarts?
 CHAPTER XII.   Alice’s Evidence




CHAPTER I.
Down the Rabbit-Hole


Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into
the book her sister was reading, but it had no pictures or
conversations in it, “and what is the use of a book,” thought Alice
“without pictures or conversations?”

So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure of
making a daisy-chain would 

In [29]:
#  Tokenization
import tensorflow as tf
import numpy as np

# Char-level
vocab = sorted(set(text))
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

print(f"Vocabulary size: {len(vocab)}")

Vocabulary size: 90


In [30]:
# building dataset
seq_length = 100
examples_per_epoch = len(text_as_int)//(seq_length + 1)

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    return chunk[:-1], chunk[1:]

dataset = sequences.map(split_input_target)

# Batch + buffer
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)


In [32]:
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [34]:
# building model
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    return tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=None),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=False,  # set to False unless you handle states manually
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])

model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)



In [35]:
# training

def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

model.compile(optimizer='adam', loss=loss)

history = model.fit(dataset, epochs=10)


Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 6s/step - loss: 4.3405
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 6s/step - loss: 2.7610
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 6s/step - loss: 2.4073
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 6s/step - loss: 2.2378
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 6s/step - loss: 2.1078
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 6s/step - loss: 1.9807
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 6s/step - loss: 1.8654
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 6s/step - loss: 1.7684
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 6s/step - loss: 1.6772
Epoch 10/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 6s/step - loss: 1.5890

In [40]:
def generate_text(model, start_string):
    input_eval = tf.expand_dims([char2idx[c] for c in start_string], 0)
    text_generated = []
    temperature = 1.0

    for _ in range(500):
        predictions = tf.squeeze(model(input_eval), 0) / temperature
        predicted_id = tf.random.categorical(predictions, 1)[-1, 0].numpy()
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

print(generate_text(model, "Alice "))



Alice we,”, atho.
Q; t bU D•* te was
ttr arzhatwhegese s ak omm, cqE)[A!™
I “FEcaribeles WAblaton s ifus corcond Mathese w at: qY!—R™
anifesig?-ynise,
c™ Awhomag, seur f im ke dupo?1: s w “CO” ar,” amabow w
itld TFo(DO/_L2ny Y)D’viga•Ficobthe,” pe,”
PTl, inorus putexin
y
EG'6ùxh, as onogheed w
ppd trnbelive.FYoty pish ngowwabo; h the, whetrminop Om eg_
natechy rer oxenedoke h w
domenitoculen L y, moprpe d sthere’
ced me imond tinghokn MI ar, _‘Xù _Sth, dalar, qE rgraro—(A9);,”
songing s M



**Introduction to Generative AI and Its Significance**

Generative AI refers to artificial intelligence systems designed to produce new content such as text, images, music, or code. Unlike traditional models focused on classification or regression, generative models learn the underlying data distribution and can create realistic outputs that resemble human-produced data.

Generative AI plays a key role in applications like chatbots, creative writing, code assistants, image generation, and even drug discovery. Its capacity to assist, augment, or automate creative tasks marks it as a transformative technology.

***Description of GPT Architecture and Functionality***

Generative Pre-trained Transformers (GPTs) are large language models built on transformer decoder architectures. The key components of GPTs are:

Stacked transformer blocks with masked self-attention layers, which ensure the model only uses previous tokens when predicting the next token.

Multi-head self-attention, allowing the model to learn different types of relationships between tokens simultaneously.

Position embeddings, since transformers do not have built-in sequence order awareness.

Feedforward neural networks at each transformer layer, adding non-linear transformations.

GPTs generate text by converting input text into tokens, predicting the next token's probability distribution, and sampling from this distribution repeatedly until a stopping criterion is met.

***Methodology and Findings***

I built a simple character-level text generation model using TensorFlow. The dataset was Alice’s Adventures in Wonderland from Project Gutenberg. Below is the process:

Preprocessing: I converted characters into integer indices and sliced the text into overlapping sequences (input = first 100 characters; target = next 100 characters shifted by one).

Model: A sequential model with an embedding layer, a GRU layer (1024 units), and a dense output layer predicting the next character.

Training: I trained the model for 10 epochs. The model learned to mimic the writing style of the dataset, generating coherent, though often nonsensical, text.


***Applications of Generative AI and Demonstration***

Generative AI powers many real-world applications:

Content creation: Drafting articles, poetry, dialogue (e.g., ChatGPT).

Code generation: Assisting programmers.

Art and design: Generating images or music.

Education: Creating practice questions or explanations.

Our hands-on model demonstrates how even a simple generative model can produce creative text snippets that could inspire writers or be used for games.

***Ethical Considerations and Potential Solutions***

Generative AI raises several ethical concerns:

Bias propagation: AI can amplify biases in its training data.

Misinformation: Generated content might be mistaken for verified facts.

Plagiarism concerns: AI may replicate training data too closely.

Potential solutions:

Careful curation and filtering of training data.

Implementing content filters or moderation layers.

Marking AI-generated content (watermarking or metadata).

***Conclusion***

Generative AI, exemplified by models like GPT, represents a major advance in machine learning’s creative potential. The simple text generation project highlights the core mechanics behind this technology and its capacity for creative output.