<a href="https://colab.research.google.com/github/MitchMatt/Kenyan_Generative_Literature/blob/main/Kenyan_Generative_Literature_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Kenyan-Inspired Generative Literature with TensorFlow

This notebook trains a generative model using TensorFlow to create engaging and thought-provoking Kenyan literature based on a themed corpus.

In [25]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [26]:
# Kenyan-themed corpus for training
kenyan_corpus = [
    "Nairobi buzzed with life, matatus weaving through chaos.",
    "Under the equator sun, dreams took shape in busy markets.",
    "The Great Rift whispered ancient secrets to those who listened.",
    "In Mombasa, the ocean's waves told stories older than memory.",
    "Maasai warriors stood tall, guardians of vast savannas.",
    "At sunset, Mount Kenya blushed under golden skies.",
    "In Kibera's labyrinth, hope blossomed amidst adversity.",
    "Tea leaves from Kericho held flavors of earth and rain.",
    "Elders beneath baobab trees shared wisdom in hushed tones.",
    "Lake Victoria's fishermen sang songs of courage into the night."
]

In [27]:
# Tokenize the corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(kenyan_corpus)
total_words = len(tokenizer.word_index) + 1

In [28]:
# Create input sequences
input_sequences = []
for line in kenyan_corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(2, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [29]:
# Pad sequences and prepare training data
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
X, y = input_sequences[:,:-1], input_sequences[:,-1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

In [30]:
# Define the model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 64, input_length=max_sequence_len - 1),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



In [31]:
# Train the model (you may increase epochs for better results)
model.fit(X, y, epochs=200, verbose=1)

Epoch 1/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - accuracy: 0.0189 - loss: 4.4066
Epoch 2/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0189 - loss: 4.4000
Epoch 3/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.0299 - loss: 4.3967
Epoch 4/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.0331 - loss: 4.3915    
Epoch 5/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0221 - loss: 4.3866    
Epoch 6/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0221 - loss: 4.3810     
Epoch 7/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0631 - loss: 4.3734
Epoch 8/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.0858 - loss: 4.3637
Epoch 9/200
[1m3/3[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x78cad8708fd0>

In [32]:
# Function to generate text
def generate_kenyan_text(seed_text, next_words=50, diversity=1.0):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predictions = model.predict(token_list, verbose=0)[0]
        predictions = np.asarray(predictions).astype('float64')
        predictions = np.log(predictions + 1e-8) / diversity
        exp_preds = np.exp(predictions)
        predictions = exp_preds / np.sum(exp_preds)
        predicted = np.random.choice(range(len(predictions)), p=predictions)
        output_word = ''
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += ' ' + output_word
    return seed_text

In [33]:
# Generate sample Kenyan literature
seed = "In Nairobi's streets"
print("Generated Literature:\n")
print(generate_kenyan_text(seed))

Generated Literature:

In Nairobi's streets labyrinth hope blossomed amidst adversity adversity adversity adversity rain tones hushed chaos rain hushed tones rain rain rain chaos who rain rain rain rain rain rain rain rain those victoria's flavors of the rain rain rain rain rain chaos rain rain rain rain rain rain rain and listened rain rain
