In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import utils
from textblob import TextBlob
import nltk

nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Mount Google Drive so the corpus stored there is reachable from the notebook
from google.colab import drive
drive.mount('/content/drive')

# Load the text data: one list entry per line of the corpus file.
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector, and the explicit encoding removes the
# dependency on the platform default (assumes the corpus is UTF-8 -- confirm).
with open('/content/drive/My Drive/shakespeare.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read().splitlines()
# Tokenization: fit a word-level vocabulary on the corpus lines.
# One extra slot is added on top of the learned word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text)
vocab_size = 1 + len(tokenizer.word_index)

# Generate input sequences: for every encoded line, keep each prefix that
# holds at least two tokens (some context plus the word to predict).
encoded_lines = tokenizer.texts_to_sequences(text)
input_sequences = [
    token_ids[:end]
    for token_ids in encoded_lines
    for end in range(2, len(token_ids) + 1)
]

# Pad every prefix to the length of the longest one
max_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_len)

# Preparing input and labels: the last column is the target word (one-hot
# encoded over the vocabulary), everything before it is the context.
labels = utils.to_categorical(input_sequences[:, -1], num_classes=vocab_size)
inputs = input_sequences[:, :-1]
# Build the model: embedding -> two stacked LSTMs (dropout in between)
# -> softmax over the whole vocabulary.
context_len = max_len - 1  # the label column was split off from each sequence
layer_stack = [
    Embedding(vocab_size, 100, input_length=context_len),
    LSTM(128, return_sequences=True),  # full sequence feeds the second LSTM
    Dropout(0.2),
    LSTM(128),
    Dense(vocab_size, activation='softmax'),
]
model = Sequential(layer_stack)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Train on the (context, next-word) pairs; keep the history for inspection
history = model.fit(inputs, labels, epochs=200)
def sentiment_score(text):
    """Return the VADER ``'compound'`` polarity score for ``text``.

    A new ``SentimentIntensityAnalyzer`` is created on every call.
    """
    polarity = SentimentIntensityAnalyzer().polarity_scores(text)
    return polarity['compound']

def lexical_diversity(text):
    """Return the ratio of distinct words to total words in ``text``.

    Words are the whitespace-separated tokens of ``text``; comparison is
    exact (case and punctuation are not normalised). Returns 0 when the
    text contains no words.
    """
    tokens = text.split()
    if not tokens:
        return 0
    distinct = set(tokens)
    return len(distinct) / len(tokens)

def average_word_length(text):
    """Return the mean character length of the words in ``text``.

    Words are the whitespace-separated tokens of ``text``. Returns 0 when
    the text contains no words.
    """
    tokens = text.split()
    if not tokens:
        return 0
    lengths = list(map(len, tokens))
    return np.mean(lengths)

def iambic_pentameter_adherence(line):
    """Report whether ``line`` adheres to iambic pentameter.

    Placeholder: no metrical analysis is performed and ``line`` is not
    inspected, so every input is accepted. The intended simplified check is
    10 syllables with stress on every second syllable.
    """
    # TODO: replace with a real syllable/stress check
    return True

def format_adherence(sonnet):
    """Return True when ``sonnet`` consists of exactly 14 lines.

    Simplified format check: only the number of lines is inspected, not
    rhyme scheme or metre.

    ``str.splitlines`` is used instead of ``split('\\n')`` so that a trailing
    newline is not miscounted as an extra (empty) 15th line and so that
    ``\\r\\n`` line endings are handled. Blank lines inside the text still
    count as lines.
    """
    return len(sonnet.splitlines()) == 14
def generate_text(initial_text, words_per_line=10, total_lines=14):
    """Generate sonnet-shaped text by greedy next-word prediction.

    Starting from ``initial_text``, the module-level ``model`` is asked for
    the most probable next word ``words_per_line * total_lines`` times; each
    predicted word is appended to the running context and grouped into lines
    of ``words_per_line`` words.

    Args:
        initial_text: Seed text used as the initial context. It is not
            included in the returned text.
        words_per_line: Number of generated words per output line.
        total_lines: Number of lines to generate.

    Returns:
        A 5-tuple ``(full_text, sentiment, diversity, avg_word_len,
        adheres_format)``: the generated lines joined with newlines, followed
        by the results of ``sentiment_score``, ``lexical_diversity``,
        ``average_word_length`` and ``format_adherence`` on that text.
    """
    context_len = max_len - 1  # loop invariant: model input length
    new_text = initial_text
    sonnet = []
    current_line = []
    for _ in range(words_per_line * total_lines):
        tokens = tokenizer.texts_to_sequences([new_text])
        # pad_sequences also truncates, keeping the most recent tokens
        pad_seq = pad_sequences(tokens, maxlen=context_len)
        # verbose=0: predict() is called once per generated word, and the
        # default verbosity would print a progress bar for every single call.
        preds = model.predict(pad_seq, verbose=0)[0]
        # Greedy decoding; plain int so the dict lookup uses a native key
        next_index = int(np.argmax(preds))
        # Indices without a vocabulary entry fall back to 'unknown'
        next_word = tokenizer.index_word.get(next_index, 'unknown')
        current_line.append(next_word)
        new_text += ' ' + next_word
        if len(current_line) >= words_per_line:
            sonnet.append(" ".join(current_line))
            current_line = []
    full_text = "\n".join(sonnet)
    return (
        full_text,
        sentiment_score(full_text),
        lexical_diversity(full_text),
        average_word_length(full_text),
        format_adherence(full_text),
    )
# Example usage: generate a sonnet from a seed phrase and report its metrics
results = generate_text('Adorned in shades of red')
generated_text, sentiment, diversity, avg_word_len, adheres_format = results

report_labels = (
    "Generated Text:",
    "Sentiment Score:",
    "Lexical Diversity:",
    "Average Word Length:",
    "Format Adherence:",
)
# The labels are listed in the same order as the values in the result tuple
for label, value in zip(report_labels, results):
    print(label, value)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Mounted at /content/drive
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 10, 100)           320100    
                                                                 
 lstm (LSTM)                 (None, 10, 128)           117248    
                                                                 
 dropout (Dropout)           (None, 10, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 3201)              412929    
                                                                 
Total params: 981861 (3.75 MB)
Trainable params: 981861 (3.75 MB)
Non-trainable params: 0 (0.00 Byte)
____________________________________________________________

In [None]:
hours that for thee is afford hate men men cupid
heat ' taken set counted me while you deeds still
brought his heart of state her shalt grow not and
skill away their woe outward brow windows windows so ill
cunning outward windows windows windows still while moon twain self
brain husbandry cupid call cupid painted one so so me
me so short a lease of me not so write
so now you much give stay her memory end staineth
night still taken ill are ill night i keep aside
green a time exchanged day night not tyrannous foes commend
doom or store a time exchanged spend taken men so
while all ill age ever be away young thine end
all me cruel in windows told me green away loving
head live live made all the mouths so vanishing decease

hours that for thee is afford hate men men cupid
heat ' taken set counted me while you deeds still
brought his heart of state her shalt grow not and
skill away their woe outward brow windows windows so ill
cunning outward windows windows windows still while moon twain self
brain husbandry cupid call cupid painted one so so me
me so short a lease of me not so write
so now you much give stay her memory end staineth
night still taken ill are ill night i keep aside
green a time exchanged day night not tyrannous foes commend
doom or store a time exchanged spend taken men so
while all ill age ever be away young thine end
all me cruel in windows told me green away loving
head live live made all the mouths so vanishing decease