# 1. Build a NLP Language model for text generation involves train a neural network to predict the next word in a sequence of words.

In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load pre-trained model and tokenizer
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Generate text
input_text = "Once upon a time"
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate multiple words
output = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2, top_k=50, top_p=0.95)

# Convert the generated output tensor to a list
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("Generated Text:", generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Text: Once upon a time, it was said, there was a man in the house of the Lord, and he said to him, "Lord, I have heard that there is a woman in this house. She is the daughter of Joseph." And Joseph said unto the woman, Behold, she is my wife.

And the man answered, Yea, but ye have not seen her, for she hath not come unto me. And he took her and put her in his hand, saying


# The other way is:

In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences




In [25]:
text_data = ["I woke up this morning feeling an overwhelming sense of joy. The sun streamed through the window, casting a warm glow that filled me with happiness. As I stepped outside, a gentle breeze brushed against my skin, evoking a sense of calm and contentment. However, as the day progressed, a wave of nostalgia washed over me, reminding me of cherished memories from the past. I found myself smiling at old photographs, feeling a mix of joy and longing.Suddenly, a pang of sadness hit me as I remembered missed opportunities and lost connections. Yet, hope flickered within me, like a small flame refusing to be extinguished. Determination surged through my veins, propelling me forward despite the obstacles. Later, an unexpected surprise lifted my spirits, filling me with excitement and anticipation for what lay ahead. Ultimately, today has been a whirlwind of emotions, each one leaving its mark on my heart."]

In [3]:
# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
total_words = len(tokenizer.word_index) + 1

In [4]:
input_sequences = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [5]:
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

In [6]:
X = input_sequences[:, :-1]
labels = input_sequences[:, -1]

In [7]:
from tensorflow.keras.utils import to_categorical
y = to_categorical(labels, num_classes=total_words)

In [8]:
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))




In [9]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [10]:
model.fit(X, y, epochs=50, verbose=1)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x25eaa1950d0>

In [11]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)[0]
        predicted = np.argmax(predicted_probs)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

In [12]:
generated_text = generate_text("hello", 10, model, max_sequence_len)
print(generated_text)

hello woke up this morning feeling an overwhelming sense of joy


# 2. Build a Speech to Text model.

In [13]:
import speech_recognition as sr

samp=sr.AudioFile("1.wav")
samp

<speech_recognition.AudioFile at 0x25eb1db61d0>

In [14]:
recog=sr.Recognizer()

with samp as source:
    audio=recog.record(samp)

    res=recog.recognize_google(audio)
print('Text for the Audio:\n')
print(res)

Text for the Audio:

987654321 0


# 3. Build a Text to Speech model.

In [15]:
from gtts import gTTS
import os
def text_to_speech(text,language='en',filename='output.mp3'):
    tts=gTTS(text=text,lang=language,slow=False)
    tts.save(filename)
    os.system(f"start {filename}")

input_text=input('User text pl >>:')
text_to_speech(input_text)

User text pl >>:Hi, This is your voice assistant. How can I help you?


# 4. Build a NLP Language model to detect the sentence/word error in the text corpus.

In [16]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import words

In [17]:
# Download the words corpus (if not already downloaded)
nltk.download('words')

# Sample text with intentional errors
text = input("Enter the text :")

# Tokenize the text into words
tokens = word_tokenize(text)

# Get the set of English words from the nltk corpus
english_vocab = set(words.words())

# Check for misspelled words
misspelled_words = [word for word in tokens if word.lower() not in english_vocab]

print()

# Print misspelled words
if len(misspelled_words) > 0:
    print("Misspelled Words:")
    print(misspelled_words)
else:
    print("No misspelled words found.")

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\Ramesh\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


Enter the text :I am the keng off the contry. I will rule from now onwords.

Misspelled Words:
['keng', 'contry', '.', 'onwords', '.']


# 5. Build a Language model to correct the error in the text.

In [24]:
from spellchecker import SpellChecker

spell = SpellChecker()

# Example sentence with errors
sentence_with_errors = "Thes arre somee speling errrs in thiss sentenc."

# Split the sentence into words
words = sentence_with_errors.split()

# Identify misspelled words
misspelled = spell.unknown(words)

# Correct misspelled words
corrected_sentence = " ".join(spell.correction(word) for word in words)

print("Corrected Sentence:", corrected_sentence)

Corrected Sentence: Thes are some spelling errors in thiss sentence
