In [2]:
pip install tensorflow numpy pandas


Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense


In [4]:
# Training corpus: ten short Python-themed questions, one string per question.
# The tokenizer fitted in the next cell builds its vocabulary from these strings.
questions = [
    "What is Python?",
    "Explain the OOP concept in Python.",
    "What is a list in Python?",
    "How does a dictionary work in Python?",
    "What are functions in Python?",
    "What is inheritance in Python?",
    "Explain decorators in Python.",
    "How does exception handling work in Python?",
    "What are generators in Python?",
    "What is multithreading in Python?"
]


In [5]:
# Build the word-level vocabulary from the question corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions)
# One extra slot on top of the vocabulary size (Keras reserves index 0,
# which is never assigned to a word).
total_words = 1 + len(tokenizer.word_index)

# Encode every question as a list of integer word ids (one list per question).
input_sequences = tokenizer.texts_to_sequences(questions)


In [6]:
# Left-pad the encoded questions with zeros so they all share one length.
# pad_sequences already returns a NumPy array, so no extra np.array() wrapper
# is needed.
input_sequences = pad_sequences(input_sequences, padding='pre')

# Prepare input and label data: every token except the last is the model
# input, the last token of each question is the label.
X = input_sequences[:, :-1]
y = input_sequences[:, -1]

# Keep y as integer class ids here. The labels are one-hot encoded to
# `total_words` columns by to_categorical(...) in a later cell.
# pd.get_dummies(y) created columns only for the label values actually
# present, so its width did not match the model's Dense(total_words) output,
# and encoding its result a second time is only correct by coincidence.


In [7]:
# Next-word classifier: embedding -> LSTM -> softmax over the vocabulary.
model = Sequential([
    Embedding(total_words, 100, input_length=X.shape[1]),
    LSTM(150),
    Dense(total_words, activation='softmax'),
])

# Labels are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()




In [8]:
from tensorflow.keras.utils import to_categorical


In [9]:
# One-hot encode the output (y)
# The labels are rebuilt from the last column of the padded sequences rather
# than re-encoding whatever `y` currently holds. This keeps the cell idempotent
# (re-running it gives the same result) and avoids one-hot encoding an array
# that an earlier cell has already encoded.
y = to_categorical(input_sequences[:, -1], num_classes=total_words)


In [10]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Training corpus: ten short Python-themed questions, one string per question.
# The tokenizer below is fitted on this list, and the n-gram training
# sequences are derived from it.
questions = [
    "What is Python?",
    "Explain the OOP concept in Python.",
    "What is a list in Python?",
    "How does a dictionary work in Python?",
    "What are functions in Python?",
    "What is inheritance in Python?",
    "Explain decorators in Python.",
    "How does exception handling work in Python?",
    "What are generators in Python?",
    "What is multithreading in Python?"
]

# Step 1: Fit a word-level tokenizer on the corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions)
total_words = 1 + len(tokenizer.word_index)

# Step 2: Expand every encoded question into all of its prefixes of length >= 2,
# so each prefix supplies one (context, next word) training example
input_sequences = [
    encoded[:end]
    for encoded in tokenizer.texts_to_sequences(questions)
    for end in range(2, len(encoded) + 1)
]

# Step 3: Left-pad every prefix to the length of the longest one
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Step 4: The last token of each row is the label, the rest is the input
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# Step 5: One-hot encode the labels over the whole vocabulary
y = to_categorical(y, num_classes=total_words)

# Step 6: Build the model (embedding -> LSTM -> softmax over the vocabulary)
model = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_len - 1),
    LSTM(150),
    Dense(total_words, activation='softmax'),
])

# Step 7: Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 8: Train the model
model.fit(X, y, epochs=100, verbose=1)

# Print the architecture of the trained model
model.summary()

# Step 9: Function to generate new questions
def generate_question(seed_text, next_words=5):
    """Extend ``seed_text`` by greedily appending the model's most likely next word.

    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Prompt to continue.
        next_words: Number of words to append.

    Returns:
        ``seed_text`` followed by ``next_words`` space-separated predicted words.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)[0]
        # Output slot 0 is the padding index: it has no entry in
        # tokenizer.index_word, so letting argmax pick it would raise KeyError.
        # Search the real vocabulary only and shift the result back by one.
        predicted_index = int(np.argmax(predicted[1:])) + 1
        predicted_word = tokenizer.index_word[predicted_index]
        seed_text += " " + predicted_word
    return seed_text

# Example: extend the prompt "What is" by five predicted words and print the result
seed_text = "What is"
generated_question = generate_question(seed_text, next_words=5)
print("Generated Question:", generated_question)


Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.0569 - loss: 3.1367
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3886 - loss: 3.1144 
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4300 - loss: 3.0938 
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4922 - loss: 3.0677
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3990 - loss: 3.0365
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3990 - loss: 2.9909 
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3886 - loss: 2.9320 
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3886 - loss: 2.8538 
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

Generated Question: What is inheritance in python python python


In [11]:
generate_question

<function __main__.generate_question(seed_text, next_words=5)>

In [12]:
"Explain the"

'Explain the'

In [13]:
generate_question

<function __main__.generate_question(seed_text, next_words=5)>

In [14]:
# Function to generate a question (greedy decoding; this re-defines the
# function of the same name from the training cell)
def generate_question(seed_text, next_words=5):
    """Extend ``seed_text`` by greedily appending the model's most likely next word.

    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Prompt to continue.
        next_words: Number of words to append.

    Returns:
        ``seed_text`` followed by ``next_words`` space-separated predicted words.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)[0]
        # Output slot 0 is the padding index: it has no entry in
        # tokenizer.index_word, so letting argmax pick it would raise KeyError.
        # Search the real vocabulary only and shift the result back by one.
        predicted_index = int(np.argmax(predicted[1:])) + 1
        predicted_word = tokenizer.index_word[predicted_index]
        seed_text += " " + predicted_word
    return seed_text

# Example: extend the prompt "What is" by five predicted words and print the result
seed_text = "What is"
generated_question = generate_question(seed_text, next_words=5)
print("Generated Question:", generated_question)


Generated Question: What is inheritance in python python python


In [15]:
# Extend the two-word prompt "Explain the" by five predicted words.
seed_text = "Explain the"
generated_question = generate_question(seed_text, next_words=5)
print("Generated Question:", generated_question)


Generated Question: Explain the oop concept in python python


In [16]:
# Extend the two-word prompt "How does" by five predicted words.
seed_text = "How does"
generated_question = generate_question(seed_text, next_words=5)
print("Generated Question:", generated_question)


Generated Question: How does exception handling work in python


In [17]:
# Extend a single-word prompt by ten words. No stop condition exists, so
# generation always runs for the full requested word count.
seed_text = "Explain"
generated_question = generate_question(seed_text, next_words=10)
print("Generated Question:", generated_question)


Generated Question: Explain the oop concept in python python python python python python


In [18]:
# Temperature Sampling

import numpy as np

def generate_question(seed_text, next_words=5, temperature=1.0):
    """Extend ``seed_text`` by sampling next words with temperature scaling.

    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Prompt to continue.
        next_words: Number of words to append.
        temperature: Positive scaling factor applied to the log-probabilities;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        ``seed_text`` followed by ``next_words`` space-separated sampled words.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)[0]

        # Drop output slot 0 (padding index): it has no entry in
        # tokenizer.index_word, so sampling it would raise KeyError.
        vocab_probs = np.asarray(predicted_probs[1:], dtype=np.float64)

        # Apply temperature in log space; subtracting the maximum before
        # exponentiating keeps the softmax numerically stable.
        scaled_logits = np.log(vocab_probs + 1e-7) / temperature
        weights = np.exp(scaled_logits - np.max(scaled_logits))
        weights /= np.sum(weights)

        # +1 maps the position in the sliced array back to the tokenizer index.
        predicted_word_index = int(np.random.choice(len(weights), p=weights)) + 1
        predicted_word = tokenizer.index_word[predicted_word_index]

        seed_text += " " + predicted_word
    return seed_text


In [19]:
# Sample a five-word continuation of "What is" with temperature 0.8.
seed_text = "What is"
generated_question = generate_question(seed_text, next_words=5, temperature=0.8)
print("Generated Question:", generated_question)


Generated Question: What is inheritance in python python python


In [20]:
def generate_question(seed_text, next_words=5, temperature=1.0):
    """Extend ``seed_text`` by temperature sampling with a repetition penalty.

    Words already present in the seed or generated so far have their sampling
    weight multiplied by 0.01 before the next word is drawn.
    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Prompt to continue.
        next_words: Number of words to append.
        temperature: Positive scaling factor applied to the log-probabilities.

    Returns:
        ``seed_text`` followed by ``next_words`` space-separated sampled words.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Track token indices rather than raw strings: the generated words come
    # back lower-cased from the tokenizer, so a raw seed word such as "What"
    # would never match a vocabulary key and would escape the penalty.
    used_indices = set(tokenizer.texts_to_sequences([seed_text])[0])

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)[0]

        # Apply temperature in log space (max-subtraction keeps exp stable).
        scaled_logits = np.log(np.asarray(predicted_probs, dtype=np.float64) + 1e-7) / temperature
        weights = np.exp(scaled_logits - np.max(scaled_logits))

        # Slot 0 is the padding index and has no entry in index_word.
        weights[0] = 0.0

        # Penalize previously used words
        for index in used_indices:
            weights[index] *= 0.01

        # Renormalise: np.random.choice rejects probabilities that do not
        # sum to 1, which the penalty above would otherwise cause.
        weights /= np.sum(weights)

        predicted_word_index = int(np.random.choice(len(weights), p=weights))
        predicted_word = tokenizer.index_word[predicted_word_index]

        seed_text += " " + predicted_word
        used_indices.add(predicted_word_index)
    return seed_text


In [28]:
import math
def remove_repeated_words(text):
    """Collapse each run of consecutive identical words in ``text`` to one word.

    The text is split on whitespace and re-joined with single spaces; the
    comparison between neighbouring words is case-sensitive.
    """
    kept = []
    for word in text.split():
        # Keep a word only when it differs from the one kept just before it.
        if not kept or kept[-1] != word:
            kept.append(word)
    return ' '.join(kept)

# Example of usage: generate a five-word continuation of "What is" with
# whichever generate_question definition was executed last, then collapse
# immediately repeated words before printing.
seed_text = "What is"
generated_question = generate_question(seed_text, next_words=5)
cleaned_question = remove_repeated_words(generated_question)
print("Cleaned Generated Question:", cleaned_question)


Cleaned Generated Question: What is inheritance in python in


In [29]:
def generate_question(seed_text, next_words=5, temperature=1.0):
    """Extend ``seed_text`` by temperature sampling with a repetition penalty.

    Words already present in the seed or generated so far have their sampling
    weight multiplied by 0.01, after which the distribution is renormalised
    and the next word is drawn from it.
    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Prompt to continue.
        next_words: Number of words to append.
        temperature: Positive scaling factor applied to the log-probabilities.

    Returns:
        ``seed_text`` followed by ``next_words`` space-separated sampled words.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Track token indices rather than raw strings: the generated words come
    # back lower-cased from the tokenizer, so a raw seed word such as "What"
    # would never match a vocabulary key and would escape the penalty.
    used_indices = set(tokenizer.texts_to_sequences([seed_text])[0])

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)[0]

        # Apply temperature in log space (max-subtraction keeps exp stable).
        scaled_logits = np.log(np.asarray(predicted_probs, dtype=np.float64) + 1e-7) / temperature
        weights = np.exp(scaled_logits - np.max(scaled_logits))

        # Slot 0 is the padding index and has no entry in index_word, so it
        # must never be sampled.
        weights[0] = 0.0

        # Penalize previously used words
        for index in used_indices:
            weights[index] *= 0.01

        # Normalize the weights so they sum to 1, as np.random.choice requires
        weights /= np.sum(weights)

        # Pick the next word based on the updated probabilities
        predicted_word_index = int(np.random.choice(len(weights), p=weights))
        predicted_word = tokenizer.index_word[predicted_word_index]

        seed_text += " " + predicted_word
        used_indices.add(predicted_word_index)
    return seed_text


In [30]:
# Example of usage: sample a five-word continuation of "What is" (default
# temperature), then collapse immediately repeated words before printing.
seed_text = "What is"
generated_question = generate_question(seed_text, next_words=5)
cleaned_question = remove_repeated_words(generated_question)
print("Cleaned Generated Question:", cleaned_question)


Cleaned Generated Question: What is inheritance in python concept
