In [None]:
pip install gensim



In [None]:

# Simple Text Summarization in Python
import gensim
# Import the summarize() helper from gensim's summarization package.
# NOTE(review): gensim.summarization is not shipped with gensim 4.x — confirm the
# installed version still provides this module before running the cell.
from gensim.summarization.summarizer import summarize

# Paragraph to summarize
paragraph = "Natural language processing (NLP) is the ability of a computer program to understand human language as it is spoken. NLP is a component of artificial intelligence (AI). The development of NLP applications is challenging because computers traditionally require humans to 'speak' to them in a programming language that is precise, unambiguous and highly structured, or through a limited number of clearly enunciated voice commands. Human speech, however, is not always precise -- it is often ambiguous and the linguistic structure can depend on many complex variables, including slang, regional dialects and social context."

# Summary sized relative to the original text: ratio=0.7 is passed
# (presumably "keep about 70% of the content" — see the gensim docs for the exact unit).
summ_per = summarize(paragraph, ratio = 0.7)
print("Percent summary:")
print(summ_per)

# Summary sized by a word budget: word_count=30 is passed.
summ_words = summarize(paragraph, word_count = 30)
print("\n")
print("Word count summary:")
print(summ_words)

In [None]:
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize  # You may need to install NLTK
import nltk
nltk.download('punkt')
# Sample corpus (list of sentences)
corpus = [
    "This is a sample sentence.",
    "Word embeddings are fascinating.",
    "Machine learning is fun.",
]

# Tokenize the corpus (lower-cased so the vocabulary is case-insensitive)
tokenized_corpus = [word_tokenize(sentence.lower()) for sentence in corpus]

# Train the Word2Vec model (sg=0 selects CBOW; min_count=1 keeps every word of this tiny corpus)
model = Word2Vec(sentences=tokenized_corpus, vector_size=100, window=5, min_count=1, sg=0)

# Input two sentences
sentence1 = "This is a sample text."
sentence2 = "Machine learning is interesting."

# Tokenize and preprocess the input sentences the same way as the training corpus
tokenized_sentence1 = word_tokenize(sentence1.lower())
tokenized_sentence2 = word_tokenize(sentence2.lower())

# wmdistance() returns the Word Mover's Distance: 0 means identical and LARGER
# values mean LESS similar, so it must not be reported as a similarity score.
# NOTE(review): "text" and "interesting" do not occur in the training corpus, so
# they are presumably ignored as out-of-vocabulary — confirm against the gensim docs.
wm_distance = model.wv.wmdistance(tokenized_sentence1, tokenized_sentence2)

# Print the distance
print(f"Word Mover's Distance between '{sentence1}' and '{sentence2}': {wm_distance:.3f}")


In [None]:
# Measure the similarity between two sentences with spaCy's Doc.similarity().
import spacy
# NOTE(review): "en_core_web_sm" is the small English pipeline; it presumably ships
# without static word vectors, which would make similarity() less reliable — confirm
# and consider a larger model if the score matters.
nlp = spacy.load("en_core_web_sm")

# Parse each sentence into a spaCy Doc
s1 = nlp("The weather is rainy.")
s2 = nlp("It is going to rain outside.")

# Compute and print the similarity of the two Docs
print("The similarity is:",s1.similarity(s2))

### Word Correction

In [None]:
import nltk
nltk.download('punkt')

from textblob import TextBlob

def correct_text_with_textblob(text):
    """Return ``text`` with TextBlob's spelling correction applied.

    The whole blob is corrected in one call instead of correcting
    ``blob.words`` one by one and re-joining with spaces: iterating over
    ``blob.words`` discards punctuation and the original spacing, so the
    result no longer resembled the input sentence.

    Args:
        text: The sentence (``str``) that may contain spelling errors.

    Returns:
        The corrected sentence as a plain ``str``.
    """
    # correct() returns a blob object; convert it back to str for callers.
    return str(TextBlob(text).correct())


# Interactive entry point: prompt for a sentence, correct it, and print the result.
if __name__ == "__main__":
    input_text = input("Enter a sentence with spelling errors: ")
    corrected_text = correct_text_with_textblob(input_text)
    print("Corrected Text:", corrected_text)


### Sentiment Analysis

In [None]:

# Installing NLTK and Downloading the Data


import nltk
nltk.download('twitter_samples')

[nltk_data] Downloading package twitter_samples to /root/nltk_data...
[nltk_data]   Unzipping corpora/twitter_samples.zip.


True

In [None]:
# Tokenizing the Data

from nltk.corpus import twitter_samples

In [None]:
# Load the text of the three files in the twitter_samples corpus:
# the positive set, the negative set, and a third, unlabelled-by-name dump.
positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')
text = twitter_samples.strings('tweets.20150430-223406.json')

In [None]:
# The punkt module is a pre-trained model that helps you tokenize words and sentences.

nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Tokens of the FIRST positive tweet only (note the trailing [0]);
# a later cell rebinds tweet_tokens to the full list of tokenized tweets.
tweet_tokens = twitter_samples.tokenized('positive_tweets.json')[0]

# Here tweet_tokens[0] is therefore the first token of that tweet.
print(tweet_tokens[0])

#FollowFriday


In [None]:
# wordnet is a lexical database for the English language that helps the script determine the base word.
# You need the averaged_perceptron_tagger resource to determine the context of a word in a sentence.

nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
# pos_tag must be imported here: this cell runs before the cell that otherwise
# imports it, so a fresh "Restart & Run All" would fail with a NameError.
from nltk.tag import pos_tag

# Rebind tweet_tokens to the full list of tokenized positive tweets and tag the first one.
tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
print(pos_tag(tweet_tokens[0]))

[('#FollowFriday', 'JJ'), ('@France_Inte', 'NNP'), ('@PKuchly57', 'NNP'), ('@Milipol_Paris', 'NNP'), ('for', 'IN'), ('being', 'VBG'), ('top', 'JJ'), ('engaged', 'VBN'), ('members', 'NNS'), ('in', 'IN'), ('my', 'PRP$'), ('community', 'NN'), ('this', 'DT'), ('week', 'NN'), (':)', 'NN')]


In [None]:
...

from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer
nltk.download('omw-1.4')

def lemmatize_sentence(tokens):
    """Lemmatize every token of a sentence using its part-of-speech tag.

    Args:
        tokens: Sequence of word tokens making up one sentence.

    Returns:
        A list with one lemma per input token, in the original order.
    """
    wordnet = WordNetLemmatizer()

    def _wordnet_pos(tag):
        # Tags starting with NN are lemmatized as nouns, VB as verbs,
        # and everything else falls back to the adjective class.
        if tag.startswith('NN'):
            return 'n'
        if tag.startswith('VB'):
            return 'v'
        return 'a'

    return [wordnet.lemmatize(word, _wordnet_pos(tag)) for word, tag in pos_tag(tokens)]

# Lemmatize the tokens bound to tweet_tokens[0] and show the result.
print(lemmatize_sentence(tweet_tokens[0]))

# Reference for the Penn Treebank tag names whose NN*/VB* prefixes are tested above:
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


['#FollowFriday', '@France_Inte', '@PKuchly57', '@Milipol_Paris', 'for', 'be', 'top', 'engage', 'member', 'in', 'my', 'community', 'this', 'week', ':)']


In [None]:
# Removing Noise from the Data

import re, string

def remove_noise(tweet_tokens, stop_words = ()):
    """Clean one tokenized tweet for use as classifier features.

    For every token: URLs and @mentions are stripped, the remainder is
    lemmatized according to its part-of-speech tag, and the lower-cased
    result is kept unless it is empty, is contained in
    ``string.punctuation``, or is a stop word.

    Args:
        tweet_tokens: Sequence of tokens for a single tweet.
        stop_words: Iterable of lower-case words to drop (default: none).

    Returns:
        List of cleaned, lower-cased tokens in their original order.
    """
    # Raw strings keep the regex escapes (\( and \)) from being treated as
    # invalid Python string escapes; the patterns themselves are unchanged.
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|'
        r'(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_pattern = re.compile(r"(@[A-Za-z0-9_]+)")

    # Built once per call instead of once per token.
    lemmatizer = WordNetLemmatizer()
    # Set membership is O(1); the caller typically passes a list.
    stop_word_set = frozenset(stop_words)

    cleaned_tokens = []

    for token, tag in pos_tag(tweet_tokens):
        token = url_pattern.sub('', token)
        token = mention_pattern.sub('', token)

        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'

        token = lemmatizer.lemmatize(token, pos)
        lowered = token.lower()

        # `token not in string.punctuation` is a substring test on purpose:
        # multi-character emoticons such as ":)" survive it.
        if len(token) > 0 and token not in string.punctuation and lowered not in stop_word_set:
            cleaned_tokens.append(lowered)
    return cleaned_tokens

In [None]:
...
# Fetch the stop-word corpus and load the English list.
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = stopwords.words('english')

# Clean the first tokenized tweet, this time also dropping stop words.
print(remove_noise(tweet_tokens[0], stop_words))

['#followfriday', 'top', 'engage', 'member', 'community', 'week', ':)']


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Re-load the stop words so this cell does not depend on the demo cell above.
stop_words = stopwords.words('english')

# Tokenized positive / negative tweets (one token list per tweet).
positive_tweet_tokens = twitter_samples.tokenized('positive_tweets.json')
negative_tweet_tokens = twitter_samples.tokenized('negative_tweets.json')

# Clean every tweet; comprehensions replace the manual append loops so the
# cell builds both lists from scratch each time it is run.
positive_cleaned_tokens_list = [
    remove_noise(tokens, stop_words) for tokens in positive_tweet_tokens
]
negative_cleaned_tokens_list = [
    remove_noise(tokens, stop_words) for tokens in negative_tweet_tokens
]

In [None]:
...
# Compare one raw tokenized tweet with its cleaned counterpart (index 500 in both lists).
print(positive_tweet_tokens[500])
print(positive_cleaned_tokens_list[500])

['Dang', 'that', 'is', 'some', 'rad', '@AbzuGame', '#fanart', '!', ':D', 'https://t.co/bI8k8tb9ht']
['dang', 'rad', '#fanart', ':d']


In [None]:
...

def get_all_words(cleaned_tokens_list):
    """Lazily yield every token from a list of token lists, in order.

    Args:
        cleaned_tokens_list: Iterable whose items are iterables of tokens.

    Yields:
        Each token of each inner iterable, flattened into one stream.
    """
    for token_group in cleaned_tokens_list:
        yield from token_group

# get_all_words is a generator function, so all_pos_words can be iterated only once;
# re-run this cell to obtain a fresh stream before consuming it again.
all_pos_words = get_all_words(positive_cleaned_tokens_list)

In [None]:
from nltk import FreqDist

# Create the token stream inside this cell: a generator made in another cell is
# exhausted after one pass, so re-running this cell would otherwise count nothing.
freq_dist_pos = FreqDist(get_all_words(positive_cleaned_tokens_list))
print(freq_dist_pos.most_common(10))

[(':)', 3691), (':-)', 701), (':d', 658), ('thanks', 388), ('follow', 357), ('love', 333), ('...', 290), ('good', 283), ('get', 263), ('thank', 253)]


In [None]:
...
def get_tweets_for_model(cleaned_tokens_list):
    """Turn each token list into the feature dict the classifier expects.

    Args:
        cleaned_tokens_list: Iterable of token lists, one per tweet.

    Yields:
        One dict per tweet mapping every distinct token to ``True``.
    """
    for token_group in cleaned_tokens_list:
        yield dict.fromkeys(token_group, True)

# Both names are generators (get_tweets_for_model yields), so each can be consumed
# only once; re-run this cell before iterating over them a second time.
positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list)
negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list)

In [None]:
...
import random

# Number of shuffled examples used for training; the rest is held out for testing.
TRAIN_SIZE = 7000
# Fixed seed so the train/test split (and the reported accuracy) is repeatable.
SHUFFLE_SEED = 42

# Build the feature dicts inside this cell: the generators created in the previous
# cell are single-use, so re-running this cell with them would give empty datasets.
positive_dataset = [(tweet_dict, "Positive")
                     for tweet_dict in get_tweets_for_model(positive_cleaned_tokens_list)]

negative_dataset = [(tweet_dict, "Negative")
                     for tweet_dict in get_tweets_for_model(negative_cleaned_tokens_list)]

dataset = positive_dataset + negative_dataset

# A dedicated Random instance keeps the global random state untouched.
random.Random(SHUFFLE_SEED).shuffle(dataset)

train_data = dataset[:TRAIN_SIZE]
test_data = dataset[TRAIN_SIZE:]

In [None]:
...
from nltk import classify
from nltk import NaiveBayesClassifier

# Train a Naive Bayes classifier on the (feature dict, label) pairs.
classifier = NaiveBayesClassifier.train(train_data)

print("Accuracy is:", classify.accuracy(classifier, test_data))

# show_most_informative_features() prints its table itself and returns None,
# so wrapping it in print() only added a stray "None" line to the output.
classifier.show_most_informative_features(10)

Accuracy is: 0.9956666666666667
Most Informative Features
                      :( = True           Negati : Positi =   2029.1 : 1.0
                      :) = True           Positi : Negati =    996.6 : 1.0
                     sad = True           Negati : Positi =     33.0 : 1.0
                follower = True           Positi : Negati =     29.7 : 1.0
                     bam = True           Positi : Negati =     22.2 : 1.0
                     x15 = True           Negati : Positi =     15.9 : 1.0
                    glad = True           Positi : Negati =     13.3 : 1.0
                    blog = True           Positi : Negati =     12.6 : 1.0
               community = True           Positi : Negati =     12.6 : 1.0
                   didnt = True           Negati : Positi =     11.4 : 1.0
None


In [None]:
...
from nltk.tokenize import word_tokenize

custom_tweet = "The Foood Was Good"

# Clean the tweet with the same stop-word list that was used for the training
# tweets, so its feature dict is built exactly like the training features.
custom_tokens = remove_noise(word_tokenize(custom_tweet), stop_words)

# The classifier expects a {token: True} feature dict, as produced for training.
print(classifier.classify({token: True for token in custom_tokens}))

Negative


Simple Rule-Based ChatBot

In [None]:
from nltk.chat.util import Chat, reflections

In [None]:
# pairs is a list of [pattern, responses] entries for nltk.chat.util.Chat.
# The first pattern that matches the START of the user input wins, so order
# matters and the catch-all "(.*)" must stay last. In a response, "%N" is
# replaced by capture group N; a "%" that is not followed by a digit must be
# avoided because the substitution step tries to parse the next character as a
# number. Patterns are written in lower case (Chat matches case-insensitively).
pairs = [
    [
        r"(.*)my name is (.*)",
        ["Hello %2, How are you today ?",]
    ],
    [
        # No trailing space: "i need help" has nothing after "help".
        r"(.*)help(.*)",
        ["I can help you ",]
    ],
    [
        r"(.*) your name(.*)",
        ["My name is Ahya, but you can just call me robot and I'm a chatbot .",]
    ],
    [
        # Also matches a bare "how are you" (nothing required after "you").
        r"how are you\b(.*)",
        ["I'm doing very well", "i am great !"]
    ],
    [
        # Also matches a bare "sorry".
        r"sorry\b(.*)",
        ["Its alright","Its OK, never mind that",]
    ],
    [
        # Also matches "i'm good" (no filler word required before the adjective).
        r"i'm (.*)(good|well|okay|ok)\b",
        ["Nice to hear that","Alright, great !",]
    ],
    [
        r"(hi|hey|hello|hola|holla)(.*)",
        ["Hello", "Hey there",]
    ],
    [
        r"what (.*) want",
        ["Make me an offer I can't refuse",]

    ],
    [
        r"(.*)created(.*)",
        ["Created By NLTK created me using Python's NLTK library ","top secret ;)",]
    ],
    [
        r"(.*) (location|city)",
        ['Banglore, India',]
    ],
    [
        r"(.*)raining in (.*)",
        # "50 percent" instead of "50%": a literal "%" would break the %N substitution.
        ["No rain in the past 4 days here in %2","In %2 there is a 50 percent chance of rain",]
    ],
    [
        # Also matches when "health" is the last word of the question.
        r"how (.*) health\b(.*)",
        ["Health is very important, but I am a computer, so I don't need to worry about my health ",]
    ],
    [
        r"(.*)(sports|game|sport)(.*)",
        ["I'm a very big fan of Cricket",]
    ],
    [
        # The keyword is required; with a trailing "?" the group was optional and
        # every "who ..." question (e.g. "who are you") got this answer.
        r"who (.*) (cricketer|batsman)",
        ["Virat Kohli"]
    ],
    [
        r"quit",
        ["Bye for now. See you soon :) ","It was nice talking to you."]
    ],
    [
        r"(.*)",
        ['That is nice to hear']
    ],
]

In [None]:
print(reflections)

{'i am': 'you are', 'i was': 'you were', 'i': 'you', "i'm": 'you are', "i'd": 'you would', "i've": 'you have', "i'll": 'you will', 'my': 'your', 'you are': 'I am', 'you were': 'I was', "you've": 'I have', "you'll": 'I will', 'your': 'my', 'yours': 'mine', 'you': 'me', 'me': 'you'}


In [None]:
# Example of a custom reflections mapping (input phrase -> replacement phrase).
# The Chat object in the following cell is built with NLTK's stock `reflections`.
my_dummy_reflections = dict(
    go="gone",
    hello="hey there",
)

In [None]:
# Greeting shown once, before the conversation loop is started.
print("Hi, I'm Ahya and I like to chat\nPlease type lowercase English language to start a conversation. Type quit to leave ")
# Create the chatbot from the pattern/response pairs and NLTK's stock reflections
# (first-person <-> second-person substitutions applied to captured text).
chat = Chat(pairs, reflections)

Hi, I'm Ahya and I like to chat
Please type lowercase English language to start a conversation. Type quit to leave 


In [None]:
# Start the interactive conversation loop (blocks waiting for user input).
chat.converse()

>Hello
Hello
>How Are you
That is nice to hear
>Who Are you
Virat Kohli
>Are You Sure
That is nice to hear
>quit
It was nice talking to you.


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
# Three-sentence toy corpus for the TF-IDF demo.
# NOTE(review): this rebinds `dataset`, which the sentiment-analysis cells used for
# the labelled tweet list — re-run those cells if that list is needed again.
dataset = [
    "I enjoy reading about Machine Learning and Machine Learning is my PhD subject",
    "I would enjoy a walk in the park",
    "I was reading in the library"
]

In [None]:
# Fit TF-IDF on the corpus; tfIdf is a sparse (documents x vocabulary) matrix.
tfIdfVectorizer = TfidfVectorizer(use_idf=True)
tfIdf = tfIdfVectorizer.fit_transform(dataset)

# get_feature_names() no longer exists in recent scikit-learn releases; prefer
# get_feature_names_out() and fall back only on versions that predate it.
if hasattr(tfIdfVectorizer, "get_feature_names_out"):
    feature_names = tfIdfVectorizer.get_feature_names_out()
else:
    feature_names = tfIdfVectorizer.get_feature_names()

# Scores of the FIRST document only, one row per vocabulary term.
df = pd.DataFrame(tfIdf[0].T.toarray(), index=feature_names, columns=["TF-IDF"])
df = df.sort_values('TF-IDF', ascending=False)
print (df.head(25))

            TF-IDF
machine   0.513720
learning  0.513720
about     0.256860
subject   0.256860
phd       0.256860
and       0.256860
my        0.256860
is        0.256860
reading   0.195349
enjoy     0.195349
library   0.000000
park      0.000000
in        0.000000
the       0.000000
walk      0.000000
was       0.000000
would     0.000000




In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense

# Seed NumPy (synthetic data) and TensorFlow (weight initialisation, batch
# shuffling) so that repeated runs of this cell give the same numbers.
np.random.seed(42)
tf.random.set_seed(42)

# Generate some sequential data (time series): 100 sequences of 10 steps, 1 feature
data = np.random.random((100, 10, 1))

# Generate the target labels (binary). They are drawn independently of `data`,
# so there is nothing to learn and accuracy near chance level is expected.
labels = np.random.randint(2, size=(100, 1))

# Split the data into training and test sets (first 80 / last 20 samples)
# NOTE(review): `train_data` / `test_data` / `labels` rebind names used by the
# sentiment-analysis cells earlier in the notebook.
train_data = data[:80]
train_labels = labels[:80]
test_data = data[80:]
test_labels = labels[80:]


def _train_and_evaluate(recurrent_layer):
    """Build, train and score a one-layer recurrent binary classifier.

    Args:
        recurrent_layer: Keras recurrent layer class (e.g. SimpleRNN or LSTM).

    Returns:
        Tuple ``(model, test_loss, test_accuracy)``.
    """
    network = Sequential()
    network.add(recurrent_layer(32, input_shape=(10, 1)))
    network.add(Dense(1, activation='sigmoid'))
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    network.fit(train_data, train_labels, epochs=5, batch_size=16, validation_split=0.2)
    loss, accuracy = network.evaluate(test_data, test_labels)
    return network, loss, accuracy


# Build, train and evaluate the RNN model
rnn_model, rnn_loss, rnn_accuracy = _train_and_evaluate(SimpleRNN)
print("RNN Test accuracy:", rnn_accuracy)

# Build, train and evaluate the LSTM model with the identical recipe
lstm_model, lstm_loss, lstm_accuracy = _train_and_evaluate(LSTM)
print("LSTM Test accuracy:", lstm_accuracy)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Embedding

# Seed TensorFlow so weight initialisation and batch order are repeatable.
tf.random.set_seed(42)

# Load the IMDB movie reviews dataset
num_words = 10000  # Only use the most frequent 10,000 words
max_len = 100  # Maximum length of each review (pad or truncate to this length)
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Pad sequences to a fixed length
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)


def _train_text_classifier(recurrent_layer):
    """Build, train and score an Embedding -> recurrent -> sigmoid classifier.

    Args:
        recurrent_layer: Keras recurrent layer class (e.g. SimpleRNN or LSTM).

    Returns:
        Tuple ``(model, test_loss, test_accuracy)``.
    """
    network = Sequential()
    network.add(Embedding(input_dim=num_words, output_dim=32, input_length=max_len))
    network.add(recurrent_layer(32))
    network.add(Dense(1, activation='sigmoid'))
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    network.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)
    loss, accuracy = network.evaluate(x_test, y_test)
    return network, loss, accuracy


# Build, train and evaluate the SimpleRNN model
rnn_model, rnn_loss, rnn_accuracy = _train_text_classifier(SimpleRNN)
print("SimpleRNN Test accuracy:", rnn_accuracy)

# Build, train and evaluate the LSTM model with the identical recipe
lstm_model, lstm_loss, lstm_accuracy = _train_text_classifier(LSTM)
print("LSTM Test accuracy:", lstm_accuracy)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
SimpleRNN Test accuracy: 0.8014400005340576
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
LSTM Test accuracy: 0.8298400044441223


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Sample data for sentiment analysis (five hand-written sentences)
texts = ["I love this product!", "This is amazing!", "I dislike it.", "I'm not a fan.", "It's terrible."]
labels = [1, 1, 0, 0, 0]  # 1 for positive sentiment, 0 for negative sentiment

# Tokenize the text and convert to sequences of integer word ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences to the same length
max_len = 10
padded_sequences = pad_sequences(sequences, maxlen=max_len)

# Convert labels to numpy array
y = np.array(labels)

# Split data into training and testing sets.
# NOTE(review): with only five samples, test_size=0.2 presumably leaves a single
# held-out example, so the reported test accuracy can only be 0.0 or 1.0.
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, y, test_size=0.2, random_state=42)

# LSTM model creation; the vocabulary size is len(word_index) + 1
# (presumably because index 0 is reserved for padding — confirm in the Keras docs).
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 64, input_length=max_len))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model (batch_size=1: one weight update per sample)
model.fit(X_train, y_train, epochs=10, batch_size=1)

# Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

# Sentiment classification function
def classify_sentiment(text, positive_threshold=0.7, negative_threshold=0.3):
    """Classify ``text`` as "Positive", "Negative" or "Neutral".

    The text is encoded with the notebook's fitted ``tokenizer``, padded to
    ``max_len`` and scored by ``model``; the sigmoid output is then bucketed.

    Args:
        text: Sentence to classify.
        positive_threshold: Scores at or above this value are "Positive".
        negative_threshold: Scores at or below this value are "Negative".

    Returns:
        One of the strings "Positive", "Negative" or "Neutral".
    """
    # Preprocess the input text
    input_sequence = tokenizer.texts_to_sequences([text])

    # If the encoded sequence is empty (nothing in the text was recognised by the
    # tokenizer) the model would only see padding, so report "Neutral" instead of
    # scoring an input that carries no information.
    if not input_sequence[0]:
        return "Neutral"

    padded_input = pad_sequences(input_sequence, maxlen=max_len)

    # Make a prediction (verbose=0 keeps the progress bar out of the output)
    prediction = model.predict(padded_input, verbose=0)[0][0]

    # Determine the sentiment class
    if prediction >= positive_threshold:
        return "Positive"
    if prediction <= negative_threshold:
        return "Negative"
    return "Neutral"

# Prompt for a sentence, classify it with the model trained above, and print the label.
user_input = input("Enter your text: ")
sentiment = classify_sentiment(user_input)
print(f"Sentiment: {sentiment}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 1.619825005531311, Test Accuracy: 0.0
Enter your text: I Love Mango
Sentiment: Negative


In [None]:
!pip install transformers


Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m87.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m68.0 MB/s[0m eta [36m0:00:0

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"  # You can also try "gpt2-medium", "gpt2-large", etc., for larger models
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Upper bound on the total length (prompt + generated tokens) of each reply
max_length = 100

# Initial welcome message
print("Chatbot: Hi! I'm your friendly chatbot. How can I assist you today?")
while True:
    user_input = input("You: ")
    if user_input.lower() in ['exit', 'bye', 'quit']:
        print("Chatbot: Goodbye! Have a great day!")
        break

    # Tokenize the prompt; calling the tokenizer (instead of .encode) also
    # returns the attention mask that generate() asks for.
    encoded = tokenizer("You: " + user_input, return_tensors='pt')
    input_ids = encoded["input_ids"]

    # Generate a response using the model. GPT-2 has no pad token, so the
    # end-of-sequence id is passed explicitly as the pad id.
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # follows them so the bot does not echo "You: ..." back to the user.
    new_tokens = output[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print("Chatbot:", response.strip())


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Chatbot: Hi! I'm your friendly chatbot. How can I assist you today?
You: WHat Is Computer


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: You: WHat Is Computer Gaming?

A: Computer gaming is a hobby. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby
You: WHats your name


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Chatbot: You: WHats your name?

A: I'm a little bit of a nerd.

Q: What's your favorite movie?

A: I'm a little bit of a nerd.

Q: What's your favorite movie?

A: I'm a little bit of a nerd.

Q: What's your favorite movie?

A: I'm a little bit of a nerd.

Q: What's your favorite movie?

You: exit
Chatbot: Goodbye! Have a great day!


In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"  # You can also try "gpt2-medium", "gpt2-large", etc., for larger models
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Upper bound on the total length (prompt + generated tokens) of each reply
max_length = 100

# Initial welcome message
print("Chatbot: Hi! I'm your friendly chatbot. How can I assist you today?")
while True:
    user_input = input("You: ")
    if user_input.lower() in ['exit', 'bye', 'quit']:
        print("Chatbot: Goodbye! Have a great day!")
        break

    # Tokenize the prompt; calling the tokenizer (instead of .encode) also
    # returns the attention mask, which is passed to generate() below.
    encoded = tokenizer("You: " + user_input, return_tensors='pt')
    input_ids = encoded["input_ids"]

    # Generate a response using the model (EOS doubles as the pad token for GPT-2)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # follows them so the bot does not echo "You: ..." back to the user.
    new_tokens = output[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print("Chatbot:", response.strip())


Chatbot: Hi! I'm your friendly chatbot. How can I assist you today?
You: WHat Is Computer
Chatbot: You: WHat Is Computer Gaming?

A: Computer gaming is a hobby. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby that's been around for a long time. It's a hobby
You: What is Laptop
Chatbot: You: What is Laptop?

A: Laptop is a computer that is used to run Windows. It is a computer that is used to run Linux. It is a computer that is used to run Mac OS X. It is a computer that is used to run Linux. It is a computer that is used to run Windows. It is a computer that is used to run Mac OS X. It is a computer that is used to run Windows. It is a computer that is used
You: Explain The Process of Photosyntehis
Chatbot: You: Explain The Process of Photosyntehis

You: How Do You Use Photosynte

In [None]:
!pip install googletrans==4.0.0-rc1


Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2023.1.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━

In [None]:
from googletrans import Translator

def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with googletrans.

    Args:
        text: The text to translate.
        target_language: Destination language, forwarded as ``dest``.

    Returns:
        The ``.text`` attribute of the translation result.
    """
    # A fresh Translator is created for every call.
    return Translator().translate(text, dest=target_language).text

# Interactive entry point: ask for the text and the target language, then print the translation.
if __name__ == "__main__":
    print("Simple Text Translator")
    print("----------------------")
    input_text = input("Enter the text to translate: ")
    target_language = input("Enter the target language (e.g., 'fr' for French, 'es' for Spanish): ")

    translated_text = translate_text(input_text, target_language)
    print(f"\nTranslated Text: {translated_text}")


Simple Text Translator
----------------------
Enter the text to translate: I Am Very Happy Today
Enter the target language (e.g., 'fr' for French, 'es' for Spanish): Arabic

Translated Text: أنا سعيد جدا اليوم


In [None]:
!pip install transformers




In [None]:
import numpy as np
import random
import string
import nltk
from nltk.corpus import gutenberg
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Download NLTK's Gutenberg corpus (it contains the Shakespeare text used below)
nltk.download('gutenberg')

# Get the raw text of Hamlet from the Gutenberg corpus
shakespeare_text = gutenberg.raw('shakespeare-hamlet.txt')

# Preprocess the text by removing punctuation and converting to lowercase.
# `translator` here is a str.maketrans deletion table, not a translation service.
translator = str.maketrans('', '', string.punctuation)
shakespeare_text = shakespeare_text.translate(translator).lower()

# Create character-to-index and index-to-character mappings over the sorted
# set of distinct characters that remain in the text
chars = sorted(list(set(shakespeare_text)))
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for idx, char in enumerate(chars)}

# Create sequences of characters as input and target for training:
# each input is a window of seq_length characters and its target is the
# single character that immediately follows the window
seq_length = 100  # Number of characters in each sequence
step = 5  # Step size to create overlapping sequences
input_sequences = []
target_sequences = []
for i in range(0, len(shakespeare_text) - seq_length, step):
    input_sequences.append(shakespeare_text[i:i+seq_length])
    target_sequences.append(shakespeare_text[i+seq_length])

# One-hot encode the windows and their targets using the char_to_idx mapping.
# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
# 1.20 and later removed; the resulting array dtype is the same.
X = np.zeros((len(input_sequences), seq_length, len(chars)), dtype=bool)
y = np.zeros((len(input_sequences), len(chars)), dtype=bool)
for i, sequence in enumerate(input_sequences):
    for t, char in enumerate(sequence):
        # X[i, t, c] marks that character c is at position t of window i
        X[i, t, char_to_idx[char]] = 1
    # y[i, c] marks the character that follows window i
    y[i, char_to_idx[target_sequences[i]]] = 1

# Build the LSTM model: one LSTM layer over the one-hot windows followed by a
# softmax over the character vocabulary (one output unit per entry of `chars`)
model = Sequential()
model.add(LSTM(128, input_shape=(seq_length, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

# Targets are one-hot rows, hence categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the LSTM model
model.fit(X, y, epochs=30, batch_size=128)

# Function to generate text using the trained LSTM model
def generate_text(seed_text, length=200):
    """Generate ``length`` characters that continue ``seed_text``.

    The model was trained on full ``seq_length``-character windows whose
    target is the character right after the window. The context is therefore
    kept as a sliding window of the most recent characters, capped at
    ``seq_length`` and right-aligned in the input tensor, so the newest
    character is always the last time step the LSTM sees. A seed shorter than
    ``seq_length`` is left-padded with zeros and the window grows as
    characters are generated; a longer seed is truncated to its tail.

    Args:
        seed_text: Starting text. It is lower-cased like the training text,
            and characters outside the training vocabulary are skipped when
            building the model input (they are still kept in the output).
        length: Number of characters to generate.

    Returns:
        ``seed_text`` followed by the generated characters.
    """
    generated_text = seed_text
    # Context window of known characters, at most seq_length long.
    window = [ch for ch in seed_text.lower() if ch in char_to_idx][-seq_length:]

    for _ in range(length):
        x_pred = np.zeros((1, seq_length, len(chars)))
        # Right-align the window so zero padding (if any) comes first.
        offset = seq_length - len(window)
        for t, char in enumerate(window):
            x_pred[0, offset + t, char_to_idx[char]] = 1

        # Greedy decoding: always take the most probable next character.
        predicted_idx = np.argmax(model.predict(x_pred, verbose=0))
        predicted_char = idx_to_char[predicted_idx]

        generated_text += predicted_char
        # Slide the window: append the new character, drop the oldest if full.
        window = (window + [predicted_char])[-seq_length:]

    return generated_text

# Generate 500 characters from a short seed.
# Note: the seed is 18 characters long, far shorter than seq_length (100).
seed_text = "to be or not to be"
generated_text = generate_text(seed_text, length=500)
print(generated_text)


[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X = np.zeros((len(input_sequences), seq_length, len(chars)), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = np.zeros((len(input_sequences), len(chars)), dtype=np.bool)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
to be or not to beeoeeoeomsomommemmememmaeaammmmmommsmsoeoomeosmosoeeosommmssmmmaesmoaoommmsmmesososooooeeeeoomseeooeooooeemssomsammmaoeommommoeomeommsmmommmoooeesoememmeeaommeemsmmommmmmomommomseoooeeeeoommseeeaomommmoemammmsossmmmommomomommoseoeeoooeoooeeosoomoaeemeooeeomsmemsommmaaoemseosoosomooeoosemomoomeomosmmmmooeoemooeommeeoeomseasaommmoomamomemsmmsosmmsmoooeoeemmeemomommoooeeeeoommmooomoomeoeeeeeeeeomomsosmmmmmoooeeomoeoeeeeeeooeooesosoosmmmmmmmoomeeoeoomomessmssmoommmmmsemommmmmmmmmmmmmmmoeomeoeoeoeoemm
