In [None]:
# Q1
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
import re
import matplotlib.pyplot as plt

# Q1 pipeline: normalise a paragraph, tokenise it two ways, drop English
# stopwords, then report and plot the most frequent remaining words.

# The tokenizers and the stopword list need their data packages installed.
# Recent NLTK releases load the Punkt sentence models from 'punkt_tab' rather
# than the legacy 'punkt' pickle, so both are fetched to keep this cell
# runnable on a fresh kernel regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

paragraph = """
Technology has transformed the way we live, work, and connect with others. From smartphones that keep us constantly linked to the internet, to artificial intelligence reshaping industries, the impact is profound. I enjoy exploring new gadgets and learning about the latest advancements in computing. Innovation in technology not only solves real-world problems but also inspires creativity. As we continue to push the boundaries of what machines can do, it’s fascinating to think about where we’re headed next.
"""

# Lower-case, then delete every character that is neither a word character
# nor whitespace. Note this joins hyphenated and apostrophised words
# ("real-world" -> "realworld", "it’s" -> "its").
lowercase_text = paragraph.lower()
clean_text = re.sub(r'[^\w\s]', '', lowercase_text)

# Sentence splitting runs on the untouched paragraph because it relies on the
# punctuation that the cleaning step removes.
sentences = sent_tokenize(paragraph)
word_tokens_nltk = word_tokenize(clean_text)
word_tokens_split = clean_text.split()

stop_words = set(stopwords.words("english"))
filtered_tokens = [word for word in word_tokens_nltk if word not in stop_words]

freq_dist = FreqDist(filtered_tokens)

print("Sentences:", sentences)
print("Python split():", word_tokens_split)
print("NLTK word_tokenize():", word_tokens_nltk)
print("Filtered Tokens (no stopwords):", filtered_tokens)
print("Most Common Words:", freq_dist.most_common(10))

plt.figure(figsize=(10, 5))
freq_dist.plot(20, title="Top Word Frequencies (Excluding Stopwords)")
plt.show()


In [None]:
# Q2
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Q2 pipeline: extract alphabetic words from a paragraph, drop English
# stopwords, and compare Porter stemming with WordNet lemmatization.

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

paragraph = """
Technology has transformed the way we live, work, and connect with others. From smartphones that keep us constantly linked to the internet, to artificial intelligence reshaping industries, the impact is profound. I enjoy exploring new gadgets and learning about the latest advancements in computing. Innovation in technology not only solves real-world problems but also inspires creativity. As we continue to push the boundaries of what machines can do, it’s fascinating to think about where we’re headed next.
"""

# Keep runs of ASCII letters only; punctuation, digits, hyphens and the curly
# apostrophe act as separators ("real-world" -> "real", "world").
words = re.findall(r'\b[a-zA-Z]+\b', paragraph)

# The stopword list is lower-case, so compare on the lower-cased form while
# keeping the original casing in the output.
stop_words = set(stopwords.words("english"))
filtered_words = [word for word in words if word.lower() not in stop_words]

porter = PorterStemmer()
stemmed_words = [porter.stem(word) for word in filtered_words]

lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]

print("Original Words:", filtered_words)
print("\nStemmed Words:", stemmed_words)
print("\nLemmatized Words:", lemmatized_words)


In [None]:
# Q3
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np

# Q3: build bag-of-words and TF-IDF representations of three short texts and
# list the three highest-weighted TF-IDF terms for each one.
texts = [
    "The stock market saw a significant decline today as inflation worries grow.",
    "The new smartphone model features cutting-edge technology and sleek design.",
    "The book was a thrilling adventure with twists and turns at every chapter."
]

# Raw term counts per document.
count_vectorizer = CountVectorizer()
count_matrix = count_vectorizer.fit_transform(texts)
count_features = count_vectorizer.get_feature_names_out()

print("Bag of Words Representation (CountVectorizer):")
print(count_matrix.toarray())
print("Feature Names:", count_features)

# TF-IDF weights over the same corpus.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
tfidf_features = tfidf_vectorizer.get_feature_names_out()

print("\nTF-IDF Matrix:")
print(tfidf_matrix.toarray())
print("TF-IDF Feature Names:", tfidf_features)

# Densify once, then rank each document's row from highest to lowest weight.
dense_scores = tfidf_matrix.toarray()
for i in range(len(texts)):
    print(f"\nTop 3 Keywords for Text {i+1}:")
    row = dense_scores[i]
    best_first = np.argsort(row)[::-1]
    for index in best_first[:3]:
        keyword, score = tfidf_features[index], row[index]
        print(f"{keyword}: {score}")


In [None]:
# Q4
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import numpy as np

# Two short passages on different technology topics (artificial intelligence
# and blockchain) used as the pair of documents to compare.
texts = [
    "Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. AI is transforming industries such as healthcare, finance, and robotics, offering advancements in automation, data processing, and decision-making.",
    "Blockchain is a decentralized technology that records transactions across many computers securely and transparently. It enables cryptocurrencies like Bitcoin, and its applications extend to secure online voting, supply chain management, and digital contracts."
]

def preprocess_and_tokenize(text):
    """Return the lower-cased, punctuation-free whitespace tokens of ``text``.

    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space), so hyphenated words are joined:
    "decision-making" becomes "decisionmaking".

    Args:
        text: The raw string to normalise.

    Returns:
        A list of tokens; empty when ``text`` has no word characters.
    """
    stripped = re.sub(r'[^\w\s]', '', text.lower())
    return stripped.split()

# Q4: compare the two texts with a set-based measure (Jaccard) and a
# vector-based measure (cosine similarity over TF-IDF weights).
tokens_1 = preprocess_and_tokenize(texts[0])
tokens_2 = preprocess_and_tokenize(texts[1])

# Jaccard = |intersection| / |union| of the two vocabularies. Guard the
# division so two token-less texts yield 0.0 instead of ZeroDivisionError.
set_1 = set(tokens_1)
set_2 = set(tokens_2)
union = set_1 | set_2
jaccard_similarity = len(set_1 & set_2) / len(union) if union else 0.0

# Fit TF-IDF on both texts together, then compare row 0 with row 1. The
# one-row slices keep the inputs 2-D as cosine_similarity expects.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(texts)
cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

print(f"Jaccard Similarity: {jaccard_similarity}")
print(f"Cosine Similarity: {cosine_sim}")

# NOTE(review): this cell previously stopped at an unfinished `if jaccard`
# statement, which is a SyntaxError. Reporting which measure is higher is a
# presumed completion -- replace it if a different check was intended.
if jaccard_similarity > cosine_sim:
    print("Jaccard similarity is higher than cosine similarity for these texts.")
elif jaccard_similarity < cosine_sim:
    print("Cosine similarity is higher than Jaccard similarity for these texts.")
else:
    print("Jaccard and cosine similarity are equal for these texts.")


In [None]:
# Q5
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Three sample customer reviews: one enthusiastic, one disappointed, and one
# lukewarm, used as input for the sentiment analysers.
reviews = [
    "This product is amazing! It has exceeded all my expectations and works flawlessly.",
    "I am really disappointed with this service. It didn't meet any of my needs and was a waste of money.",
    "The quality of the product is decent, but it's nothing special. It's just okay."
]

def analyze_sentiment_textblob(review):
    """Score one review with TextBlob.

    Args:
        review: The review text to analyse.

    Returns:
        A ``(polarity, subjectivity)`` tuple read from the blob's
        ``sentiment`` attribute.
    """
    sentiment = TextBlob(review).sentiment
    return sentiment.polarity, sentiment.subjectivity

def analyze_sentiment_vader(review):
    """Return VADER's compound sentiment score for one review.

    Args:
        review: The review text to analyse.

    Returns:
        The ``'compound'`` entry of the analyser's ``polarity_scores`` result.
    """
    # Constructing the analyser is the expensive part and does not depend on
    # the review, so build it on first use and keep it on the function object
    # for every later call.
    analyzer = getattr(analyze_sentiment_vader, "_analyzer", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        analyze_sentiment_vader._analyzer = analyzer
    return analyzer.polarity_scores(review)['compound']

# Score every review with both analysers, keeping the order of `reviews`.
textblob_results = list(map(analyze_sentiment_textblob, reviews))
vader_results = list(map(analyze_sentiment_vader, reviews))

def classify_review(polarity, method="textblob"):
    """Map a sentiment score to "Positive", "Negative" or "Neutral".

    Args:
        polarity: The score to classify (TextBlob polarity or VADER compound).
        method: ``"textblob"`` splits on the sign of the score. Any other
            value selects the VADER thresholds of +/-0.05.

    Returns:
        One of the strings ``"Positive"``, ``"Negative"`` or ``"Neutral"``.
    """
    if method == "textblob":
        if polarity > 0:
            return "Positive"
        if polarity < 0:
            return "Negative"
        return "Neutral"

    # VADER's documented convention is inclusive at the boundaries:
    # compound >= 0.05 is positive and compound <= -0.05 is negative.
    if polarity >= 0.05:
        return "Positive"
    if polarity <= -0.05:
        return "Negative"
    return "Neutral"

# Turn the raw scores into labels. TextBlob results are (polarity,
# subjectivity) tuples and only the polarity is classified; VADER results are
# bare compound scores.
textblob_classifications = [
    classify_review(polarity, "textblob") for polarity, _subjectivity in textblob_results
]
vader_classifications = [classify_review(compound, "vader") for compound in vader_results]

# Keep the reviews that TextBlob labelled positive.
positive_reviews = [
    review
    for review, label in zip(reviews, textblob_classifications)
    if label == "Positive"
]

# WordCloud.generate raises ValueError when given no words, so only draw the
# cloud when at least one positive review exists.
if positive_reviews:
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(" ".join(positive_reviews))

    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
else:
    print("No positive reviews found; skipping word cloud.")

print("TextBlob Results:", textblob_classifications)
print("VADER Results:", vader_classifications)


In [None]:
# Q6
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Q6: train a small LSTM next-word model on one paragraph and use it to
# extend a seed word by three predicted words.
text = """
Artificial intelligence is transforming many industries, providing innovative solutions that are reshaping how businesses operate. AI technologies, such as machine learning, are becoming increasingly advanced and are being applied to a wide range of fields, from healthcare to finance. The impact of AI is not limited to just improving efficiency and accuracy, but also to opening up new opportunities for creativity and growth. As AI continues to evolve, it will likely play an even more prominent role in shaping the future.
"""

tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# +1 because word indices start at 1; index 0 is left for padding.
total_words = len(tokenizer.word_index) + 1

# For each sentence, emit every prefix of length >= 2 as a training sample:
# all tokens but the last are the input, the last token is the target.
input_sequences = []
for line in text.split('.'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i + 1])

# Left-pad so that the target word is always the final column.
max_sequence_length = max(len(sequence) for sequence in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

X = input_sequences[:, :-1]
y = to_categorical(input_sequences[:, -1], num_classes=total_words)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_length - 1))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, y, epochs=50, verbose=1)

seed_text = "artificial"
for _ in range(3):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')
    # Sequential.predict_classes no longer exists in current TensorFlow; the
    # equivalent is the argmax over the predicted class probabilities.
    probabilities = model.predict(token_list, verbose=0)
    predicted_index = int(np.argmax(probabilities, axis=-1)[0])
    # Index 0 is the padding slot and has no word; stop instead of raising
    # KeyError if the model ever predicts it.
    output_word = tokenizer.index_word.get(predicted_index)
    if output_word is None:
        break
    seed_text += " " + output_word

print(seed_text)
