In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

docs = ["I love machine learning", "I love deep learning"]

# Keep the fitted vectorizer itself (not the matrix it produces) so the learned
# vocabulary stays inspectable and new text can be transformed later.
vectorizer = CountVectorizer()

# Convert text to bag-of-words count vectors: one row per document, one column
# per vocabulary term. The default token pattern keeps only tokens of two or
# more characters, so "I" does not become a feature.
# Densifying is fine for two short documents; cosine_similarity also accepts
# the sparse matrix directly.
vectors = vectorizer.fit_transform(docs).toarray()

# Compute pairwise cosine similarity: entry [i, j] compares docs[i] with docs[j]
similarity = cosine_similarity(vectors)
print(similarity)


[[1.         0.66666667]
 [0.66666667 1.        ]]


In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Two example vectors, each shaped as a single row of a 2-D array because
# cosine_similarity works on (n_samples, n_features) inputs
A = np.array([2, 3, 1]).reshape(1, -1)
B = np.array([4, 0, 5]).reshape(1, -1)

# One row against one row yields a 1x1 matrix; take its only entry
score = cosine_similarity(A, B)[0, 0]

print("Cosine Similarity Score:", score)


Cosine Similarity Score: 0.5426095162342934


In [4]:
# ===== Basic NLP Demo =====
# Libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# ----------------------------------------
# 1) Sentiment Analysis (Positive / Negative)
# ----------------------------------------
# Toy training set: one example per class, just enough to show the workflow.
sentences = [
    "I loved the movie, it was amazing!",     # positive
    "The movie was boring and too long."      # negative
]
labels = ["positive", "negative"]

# Vectorize. This vectorizer is dedicated to the sentiment task: its vocabulary
# defines the feature columns clf is trained on, so it is never refit on other
# text further down.
tfidf = TfidfVectorizer(stop_words='english')
X = tfidf.fit_transform(sentences)

# Train model
clf = MultinomialNB()
clf.fit(X, labels)

# Predict. transform (not fit_transform) maps the new text onto the training
# vocabulary so its columns line up with what clf saw during fit.
test_sentence = ["Really amazing film! Loved every second."]
pred = clf.predict(tfidf.transform(test_sentence))
print("Sentiment Prediction:", pred[0])   # Expected: positive

# ----------------------------------------
# 2) Spam Detection (Spam / Ham)
# ----------------------------------------
emails = [
    "Congratulations! You’ve won a free ticket. Click here now!",  # spam
    "Hi John, let's meet for lunch tomorrow.",                     # ham
]
spam_labels = ["spam", "ham"]

# Own vectorizer and label list for this task, so the sentiment model above
# keeps a vectorizer that still matches its feature space.
spam_tfidf = TfidfVectorizer(stop_words='english')
X2 = spam_tfidf.fit_transform(emails)
clf2 = MultinomialNB()
clf2.fit(X2, spam_labels)

test_email = ["Win a free iPhone by clicking this link!"]
pred2 = clf2.predict(spam_tfidf.transform(test_email))
print("Spam Prediction:", pred2[0])   # Expected: spam

# ----------------------------------------
# 3) Text Similarity (Cosine Similarity)
# ----------------------------------------
texts = [
    "Machine learning is fun.",
    "Deep learning is enjoyable.",
    "I love pizza and pasta."
]

# Third independent vectorizer: fitting here leaves both classifiers usable.
text_tfidf = TfidfVectorizer(stop_words='english')
X3 = text_tfidf.fit_transform(texts)
# Compare texts[0] with each remaining text; the result is a single row with
# one column per compared text.
cos_sim = cosine_similarity(X3[0], X3[1:])
print("Cosine similarity with 2nd text:", cos_sim[0, 0])  # expect higher
print("Cosine similarity with 3rd text:", cos_sim[0, 1])  # expect lower


Sentiment Prediction: positive
Spam Prediction: spam
Cosine similarity with 2nd text: 0.224324998974933
Cosine similarity with 3rd text: 0.0
