In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Toy training corpus: one example per sentiment class; labels[i] belongs to texts[i]
texts = [
    "I absolutely LOVED the movie!!! It was amazing :)",
    "The movie was boring and too long."
]
labels = ["positive", "negative"]

# Convert the raw strings into TF-IDF features, dropping English stop words
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(texts)

# Fit a multinomial Naive Bayes classifier on the TF-IDF matrix
# (fit() returns the estimator itself, so it can be chained)
model = MultinomialNB().fit(X, labels)

# Classify an unseen review; transform() reuses the vocabulary learned above
test = ["Really amazing film! Loved every second."]
test_features = vectorizer.transform(test)
print(model.predict(test_features))  # → ['positive']


['positive']


['positive']


In [5]:
# ===== Basic NLP Demo =====
# Libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# ----------------------------------------
# 1) Sentiment Analysis (Positive / Negative)
# ----------------------------------------
# Two-sentence training set; labels[i] is the class of sentences[i].
sentences = [
    "I loved the movie, it was amazing!",     # positive
    "The movie was boring and too long."      # negative
]
labels = ["positive", "negative"]

# Learn the vocabulary and build the TF-IDF training matrix
tfidf = TfidfVectorizer(stop_words='english')
X = tfidf.fit_transform(sentences)

# Fit the Naive Bayes classifier on the vectorized sentences
# (fit() returns the estimator itself, so it can be chained)
clf = MultinomialNB().fit(X, labels)

# Vectorize an unseen sentence with the fitted vocabulary, then classify it
test_sentence = ["Really amazing film! Loved every second."]
test_vector = tfidf.transform(test_sentence)
pred = clf.predict(test_vector)
print("Sentiment Prediction:", pred[0])   # Expected: positive

# ----------------------------------------
# 2) Spam Detection (Spam / Ham)
# ----------------------------------------
# Two-email training set; spam_labels[i] is the class of emails[i].
emails = [
    "Congratulations! You’ve won a free ticket. Click here now!",  # spam
    "Hi John, let's meet for lunch tomorrow.",                     # ham
]
# Separate label list so the sentiment `labels` stay paired with `sentences`.
spam_labels = ["spam", "ham"]

# Use a dedicated vectorizer for this task. Refitting the shared `tfidf` here
# would replace the vocabulary the sentiment classifier `clf` was trained on,
# so `clf.predict(tfidf.transform(...))` would no longer line up with its features.
spam_tfidf = TfidfVectorizer(stop_words='english')
X2 = spam_tfidf.fit_transform(emails)

# Train the spam/ham classifier on the email TF-IDF matrix
clf2 = MultinomialNB()
clf2.fit(X2, spam_labels)

# Vectorize an unseen email with the spam vocabulary, then classify it
test_email = ["Win a free iPhone by clicking this link!"]
pred2 = clf2.predict(spam_tfidf.transform(test_email))
print("Spam Prediction:", pred2[0])  # Expected: spam

Sentiment Prediction: positive
Spam Prediction: spam


In [6]:
# ----------------------------------------
# 3) Text Similarity (TF-IDF + Cosine Similarity)
# ----------------------------------------
texts = [
    "Machine learning is fun.",
    "Deep learning is enjoyable.",
    "I love pizza and pasta."
]

# Use a dedicated vectorizer for this section. Refitting the shared `tfidf`
# would discard the vocabulary that a previously trained classifier relies on.
similarity_tfidf = TfidfVectorizer(stop_words='english')
X3 = similarity_tfidf.fit_transform(texts)

# Compare the first text against each of the remaining texts. The result is a
# 2-D array with one row (for texts[0]) and one column per compared text.
cos_sim = cosine_similarity(X3[0], X3[1:])
print("Cosine similarity with 2nd text:", cos_sim[0, 0])  # expect higher
print("Cosine similarity with 3rd text:", cos_sim[0, 1])  # expect lower



Cosine similarity with 2nd text: 0.224324998974933
Cosine similarity with 3rd text: 0.0
