In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split  
# Fetch the NLTK data packages this script relies on (no-ops when they are
# already present locally).
nltk.download("punkt")
# Recent NLTK releases load the tokenizer model from "punkt_tab" rather than
# the pickled "punkt" package; without it nltk.word_tokenize raises
# LookupError. Both are requested so old and new NLTK versions work.
nltk.download("punkt_tab")
nltk.download("stopwords")
nltk.download("wordnet")

# Toy labelled corpus: each entry pairs a review sentence with its
# sentiment label (1 = Positive, 0 = Negative).
samples = [
    ("I love this product, it is amazing!", 1),
    ("This is the worst thing I ever bought.", 0),
    ("Absolutely fantastic experience, very happy!", 1),
    ("I hate it, very bad quality.", 0),
    ("Not good, I am disappointed.", 0),
    ("Really good quality, I recommend it!", 1),
    ("Terrible service, never coming back.", 0),
    ("The product is decent and works fine.", 1),
    ("Worst quality ever, I regret buying.", 0),
    ("Excellent! I am so satisfied with the purchase.", 1),
]

# Unzip the pairs into the two parallel lists the rest of the script uses.
texts = [sentence for sentence, _ in samples]
labels = [sentiment for _, sentiment in samples]

# Shared lemmatizer instance used by preprocess_text.
lemmatizer = WordNetLemmatizer()

# NLTK's English stopword list contains negation words. Dropping them would
# turn "not good" into "good", erasing exactly the signal a sentiment
# classifier needs, so they are kept out of the filter set.
negation_words = {"no", "nor", "not"}
stop_words = set(stopwords.words('english')) - negation_words

def preprocess_text(text):
    """Normalize a raw sentence into a space-separated string of lemmas.

    The text is lowercased and tokenized with ``nltk.word_tokenize``. Tokens
    that are not purely alphabetic (punctuation, numbers) or that appear in
    the module-level ``stop_words`` set are discarded, and each surviving
    token is passed through the module-level ``lemmatizer``.

    Args:
        text: The raw input string.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token survives the filters).
    """
    # Filter lazily in a single pass: alphabetic tokens that are not stopwords.
    kept = (
        word
        for word in nltk.word_tokenize(text.lower())
        if word.isalpha() and word not in stop_words
    )
    return " ".join(lemmatizer.lemmatize(word) for word in kept)
	
# Clean every document once up front so the vectorizer only sees
# normalized text.
clean_text = list(map(preprocess_text, texts))

# Hold out 30% of the samples for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    clean_text,
    labels,
    test_size=0.3,
    random_state=42,
)

# TF-IDF features feeding a logistic-regression classifier, bundled in one
# pipeline so fitting and prediction accept plain strings.
tfidf_step = ('tfidf', TfidfVectorizer())
clf_step = ('clf', LogisticRegression())
model = Pipeline([tfidf_step, clf_step])

model.fit(X_train, y_train)

# Predict the held-out documents and compare against their true labels.
y_pred = model.predict(X_test)

# Pin the class ids explicitly. With such a small test split, y_test and
# y_pred may contain only one class; classification_report would then raise
# ValueError because target_names has two entries, and confusion_matrix
# would shrink to 1x1. Passing labels keeps both reports two-class.
class_ids = [0, 1]
class_names = ["Negative", "Positive"]

print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))

# Unseen sentences used as a quick sanity check of the fitted model.
test_sentences = [
    "I really like this!",
    "This was a terrible experience.",
    "Not bad, but could be better.",
]

# Apply the same cleaning used for the training data before predicting.
test_preprocessed = list(map(preprocess_text, test_sentences))
predictions = model.predict(test_preprocessed)

print("\n--- Test Predictions ---")
for sent, pred in zip(test_sentences, predictions):
    # Label 1 is Positive; any other predicted value is reported as Negative.
    sentiment = "Positive" if pred == 1 else "Negative"
    print(f"Sentence: {sent} -> Sentiment: {sentiment}")
