# TF-IDF

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Toy sentiment corpus: ten short sentences, each tagged 'positive' or 'negative'
data = [
    dict(text="This is a positive example", label='positive'),
    dict(text="Negative sentiment here", label='negative'),
    dict(text="Another positive text", label='positive'),
    dict(text="Feeling negative today", label='negative'),
    dict(text="I feel great", label='positive'),
    dict(text="Not a good day", label='negative'),
    dict(text="I am really happy today!", label='positive'),
    dict(text="This is the worst day ever", label='negative'),
    dict(text="Everything is going well", label='positive'),
    dict(text="I don't like this at all", label='negative'),
]

In [3]:
# Pull the sentences and their sentiment tags out into two parallel lists
texts, labels = (
    [entry[field] for entry in data]
    for field in ('text', 'label')
)

In [4]:
# Create the TF-IDF vectorizer with its default settings
vectorizer = TfidfVectorizer()

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the vectorizer on the training texts only, then reuse the fitted
# vectorizer to encode the test texts
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [5]:
# Create a Multinomial Naive Bayes model and train it on the TF-IDF training features
classifier = MultinomialNB()
classifier.fit(X=X_train_tfidf, y=y_train)

In [6]:
# Predict a sentiment label for each test document
predictions = classifier.predict(X=X_test_tfidf)

In [7]:
# Score the test-set predictions: overall accuracy plus the per-class report
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
classification_rep = classification_report(y_true=y_test, y_pred=predictions)

# Emit the three result lines in a single call, one item per line
print(
    f"Accuracy: {accuracy:.2f}",
    "Classification Report:",
    classification_rep,
    sep="\n",
)

Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
    positive       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [11]:
# --- Additional Testing with New Sentences ---
# Each unseen sentence is paired with its expected sentiment so the predictions
# can be scored, rather than leaving the expectation in a comment that nothing checks.
labelled_sentences = [
    ("I am extremely happy today", 'positive'),
    ("This is not going well", 'negative'),
    ("The weather is nice", 'positive'),
    ("I am feeling sad", 'negative'),
    ("Life is beautiful", 'positive'),
    ("I hate everything", 'negative'),
]
new_sentences = [sentence for sentence, expected in labelled_sentences]
expected_labels = [expected for sentence, expected in labelled_sentences]

# Encode with the already-fitted vectorizer (transform only, no re-fitting)
new_sentences_tfidf = vectorizer.transform(new_sentences)

new_predictions = classifier.predict(new_sentences_tfidf)

# Output the predictions
print("\nSentence Predictions:")
for sentence, prediction in zip(new_sentences, new_predictions):
    print(f"Sentence: '{sentence}' -> Predicted Sentiment: {prediction}")

# Compare against the expected labels so misclassifications show up as a number
new_accuracy = accuracy_score(expected_labels, new_predictions)
print(f"\nAccuracy on new sentences: {new_accuracy:.2f}")


Sentence Predictions:
Sentence: 'I am extremely happy today' -> Predicted Sentiment: positive
Sentence: 'This is not going well' -> Predicted Sentiment: negative
Sentence: 'The weather is nice' -> Predicted Sentiment: negative
Sentence: 'I am feeling sad' -> Predicted Sentiment: positive
Sentence: 'Life is beautiful' -> Predicted Sentiment: positive
Sentence: 'I hate everything' -> Predicted Sentiment: negative
