In [40]:
# Import necessary libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk import classify
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
import random

In [29]:
# Toy corpus for the sentiment model: four hand-written reviews per class.
positive_reviews = [
    "I love this product!",
    "Amazing experience with this service.",
    "The team did a great job on this project.",
    "I can't believe how good it is!",
]

negative_reviews = [
    "This is a terrible product.",
    "The service was horrible.",
    "I regret using this service.",
    "I wouldn't recommend this to anyone.",
]

# Binary targets, one per review: 1 marks a positive review, 0 a negative one.
positive_labels = [1 for _ in positive_reviews]
negative_labels = [0 for _ in negative_reviews]


In [30]:
# Merge both classes into a single corpus. Reviews and labels are concatenated
# in the same (positive, negative) order, so index i of `labels` describes
# index i of `reviews`.
reviews = [*positive_reviews, *negative_reviews]
labels = [*positive_labels, *negative_labels]

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vocabulary and weights on the training reviews only, then
# reuse that fitted vectorizer to encode the held-out reviews.
vectorizer = TfidfVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)


In [41]:
# Hyperparameter search space handed to GridSearchCV (3 values per key).
param_grid = {
    "max_depth": [3, 5, 7],              # maximum depth of each tree
    "learning_rate": [0.01, 0.1, 0.2],   # boosting step size
    "n_estimators": [50, 100, 200],      # number of boosting rounds
    "min_child_weight": [1, 3, 5],       # minimum instance weight required in a child node
}

In [42]:
# Base XGBoost model; any hyperparameter not listed in param_grid keeps the
# library default.
classifier = XGBClassifier()

# Exhaustively evaluate every combination in param_grid with 3-fold
# cross-validation, ranking candidates by accuracy. n_jobs=-1 asks
# scikit-learn to use all available processors.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train_vectors, y_train)

In [47]:
# Report the hyperparameter combination that scored best in the grid search.
best_params = getattr(grid_search, "best_params_")
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 50}


In [48]:
# Take the estimator that GridSearchCV refit with the winning hyperparameters
# and label the held-out reviews with it.
best_classifier = grid_search.best_estimator_
predictions = best_classifier.predict(X_test_vectors)

In [49]:
# Fraction of held-out reviews whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Classifier Accuracy:", accuracy)

Classifier Accuracy: 0.5


In [50]:
# Train a Logistic Regression classifier on the TF-IDF training features.
# This cell only fits the model: the evaluation cell that follows computes
# `predictions` itself, so predicting here as well would be a dead store.
# `fit` returns the fitted estimator, so `classifier` is the trained model.
classifier = LogisticRegression().fit(X_train_vectors, y_train)

In [51]:
# Label the held-out reviews with the current `classifier`.
predictions = classifier.predict(X_test_vectors)

# Score those predictions against the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Classifier Accuracy:", accuracy)


Classifier Accuracy: 0.5


In [52]:
# Unseen, hand-written reviews used to spot-check the trained classifier.
new_reviews = [
    "This is a fantastic product!",
    "The service was terrible.",
    "I'm not sure about this project.",
    "It exceeded my expectations.",
]

In [53]:
# Encode the unseen reviews with the already-fitted TF-IDF vectorizer (transform
# only, so the training vocabulary is reused) and classify them with the current
# `classifier`. Computing `new_predictions` here keeps the cell runnable on a
# fresh kernel instead of relying on a variable left over from an earlier session.
new_predictions = classifier.predict(vectorizer.transform(new_reviews))

# Display predictions for new reviews (label 1 is positive, anything else negative).
for review, prediction in zip(new_reviews, new_predictions):
    sentiment = "Positive" if prediction == 1 else "Negative"
    print(f"Review: {review}\nPredicted Sentiment: {sentiment}\n")

Review: This is a fantastic product!
Predicted Sentiment: Negative

Review: The service was terrible.
Predicted Sentiment: Negative

Review: I'm not sure about this project.
Predicted Sentiment: Positive

Review: It exceeded my expectations.
Predicted Sentiment: Positive

