------------------------------------  Snowball stemmer with pos + stopwords + tokenize _ tf-idf vectorizer -------------------------------------

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer 
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.stem import SnowballStemmer 
# Load the train / test splits from the Kaggle input directory.
train_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/dataset/train.csv/train.csv",
)
test_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/dataset/test.csv/test.csv",
)

# Text preprocessing: stop-word removal, Snowball stemming, then POS tagging
def preprocess_sreview(review):
    """Convert one raw review into a space-separated string of ``stem_TAG`` tokens.

    The review is tokenized with ``word_tokenize``; tokens whose lower-cased
    form is an NLTK English stop word are dropped; the survivors are stemmed
    with the English Snowball stemmer; the stems are passed to ``pos_tag``
    and each stem is joined to its tag with an underscore.

    Note that the tagger is applied to the stems (after stop-word removal),
    not to the original sentence.

    Args:
        review: Raw review text.

    Returns:
        The ``stem_TAG`` tokens joined by single spaces.
    """
    # stopwords.words() re-reads the corpus on every call, so the stop-word
    # set and the stemmer are built on first use and cached on the function
    # object instead of being rebuilt for every row of the DataFrame.
    try:
        stop_words, stemmer = preprocess_sreview._resources
    except AttributeError:
        stop_words = frozenset(stopwords.words('english'))
        stemmer = SnowballStemmer("english")
        preprocess_sreview._resources = (stop_words, stemmer)

    stems = [
        stemmer.stem(token)
        for token in word_tokenize(review)
        if token.lower() not in stop_words
    ]

    # Append the POS tag to each stem.
    return ' '.join(word + '_' + pos for word, pos in pos_tag(stems))

# Pre-process every review of both splits.
train_sentiments['processed_reviews'] = train_sentiments['review'].apply(
    func=preprocess_sreview,
)
test_sentiments['processed_reviews'] = test_sentiments['review'].apply(
    func=preprocess_sreview,
)

# TF-IDF features: the vocabulary (capped at 5000 terms) is learned on the
# training split only and then reused to transform the test split.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_tfidf = tfidf_vectorizer.fit_transform(
    raw_documents=train_sentiments['processed_reviews'],
)
test_tfidf = tfidf_vectorizer.transform(
    raw_documents=test_sentiments['processed_reviews'],
)

In [2]:
# Train and evaluate a series of classifiers on the vectorized reviews.
# Each section fits on the training split, predicts the test split and
# prints the accuracy plus the per-class classification report.

# Naive Bayes
nb_model = MultinomialNB()
nb_model.fit(train_tfidf, train_sentiments['sentiment'])
nb_predictions = nb_model.predict(test_tfidf)
nb_accuracy = accuracy_score(test_sentiments['sentiment'], nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)
print("Classification Report for Naive Bayes:\n", classification_report(test_sentiments['sentiment'], nb_predictions))
print(nb_predictions)
# Blank separator line (was "/n", which printed the two characters literally).
print("\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(train_tfidf, train_sentiments['sentiment'])
rf_predictions = rf_model.predict(test_tfidf)
rf_accuracy = accuracy_score(test_sentiments['sentiment'], rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print("Classification Report for Random Forest:\n", classification_report(test_sentiments['sentiment'], rf_predictions))
print(rf_predictions)

# Neural Network (single hidden layer of 64 units)
nn_model = MLPClassifier(hidden_layer_sizes=(64,), max_iter=100, random_state=42)
nn_model.fit(train_tfidf, train_sentiments['sentiment'])
nn_predictions = nn_model.predict(test_tfidf)
nn_accuracy = accuracy_score(test_sentiments['sentiment'], nn_predictions)
print("Neural Network Accuracy:", nn_accuracy)
print("Classification Report for Neural Network:\n", classification_report(test_sentiments['sentiment'], nn_predictions))


# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(train_tfidf, train_sentiments['sentiment'])
dt_predictions = dt_model.predict(test_tfidf)
dt_accuracy = accuracy_score(test_sentiments['sentiment'], dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)
print("Classification Report for Decision Tree:\n", classification_report(test_sentiments['sentiment'], dt_predictions))
print("\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_model.fit(train_tfidf, train_sentiments['sentiment'])
gb_predictions = gb_model.predict(test_tfidf)
gb_accuracy = accuracy_score(test_sentiments['sentiment'], gb_predictions)
print("Gradient Boosting Accuracy:", gb_accuracy)
print("Classification Report for Gradient Boosting:\n", classification_report(test_sentiments['sentiment'], gb_predictions))

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(train_tfidf, train_sentiments['sentiment'])
knn_predictions = knn_model.predict(test_tfidf)
knn_accuracy = accuracy_score(test_sentiments['sentiment'], knn_predictions)
print("k-NN Accuracy:", knn_accuracy)
print("Classification Report for k-NN:\n", classification_report(test_sentiments['sentiment'], knn_predictions))
print("\n")

# Stacking: the five base models feed a gradient-boosting meta-learner.
# NOTE(review): per the scikit-learn docs the ensemble fits its own copies
# of the base estimators, so the fits above are repeated here - confirm.
stacking_model = StackingClassifier(
    estimators=[('nb', nb_model), ('rf', rf_model), ('knn', knn_model), ('dt', dt_model),('gb', gb_model)],
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42)
)
stacking_model.fit(train_tfidf, train_sentiments['sentiment'])
stacking_predictions = stacking_model.predict(test_tfidf)
stacking_accuracy = accuracy_score(test_sentiments['sentiment'], stacking_predictions)
print("Stacking Accuracy:", stacking_accuracy)
print("Classification Report for Stacking:\n", classification_report(test_sentiments['sentiment'], stacking_predictions))
print("\n")

# Bagging over 100 decision trees
bagging_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagging_model.fit(train_tfidf, train_sentiments['sentiment'])
bagging_predictions = bagging_model.predict(test_tfidf)
bagging_accuracy = accuracy_score(test_sentiments['sentiment'], bagging_predictions)
print("Bagging Accuracy:", bagging_accuracy)
print("Classification Report for Bagging:\n", classification_report(test_sentiments['sentiment'], bagging_predictions))
print("\n")

# Voting Classifier: majority ("hard") vote over the same five base models
voting_model = VotingClassifier(
    estimators=[('nb', nb_model), ('rf', rf_model), ('knn', knn_model), ('dt', dt_model), ('gb', gb_model)],
    voting='hard'
)
voting_model.fit(train_tfidf, train_sentiments['sentiment'])
voting_predictions = voting_model.predict(test_tfidf)
voting_accuracy = accuracy_score(test_sentiments['sentiment'], voting_predictions)
print("Voting Classifier Accuracy:", voting_accuracy)
print("Classification Report for Voting Classifier:\n", classification_report(test_sentiments['sentiment'], voting_predictions))



Naive Bayes Accuracy: 0.84435
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.85      0.84      0.84      9935
    positive       0.84      0.85      0.85     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000

['positive' 'positive' 'negative' ... 'negative' 'positive' 'negative']
/n
Random Forest Accuracy: 0.83315
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.83      0.84      0.83      9935
    positive       0.84      0.83      0.83     10065

    accuracy                           0.83     20000
   macro avg       0.83      0.83      0.83     20000
weighted avg       0.83      0.83      0.83     20000

['positive' 'positive' 'negative' ... 'negative' 'positive' 'negative']
Neural Network Accuracy: 0.84755
Classification Report 

------------------------ Snowball stemmer with pos _ count vectorizer ---------------------

In [4]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer 
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.stem import SnowballStemmer 
# Load the train / test splits from the Kaggle input directory.
train_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/train.csv/train.csv",
)
test_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/test.csv/test.csv",
)

# Text preprocessing: stop-word removal, Snowball stemming, then POS tagging
def preprocess_sreview(review):
    """Convert one raw review into a space-separated string of ``stem_TAG`` tokens.

    The review is tokenized with ``word_tokenize``; tokens whose lower-cased
    form is an NLTK English stop word are dropped; the survivors are stemmed
    with the English Snowball stemmer; the stems are passed to ``pos_tag``
    and each stem is joined to its tag with an underscore.

    Note that the tagger is applied to the stems (after stop-word removal),
    not to the original sentence.

    Args:
        review: Raw review text.

    Returns:
        The ``stem_TAG`` tokens joined by single spaces.
    """
    # stopwords.words() re-reads the corpus on every call, so the stop-word
    # set and the stemmer are built on first use and cached on the function
    # object instead of being rebuilt for every row of the DataFrame.
    try:
        stop_words, stemmer = preprocess_sreview._resources
    except AttributeError:
        stop_words = frozenset(stopwords.words('english'))
        stemmer = SnowballStemmer("english")
        preprocess_sreview._resources = (stop_words, stemmer)

    stems = [
        stemmer.stem(token)
        for token in word_tokenize(review)
        if token.lower() not in stop_words
    ]

    # Append the POS tag to each stem.
    return ' '.join(word + '_' + pos for word, pos in pos_tag(stems))

# Pre-process every review of both splits.
train_sentiments['processed_reviews'] = train_sentiments['review'].apply(
    func=preprocess_sreview,
)
test_sentiments['processed_reviews'] = test_sentiments['review'].apply(
    func=preprocess_sreview,
)

# Bag-of-words counts: the vocabulary (capped at 5000 terms) is learned on
# the training split only. The *_tfidf variable names are kept for the model
# cell that follows, but here they hold raw term counts, not TF-IDF weights.
count_vectorizer = CountVectorizer(max_features=5000)
train_tfidf = count_vectorizer.fit_transform(
    raw_documents=train_sentiments['processed_reviews'],
)
test_tfidf = count_vectorizer.transform(
    raw_documents=test_sentiments['processed_reviews'],
)

In [5]:
# Every model below is fitted on the training labels and scored on the
# test labels, so both label columns are looked up once.
y_train = train_sentiments['sentiment']
y_test = test_sentiments['sentiment']

# Naive Bayes
nb_model = MultinomialNB().fit(train_tfidf, y_train)
nb_predictions = nb_model.predict(test_tfidf)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)
print(
    "Classification Report for Naive Bayes:\n",
    classification_report(y_test, nb_predictions),
)
print("\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
rf_predictions = rf_model.predict(test_tfidf)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print(
    "Classification Report for Random Forest:\n",
    classification_report(y_test, rf_predictions),
)
print("\n")

# Neural Network (single hidden layer of 64 units)
nn_model = MLPClassifier(
    hidden_layer_sizes=(64,),
    max_iter=100,
    random_state=42,
).fit(train_tfidf, y_train)
nn_predictions = nn_model.predict(test_tfidf)
nn_accuracy = accuracy_score(y_test, nn_predictions)
print("Neural Network Accuracy:", nn_accuracy)
print(
    "Classification Report for Neural Network:\n",
    classification_report(y_test, nn_predictions),
)
print("\n")

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42).fit(train_tfidf, y_train)
dt_predictions = dt_model.predict(test_tfidf)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)
print(
    "Classification Report for Decision Tree:\n",
    classification_report(y_test, dt_predictions),
)
print("\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
gb_predictions = gb_model.predict(test_tfidf)
gb_accuracy = accuracy_score(y_test, gb_predictions)
print("Gradient Boosting Accuracy:", gb_accuracy)
print(
    "Classification Report for Gradient Boosting:\n",
    classification_report(y_test, gb_predictions),
)
print("\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_tfidf, y_train)
knn_predictions = knn_model.predict(test_tfidf)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print("k-NN Accuracy:", knn_accuracy)
print(
    "Classification Report for k-NN:\n",
    classification_report(y_test, knn_predictions),
)
print("\n")

# Stacking: five base models feeding a gradient-boosting meta-learner
stacking_model = StackingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
).fit(train_tfidf, y_train)
stacking_predictions = stacking_model.predict(test_tfidf)
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
print("Stacking Accuracy:", stacking_accuracy)
print(
    "Classification Report for Stacking:\n",
    classification_report(y_test, stacking_predictions),
)
print("\n")

# Bagging over 100 decision trees
bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
).fit(train_tfidf, y_train)
bagging_predictions = bagging_model.predict(test_tfidf)
bagging_accuracy = accuracy_score(y_test, bagging_predictions)
print("Bagging Accuracy:", bagging_accuracy)
print(
    "Classification Report for Bagging:\n",
    classification_report(y_test, bagging_predictions),
)
print("\n")

# Voting Classifier: majority ("hard") vote over the same five base models
voting_model = VotingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    voting='hard',
).fit(train_tfidf, y_train)
voting_predictions = voting_model.predict(test_tfidf)
voting_accuracy = accuracy_score(y_test, voting_predictions)
print("Voting Classifier Accuracy:", voting_accuracy)
print(
    "Classification Report for Voting Classifier:\n",
    classification_report(y_test, voting_predictions),
)


Naive Bayes Accuracy: 0.83735
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.83      0.84      0.84      9935
    positive       0.84      0.83      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Random Forest Accuracy: 0.83265
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.83      0.84      0.83      9935
    positive       0.84      0.82      0.83     10065

    accuracy                           0.83     20000
   macro avg       0.83      0.83      0.83     20000
weighted avg       0.83      0.83      0.83     20000



Neural Network Accuracy: 0.8581
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.86      0.85      0.86      9935
    positive  

------------------------ Snowball stemmer without pos _ count vectorizer ---------------------

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer 
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.stem import SnowballStemmer 
# Load the train / test splits from the Kaggle input directory.
train_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/train.csv/train.csv",
)
test_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/test.csv/test.csv",
)

# Text preprocessing: stop-word removal and Snowball stemming (no POS tagging)
def preprocess_sreview(review):
    """Convert one raw review into a space-separated string of Snowball stems.

    The review is tokenized with ``word_tokenize``; tokens whose lower-cased
    form is an NLTK English stop word are dropped; the survivors are stemmed
    with the English Snowball stemmer.

    Args:
        review: Raw review text.

    Returns:
        The stems joined by single spaces.
    """
    # stopwords.words() re-reads the corpus on every call, so the stop-word
    # set and the stemmer are built on first use and cached on the function
    # object instead of being rebuilt for every row of the DataFrame.
    try:
        stop_words, stemmer = preprocess_sreview._resources
    except AttributeError:
        stop_words = frozenset(stopwords.words('english'))
        stemmer = SnowballStemmer("english")
        preprocess_sreview._resources = (stop_words, stemmer)

    return ' '.join(
        stemmer.stem(token)
        for token in word_tokenize(review)
        if token.lower() not in stop_words
    )

# Pre-process every review of both splits.
train_sentiments['processed_reviews'] = train_sentiments['review'].apply(
    func=preprocess_sreview,
)
test_sentiments['processed_reviews'] = test_sentiments['review'].apply(
    func=preprocess_sreview,
)

# Bag-of-words counts: the vocabulary (capped at 5000 terms) is learned on
# the training split only. The *_tfidf variable names are kept for the model
# cell that follows, but here they hold raw term counts, not TF-IDF weights.
count_vectorizer = CountVectorizer(max_features=5000)
train_tfidf = count_vectorizer.fit_transform(
    raw_documents=train_sentiments['processed_reviews'],
)
test_tfidf = count_vectorizer.transform(
    raw_documents=test_sentiments['processed_reviews'],
)

In [2]:
# Every model below is fitted on the training labels and scored on the
# test labels, so both label columns are looked up once.
y_train = train_sentiments['sentiment']
y_test = test_sentiments['sentiment']

# Naive Bayes
nb_model = MultinomialNB().fit(train_tfidf, y_train)
nb_predictions = nb_model.predict(test_tfidf)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)
print(
    "Classification Report for Naive Bayes:\n",
    classification_report(y_test, nb_predictions),
)
print("\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
rf_predictions = rf_model.predict(test_tfidf)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print(
    "Classification Report for Random Forest:\n",
    classification_report(y_test, rf_predictions),
)
print("\n")

# Neural Network (single hidden layer of 64 units)
nn_model = MLPClassifier(
    hidden_layer_sizes=(64,),
    max_iter=100,
    random_state=42,
).fit(train_tfidf, y_train)
nn_predictions = nn_model.predict(test_tfidf)
nn_accuracy = accuracy_score(y_test, nn_predictions)
print("Neural Network Accuracy:", nn_accuracy)
print(
    "Classification Report for Neural Network:\n",
    classification_report(y_test, nn_predictions),
)
print("\n")

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42).fit(train_tfidf, y_train)
dt_predictions = dt_model.predict(test_tfidf)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)
print(
    "Classification Report for Decision Tree:\n",
    classification_report(y_test, dt_predictions),
)
print("\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
gb_predictions = gb_model.predict(test_tfidf)
gb_accuracy = accuracy_score(y_test, gb_predictions)
print("Gradient Boosting Accuracy:", gb_accuracy)
print(
    "Classification Report for Gradient Boosting:\n",
    classification_report(y_test, gb_predictions),
)
print("\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_tfidf, y_train)
knn_predictions = knn_model.predict(test_tfidf)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print("k-NN Accuracy:", knn_accuracy)
print(
    "Classification Report for k-NN:\n",
    classification_report(y_test, knn_predictions),
)
print("\n")

# Stacking: five base models feeding a gradient-boosting meta-learner
stacking_model = StackingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
).fit(train_tfidf, y_train)
stacking_predictions = stacking_model.predict(test_tfidf)
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
print("Stacking Accuracy:", stacking_accuracy)
print(
    "Classification Report for Stacking:\n",
    classification_report(y_test, stacking_predictions),
)
print("\n")

# Bagging over 100 decision trees
bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
).fit(train_tfidf, y_train)
bagging_predictions = bagging_model.predict(test_tfidf)
bagging_accuracy = accuracy_score(y_test, bagging_predictions)
print("Bagging Accuracy:", bagging_accuracy)
print(
    "Classification Report for Bagging:\n",
    classification_report(y_test, bagging_predictions),
)
print("\n")

# Voting Classifier: majority ("hard") vote over the same five base models
voting_model = VotingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    voting='hard',
).fit(train_tfidf, y_train)
voting_predictions = voting_model.predict(test_tfidf)
voting_accuracy = accuracy_score(y_test, voting_predictions)
print("Voting Classifier Accuracy:", voting_accuracy)
print(
    "Classification Report for Voting Classifier:\n",
    classification_report(y_test, voting_predictions),
)

Naive Bayes Accuracy: 0.84445
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.84      9935
    positive       0.85      0.84      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Random Forest Accuracy: 0.8472
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.85      9935
    positive       0.85      0.84      0.85     10065

    accuracy                           0.85     20000
   macro avg       0.85      0.85      0.85     20000
weighted avg       0.85      0.85      0.85     20000



Neural Network Accuracy: 0.8672
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.87      0.86      0.87      9935
    positive   

lancaster stemmer without pos _ tf-idf vectorizer

In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer 
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Make sure the NLTK resources are available (a no-op when already present).
# NOTE(review): the tagger model is fetched although the preprocessing in
# this cell does no POS tagging - confirm whether it is still needed.
for _package in ('punkt', 'stopwords', 'averaged_perceptron_tagger'):
    nltk.download(_package)

# Load the train / test splits from the Kaggle input directory.
train_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/train.csv/train.csv",
)
test_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/test.csv/test.csv",
)

# Text preprocessing: stop-word removal and Lancaster stemming (no POS tagging)
def preprocess_review(review):
    """Convert one raw review into a space-separated string of Lancaster stems.

    The review is tokenized with ``word_tokenize``; tokens whose lower-cased
    form is an NLTK English stop word are dropped; the survivors are stemmed
    with ``LancasterStemmer``.

    Args:
        review: Raw review text.

    Returns:
        The stems joined by single spaces.
    """
    # stopwords.words() re-reads the corpus on every call, so the stop-word
    # set and the stemmer are built on first use and cached on the function
    # object instead of being rebuilt for every row of the DataFrame.
    try:
        stop_words, stemmer = preprocess_review._resources
    except AttributeError:
        stop_words = frozenset(stopwords.words('english'))
        stemmer = LancasterStemmer()
        preprocess_review._resources = (stop_words, stemmer)

    return ' '.join(
        stemmer.stem(token)
        for token in word_tokenize(review)
        if token.lower() not in stop_words
    )

# Pre-process every review of both splits.
train_sentiments['processed_reviews'] = train_sentiments['review'].apply(
    func=preprocess_review,
)
test_sentiments['processed_reviews'] = test_sentiments['review'].apply(
    func=preprocess_review,
)

# TF-IDF features: the vocabulary (capped at 5000 terms) is learned on the
# training split only and then reused to transform the test split.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_tfidf = tfidf_vectorizer.fit_transform(
    raw_documents=train_sentiments['processed_reviews'],
)
test_tfidf = tfidf_vectorizer.transform(
    raw_documents=test_sentiments['processed_reviews'],
)

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [4]:
# Every model below is fitted on the training labels and scored on the
# test labels, so both label columns are looked up once.
y_train = train_sentiments['sentiment']
y_test = test_sentiments['sentiment']

# Naive Bayes
nb_model = MultinomialNB().fit(train_tfidf, y_train)
nb_predictions = nb_model.predict(test_tfidf)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)
print(
    "Classification Report for Naive Bayes:\n",
    classification_report(y_test, nb_predictions),
)
print("\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
rf_predictions = rf_model.predict(test_tfidf)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print(
    "Classification Report for Random Forest:\n",
    classification_report(y_test, rf_predictions),
)
print("\n")

# Neural Network (single hidden layer of 64 units)
nn_model = MLPClassifier(
    hidden_layer_sizes=(64,),
    max_iter=100,
    random_state=42,
).fit(train_tfidf, y_train)
nn_predictions = nn_model.predict(test_tfidf)
nn_accuracy = accuracy_score(y_test, nn_predictions)
print("Neural Network Accuracy:", nn_accuracy)
print(
    "Classification Report for Neural Network:\n",
    classification_report(y_test, nn_predictions),
)
print("\n")

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42).fit(train_tfidf, y_train)
dt_predictions = dt_model.predict(test_tfidf)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)
print(
    "Classification Report for Decision Tree:\n",
    classification_report(y_test, dt_predictions),
)
print("\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42).fit(train_tfidf, y_train)
gb_predictions = gb_model.predict(test_tfidf)
gb_accuracy = accuracy_score(y_test, gb_predictions)
print("Gradient Boosting Accuracy:", gb_accuracy)
print(
    "Classification Report for Gradient Boosting:\n",
    classification_report(y_test, gb_predictions),
)
print("\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5).fit(train_tfidf, y_train)
knn_predictions = knn_model.predict(test_tfidf)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print("k-NN Accuracy:", knn_accuracy)
print(
    "Classification Report for k-NN:\n",
    classification_report(y_test, knn_predictions),
)
print("\n")

# Stacking: five base models feeding a gradient-boosting meta-learner
stacking_model = StackingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
).fit(train_tfidf, y_train)
stacking_predictions = stacking_model.predict(test_tfidf)
stacking_accuracy = accuracy_score(y_test, stacking_predictions)
print("Stacking Accuracy:", stacking_accuracy)
print(
    "Classification Report for Stacking:\n",
    classification_report(y_test, stacking_predictions),
)
print("\n")

# Bagging over 100 decision trees
bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=100,
    random_state=42,
).fit(train_tfidf, y_train)
bagging_predictions = bagging_model.predict(test_tfidf)
bagging_accuracy = accuracy_score(y_test, bagging_predictions)
print("Bagging Accuracy:", bagging_accuracy)
print(
    "Classification Report for Bagging:\n",
    classification_report(y_test, bagging_predictions),
)
print("\n")

# Voting Classifier: majority ("hard") vote over the same five base models
voting_model = VotingClassifier(
    estimators=[
        ('nb', nb_model),
        ('rf', rf_model),
        ('knn', knn_model),
        ('dt', dt_model),
        ('gb', gb_model),
    ],
    voting='hard',
).fit(train_tfidf, y_train)
voting_predictions = voting_model.predict(test_tfidf)
voting_accuracy = accuracy_score(y_test, voting_predictions)
print("Voting Classifier Accuracy:", voting_accuracy)
print(
    "Classification Report for Voting Classifier:\n",
    classification_report(y_test, voting_predictions),
)

Naive Bayes Accuracy: 0.8503
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.85      0.85      0.85      9935
    positive       0.85      0.85      0.85     10065

    accuracy                           0.85     20000
   macro avg       0.85      0.85      0.85     20000
weighted avg       0.85      0.85      0.85     20000



Random Forest Accuracy: 0.84525
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.85      9935
    positive       0.85      0.84      0.84     10065

    accuracy                           0.85     20000
   macro avg       0.85      0.85      0.85     20000
weighted avg       0.85      0.85      0.85     20000



Neural Network Accuracy: 0.8533
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.85      0.85      0.85      9935
    positive   

In [1]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m79.4 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


spaCy lemmatization with pos _ tf-idf vectorizer

In [2]:
import pandas as pd
import nltk
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer
import spacy

# Load the English language model in SpaCy
nlp = spacy.load("en_core_web_sm")

# Function for stop-word removal, lemmatization and POS tagging
def preprocess_lemreview(review):
    """Convert one raw review into a space-separated string of ``lemma_POS`` tokens.

    The review is tokenized with NLTK's ``word_tokenize``; tokens whose
    lower-cased form is an NLTK English stop word are dropped; the remaining
    tokens are re-joined with spaces and run through the spaCy pipeline
    ``nlp``. Each spaCy token contributes its ``lemma_`` and ``pos_``
    joined by an underscore.

    Args:
        review: Raw review text.

    Returns:
        The ``lemma_POS`` tokens joined by single spaces.
    """
    # stopwords.words() re-reads the corpus on every call, so the stop-word
    # set is built on first use and cached on the function object instead of
    # being rebuilt for every row of the DataFrame.
    try:
        stop_words = preprocess_lemreview._stop_words
    except AttributeError:
        stop_words = frozenset(stopwords.words('english'))
        preprocess_lemreview._stop_words = stop_words

    kept = [
        token
        for token in word_tokenize(review)
        if token.lower() not in stop_words
    ]

    # spaCy tokenizes the filtered text again, so its tokens need not line
    # up one-to-one with the NLTK tokens in `kept`.
    doc = nlp(" ".join(kept))
    return ' '.join(token.lemma_ + '_' + token.pos_ for token in doc)

# Load the train / test splits from the Kaggle input directory.
train_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/train.csv/train.csv",
)
test_sentiments = pd.read_csv(
    filepath_or_buffer="/kaggle/input/sentiments/test.csv/test.csv",
)

# Lemmatize + POS-tag every review of both splits.
train_sentiments['processed_reviews'] = train_sentiments['review'].apply(
    func=preprocess_lemreview,
)
test_sentiments['processed_reviews'] = test_sentiments['review'].apply(
    func=preprocess_lemreview,
)

# TF-IDF features: the vocabulary (capped at 5000 terms) is learned on the
# training split only and then reused to transform the test split.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_tfidf = tfidf_vectorizer.fit_transform(
    raw_documents=train_sentiments['processed_reviews'],
)
test_tfidf = tfidf_vectorizer.transform(
    raw_documents=test_sentiments['processed_reviews'],
)


In [5]:
from keras.models import Sequential
from keras.layers import Dense
def _fit_and_report(model, accuracy_header, report_header, blank_after=True):
    """Fit ``model``, predict on the test matrix and print accuracy plus report.

    Uses the module-level ``train_tfidf`` / ``test_tfidf`` feature matrices and
    the 'sentiment' column of ``train_sentiments`` / ``test_sentiments``.

    Parameters:
        model: an unfitted scikit-learn classifier; it is fitted in place.
        accuracy_header: text printed before the accuracy value.
        report_header: text printed before the classification report.
        blank_after: when true, print the "\n" separator after the report.

    Returns:
        tuple: ``(predictions, accuracy)`` for the test set.
    """
    model.fit(train_tfidf, train_sentiments['sentiment'])
    predicted = model.predict(test_tfidf)
    score = accuracy_score(test_sentiments['sentiment'], predicted)
    print(accuracy_header, score)
    print(report_header, classification_report(test_sentiments['sentiment'], predicted))
    if blank_after:
        print("\n")
    return predicted, score


# Naive Bayes
nb_model = MultinomialNB()
nb_predictions, nb_accuracy = _fit_and_report(
    nb_model, "Naive Bayes Accuracy:", "Classification Report for Naive Bayes:\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_predictions, rf_accuracy = _fit_and_report(
    rf_model, "Random Forest Accuracy:", "Classification Report for Random Forest:\n")

# Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(64,), max_iter=100, random_state=42)
nn_predictions, nn_accuracy = _fit_and_report(
    nn_model, "Neural Network MLP CLASSIFIER Accuracy:", "Classification Report for Neural Network:\n")

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_predictions, dt_accuracy = _fit_and_report(
    dt_model, "Decision Tree Accuracy:", "Classification Report for Decision Tree:\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_predictions, gb_accuracy = _fit_and_report(
    gb_model, "Gradient Boosting Accuracy:", "Classification Report for Gradient Boosting:\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_predictions, knn_accuracy = _fit_and_report(
    knn_model, "k-NN Accuracy:", "Classification Report for k-NN:\n")

# The same five base learners feed both the stacking and the voting ensemble;
# each ensemble receives its own copy of the list.
base_estimators = [('nb', nb_model), ('rf', rf_model), ('knn', knn_model), ('dt', dt_model), ('gb', gb_model)]

# Stacking
stacking_model = StackingClassifier(
    estimators=list(base_estimators),
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
)
stacking_predictions, stacking_accuracy = _fit_and_report(
    stacking_model, "Stacking Accuracy:", "Classification Report for Stacking:\n")

# Bagging
bagging_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagging_predictions, bagging_accuracy = _fit_and_report(
    bagging_model, "Bagging Accuracy:", "Classification Report for Bagging:\n")

# Voting Classifier (last report, so no trailing separator is printed)
voting_model = VotingClassifier(estimators=list(base_estimators), voting='hard')
voting_predictions, voting_accuracy = _fit_and_report(
    voting_model, "Voting Classifier Accuracy:", "Classification Report for Voting Classifier:\n",
    blank_after=False)

Naive Bayes Accuracy: 0.8513
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.85      0.85      0.85      9935
    positive       0.85      0.85      0.85     10065

    accuracy                           0.85     20000
   macro avg       0.85      0.85      0.85     20000
weighted avg       0.85      0.85      0.85     20000



Random Forest Accuracy: 0.8443
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.85      9935
    positive       0.85      0.83      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Neural Network MLP CLASSIFIER Accuracy: 0.85735
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.85      0.86      0.86      9935


spaCy lemmatization without POS _ count vectorizer

In [6]:
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import spacy

# Load spaCy's "en_core_web_sm" English pipeline once at module level;
# preprocess_lemreview (defined below) reuses this shared `nlp` object on every call.
nlp = spacy.load("en_core_web_sm")

# Function for lemmatization
_STOP_WORDS = None  # lazily-built cache of NLTK's English stop-word list


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, reading the corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_lemreview(review):
    """Convert one raw review into a space-joined string of lemmas.

    Steps:
      1. Tokenize ``review`` with NLTK's ``word_tokenize``.
      2. Drop tokens whose lower-cased form is an NLTK English stop word.
      3. Re-join the remaining tokens with single spaces and run the
         module-level spaCy pipeline ``nlp`` over that text.
      4. Emit ``token.lemma_`` for every token of the resulting spaCy doc
         (i.e. spaCy's own tokenization of the re-joined text), joined by
         single spaces.

    Parameters:
        review: the review text passed straight to ``word_tokenize``.

    Returns:
        str: the processed review.
    """
    # The stop-word set is identical for every review, so it is fetched from a
    # cache instead of being re-read from the corpus on each call.
    stop_words = _english_stop_words()
    kept = [token for token in word_tokenize(review) if token.lower() not in stop_words]

    doc = nlp(" ".join(kept))
    return ' '.join(token.lemma_ for token in doc)

# Read the train and test review tables (train first, then test).
train_sentiments, test_sentiments = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/sentiments/train.csv/train.csv",
        "/kaggle/input/sentiments/test.csv/test.csv",
    )
)

# Store the preprocessed text of every 'review' in a new 'processed_reviews' column.
for frame in (train_sentiments, test_sentiments):
    frame['processed_reviews'] = frame['review'].apply(preprocess_lemreview)

# Bag-of-words counts capped at 5000 terms: the vocabulary is fitted on the
# training text only. NOTE: the matrices keep the names train_tfidf / test_tfidf
# even though they hold CountVectorizer output, not TF-IDF weights.
count_vectorizer = CountVectorizer(max_features=5000)
train_tfidf = count_vectorizer.fit_transform(train_sentiments['processed_reviews'])
test_tfidf = count_vectorizer.transform(test_sentiments['processed_reviews'])

In [7]:
from keras.models import Sequential
from keras.layers import Dense
def _fit_and_report(model, accuracy_header, report_header, blank_after=True):
    """Fit ``model``, predict on the test matrix and print accuracy plus report.

    Uses the module-level ``train_tfidf`` / ``test_tfidf`` feature matrices and
    the 'sentiment' column of ``train_sentiments`` / ``test_sentiments``.

    Parameters:
        model: an unfitted scikit-learn classifier; it is fitted in place.
        accuracy_header: text printed before the accuracy value.
        report_header: text printed before the classification report.
        blank_after: when true, print the "\n" separator after the report.

    Returns:
        tuple: ``(predictions, accuracy)`` for the test set.
    """
    model.fit(train_tfidf, train_sentiments['sentiment'])
    predicted = model.predict(test_tfidf)
    score = accuracy_score(test_sentiments['sentiment'], predicted)
    print(accuracy_header, score)
    print(report_header, classification_report(test_sentiments['sentiment'], predicted))
    if blank_after:
        print("\n")
    return predicted, score


# Naive Bayes
nb_model = MultinomialNB()
nb_predictions, nb_accuracy = _fit_and_report(
    nb_model, "Naive Bayes Accuracy:", "Classification Report for Naive Bayes:\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_predictions, rf_accuracy = _fit_and_report(
    rf_model, "Random Forest Accuracy:", "Classification Report for Random Forest:\n")

# Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(64,), max_iter=100, random_state=42)
nn_predictions, nn_accuracy = _fit_and_report(
    nn_model, "Neural Network MLP CLASSIFIER Accuracy:", "Classification Report for Neural Network:\n")

# Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_predictions, dt_accuracy = _fit_and_report(
    dt_model, "Decision Tree Accuracy:", "Classification Report for Decision Tree:\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_predictions, gb_accuracy = _fit_and_report(
    gb_model, "Gradient Boosting Accuracy:", "Classification Report for Gradient Boosting:\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_predictions, knn_accuracy = _fit_and_report(
    knn_model, "k-NN Accuracy:", "Classification Report for k-NN:\n")

# The same five base learners feed both the stacking and the voting ensemble;
# each ensemble receives its own copy of the list.
base_estimators = [('nb', nb_model), ('rf', rf_model), ('knn', knn_model), ('dt', dt_model), ('gb', gb_model)]

# Stacking
stacking_model = StackingClassifier(
    estimators=list(base_estimators),
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
)
stacking_predictions, stacking_accuracy = _fit_and_report(
    stacking_model, "Stacking Accuracy:", "Classification Report for Stacking:\n")

# Bagging
bagging_model = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagging_predictions, bagging_accuracy = _fit_and_report(
    bagging_model, "Bagging Accuracy:", "Classification Report for Bagging:\n")

# Voting Classifier (last report, so no trailing separator is printed)
voting_model = VotingClassifier(estimators=list(base_estimators), voting='hard')
voting_predictions, voting_accuracy = _fit_and_report(
    voting_model, "Voting Classifier Accuracy:", "Classification Report for Voting Classifier:\n",
    blank_after=False)

Naive Bayes Accuracy: 0.84485
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.85      9935
    positive       0.85      0.84      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Random Forest Accuracy: 0.8448
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.85      9935
    positive       0.85      0.84      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Neural Network MLP CLASSIFIER Accuracy: 0.8652
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.87      0.86      0.86      9935


spaCy lemmatizer without POS _ count vectorizer with modified parameters of ML models

In [1]:
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import spacy

# Load spaCy's "en_core_web_sm" English pipeline once at module level;
# preprocess_lemreview (defined below) reuses this shared `nlp` object on every call.
nlp = spacy.load("en_core_web_sm")

# Function for lemmatization
_STOP_WORDS = None  # lazily-built cache of NLTK's English stop-word list


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, reading the corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_lemreview(review):
    """Convert one raw review into a space-joined string of lemmas.

    Steps:
      1. Tokenize ``review`` with NLTK's ``word_tokenize``.
      2. Drop tokens whose lower-cased form is an NLTK English stop word.
      3. Re-join the remaining tokens with single spaces and run the
         module-level spaCy pipeline ``nlp`` over that text.
      4. Emit ``token.lemma_`` for every token of the resulting spaCy doc
         (i.e. spaCy's own tokenization of the re-joined text), joined by
         single spaces.

    Parameters:
        review: the review text passed straight to ``word_tokenize``.

    Returns:
        str: the processed review.
    """
    # The stop-word set is identical for every review, so it is fetched from a
    # cache instead of being re-read from the corpus on each call.
    stop_words = _english_stop_words()
    kept = [token for token in word_tokenize(review) if token.lower() not in stop_words]

    doc = nlp(" ".join(kept))
    return ' '.join(token.lemma_ for token in doc)

# Read the train and test review tables (train first, then test).
train_sentiments, test_sentiments = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/sentiments/train.csv/train.csv",
        "/kaggle/input/sentiments/test.csv/test.csv",
    )
)

# Store the preprocessed text of every 'review' in a new 'processed_reviews' column.
for frame in (train_sentiments, test_sentiments):
    frame['processed_reviews'] = frame['review'].apply(preprocess_lemreview)

# Bag-of-words counts capped at 5000 terms: the vocabulary is fitted on the
# training text only. NOTE: the matrices keep the names train_tfidf / test_tfidf
# even though they hold CountVectorizer output, not TF-IDF weights.
count_vectorizer = CountVectorizer(max_features=5000)
train_tfidf = count_vectorizer.fit_transform(train_sentiments['processed_reviews'])
test_tfidf = count_vectorizer.transform(test_sentiments['processed_reviews'])

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, BaggingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier

def _fit_and_report(model, accuracy_header, report_header, blank_after=True):
    """Fit ``model``, predict on the test matrix and print accuracy plus report.

    Uses the module-level ``train_tfidf`` / ``test_tfidf`` feature matrices and
    the 'sentiment' column of ``train_sentiments`` / ``test_sentiments``.

    Parameters:
        model: an unfitted scikit-learn classifier; it is fitted in place.
        accuracy_header: text printed before the accuracy value.
        report_header: text printed before the classification report.
        blank_after: when true, print the "\n" separator after the report.

    Returns:
        tuple: ``(predictions, accuracy)`` for the test set.
    """
    model.fit(train_tfidf, train_sentiments['sentiment'])
    predicted = model.predict(test_tfidf)
    score = accuracy_score(test_sentiments['sentiment'], predicted)
    print(accuracy_header, score)
    print(report_header, classification_report(test_sentiments['sentiment'], predicted))
    if blank_after:
        print("\n")
    return predicted, score


# Naive Bayes
nb_model = MultinomialNB(alpha=0.1)
nb_predictions, nb_accuracy = _fit_and_report(
    nb_model, "Naive Bayes Accuracy:", "Classification Report for Naive Bayes:\n")

# Random Forest
rf_model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
rf_predictions, rf_accuracy = _fit_and_report(
    rf_model, "Random Forest Accuracy:", "Classification Report for Random Forest:\n")

# Neural Network
nn_model = MLPClassifier(hidden_layer_sizes=(128,), max_iter=200, alpha=0.0001, random_state=42)
nn_predictions, nn_accuracy = _fit_and_report(
    nn_model, "Neural Network MLP CLASSIFIER Accuracy:", "Classification Report for Neural Network:\n")

# Decision Tree
dt_model = DecisionTreeClassifier(max_depth=10, min_samples_split=10, random_state=42)
dt_predictions, dt_accuracy = _fit_and_report(
    dt_model, "Decision Tree Accuracy:", "Classification Report for Decision Tree:\n")

# Gradient Boosting
gb_model = GradientBoostingClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42)
gb_predictions, gb_accuracy = _fit_and_report(
    gb_model, "Gradient Boosting Accuracy:", "Classification Report for Gradient Boosting:\n")

# k-NN
knn_model = KNeighborsClassifier(n_neighbors=10)
knn_predictions, knn_accuracy = _fit_and_report(
    knn_model, "k-NN Accuracy:", "Classification Report for k-NN:\n")

# The five base learners shared by the stacking and voting ensembles below.
base_estimators = [('nb', nb_model), ('rf', rf_model), ('knn', knn_model), ('dt', dt_model), ('gb', gb_model)]

# Stacking
stacking_model = StackingClassifier(
    estimators=list(base_estimators),
    final_estimator=GradientBoostingClassifier(n_estimators=100, random_state=42),
)
stacking_predictions, stacking_accuracy = _fit_and_report(
    stacking_model, "Stacking Accuracy:", "Classification Report for Stacking:\n")

# Bagging (each bagged member is itself an ExtraTreesClassifier)
bagging_model = BaggingClassifier(estimator=ExtraTreesClassifier(), n_estimators=200, random_state=42)
bagging_predictions, bagging_accuracy = _fit_and_report(
    bagging_model, "Bagging Accuracy:", "Classification Report for Bagging:\n")

# Voting Classifier: the five base learners plus the stacking ensemble as a
# sixth voter (last report, so no trailing separator is printed).
voting_model = VotingClassifier(
    estimators=base_estimators + [('st', stacking_model)],
    voting='hard',
)
voting_predictions, voting_accuracy = _fit_and_report(
    voting_model, "Voting Classifier Accuracy:", "Classification Report for Voting Classifier:\n",
    blank_after=False)

Naive Bayes Accuracy: 0.8446
Classification Report for Naive Bayes:
               precision    recall  f1-score   support

    negative       0.84      0.85      0.84      9935
    positive       0.85      0.84      0.84     10065

    accuracy                           0.84     20000
   macro avg       0.84      0.84      0.84     20000
weighted avg       0.84      0.84      0.84     20000



Random Forest Accuracy: 0.82905
Classification Report for Random Forest:
               precision    recall  f1-score   support

    negative       0.85      0.80      0.82      9935
    positive       0.81      0.86      0.83     10065

    accuracy                           0.83     20000
   macro avg       0.83      0.83      0.83     20000
weighted avg       0.83      0.83      0.83     20000



Neural Network MLP CLASSIFIER Accuracy: 0.8728
Classification Report for Neural Network:
               precision    recall  f1-score   support

    negative       0.88      0.87      0.87      9935
