In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import metrics

In [7]:
# Load the dataset with specified encoding
df = pd.read_csv('/content/Dataset.csv', encoding='latin1')

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.2, random_state=42)


In [9]:
# Define feature extraction methods
vectorizers = {
    'BoW_Count': CountVectorizer(),
    'BoW_TfIDF': TfidfVectorizer(),
    'ngrams': CountVectorizer(ngram_range=(1, 3))
}

In [10]:
# Define classifiers
classifiers = {
    'Naive Bayes': MultinomialNB(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'Perceptron': Perceptron()
}

In [11]:
# Train and evaluate classifiers
results = []

for vec_name, vectorizer in vectorizers.items():
    for clf_name, classifier in classifiers.items():
        # Vectorize the data
        X_train_vec = vectorizer.fit_transform(X_train)
        X_test_vec = vectorizer.transform(X_test)

        # Train the classifier
        classifier.fit(X_train_vec, y_train)

        # Predict on the test set
        y_pred = classifier.predict(X_test_vec)

        # Evaluate performance
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision, recall, f1_score, _ = metrics.precision_recall_fscore_support(y_test, y_pred, average='micro')

        # Save results
        results.append({
            'Vectorizer': vec_name,
            'Classifier': clf_name,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1_score
        })

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [12]:
# Display results in a table
results_df = pd.DataFrame(results)
print(results_df)

   Vectorizer           Classifier  Accuracy  Precision    Recall  F1 Score
0   BoW_Count          Naive Bayes  0.726221   0.726221  0.726221  0.726221
1   BoW_Count  Logistic Regression  0.748072   0.748072  0.748072  0.748072
2   BoW_Count        Random Forest  0.733933   0.733933  0.733933  0.733933
3   BoW_Count                  SVM  0.735219   0.735219  0.735219  0.735219
4   BoW_Count           Perceptron  0.733933   0.733933  0.733933  0.733933
5   BoW_TfIDF          Naive Bayes  0.751928   0.751928  0.751928  0.751928
6   BoW_TfIDF  Logistic Regression  0.740360   0.740360  0.740360  0.740360
7   BoW_TfIDF        Random Forest  0.727506   0.727506  0.727506  0.727506
8   BoW_TfIDF                  SVM  0.750643   0.750643  0.750643  0.750643
9   BoW_TfIDF           Perceptron  0.700514   0.700514  0.700514  0.700514
10     ngrams          Naive Bayes  0.742931   0.742931  0.742931  0.742931
11     ngrams  Logistic Regression  0.739075   0.739075  0.739075  0.739075
12     ngram