In [20]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [21]:
# Read the CSV file
df = pd.read_csv('apis/tweet_data.csv')

# Discard rows with a missing tweet or a missing label: a NaN in 'Text' would
# crash the lower-casing step, and a NaN label cannot be used for training.
df = df.dropna(subset=['Text', 'Sentiment'])

# Preprocess the text data: coerce to str (guards against values the CSV
# parser read as numbers) and lower-case with the vectorized string accessor.
df['Text'] = df['Text'].astype(str).str.lower()

In [22]:
# X as tokenized data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Text'].values)

In [23]:
y = df['Sentiment'].values

In [24]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [25]:
# Candidate smoothing strengths (alpha) to be tried by the grid search
param_grid = dict(alpha=[0.1, 0.5, 1.0])

In [26]:
# Tune a multinomial Naive Bayes classifier: 5-fold cross-validated grid
# search over param_grid, fitted on the training split.
model = MultinomialNB()
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)

In [27]:
# Take the estimator the grid search refit with the winning parameters
best_model = grid_search.best_estimator_

# Predict sentiment labels for the held-out test split
predictions = best_model.predict(X_test)

In [28]:
# Score the held-out predictions: overall accuracy plus the per-class
# precision / recall / F1 table.
report = classification_report(y_true=y_test, y_pred=predictions)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)

# Report the selected hyperparameters followed by both metrics
print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Best Parameters: {'alpha': 1.0}
Accuracy: 0.7756686798964625
Classification Report:
               precision    recall  f1-score   support

          -1       0.70      0.65      0.67       417
           1       0.81      0.85      0.83       742

    accuracy                           0.78      1159
   macro avg       0.76      0.75      0.75      1159
weighted avg       0.77      0.78      0.77      1159

