In [31]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV

In [32]:
# Read the CSV file
df = pd.read_csv('apis/tweet_data.csv')

# Preprocess the text data
df['Text'] = df['Text'].apply(lambda x: x.lower())

In [33]:
# X as tokenized data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Text'].values)

In [34]:
y = df['Sentiment'].values

In [35]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [36]:
# Define the parameter grid for hyperparameter tuning
param_grid = {'C': [0.1, 1, 10],
              'gamma': [0.1, 1, 'scale'],
              'kernel': ['linear', 'rbf']}

In [37]:
# Create the model and perform hyperparameter tuning
model = SVC()
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X_train, y_train)

In [38]:
# Get the best model and its predictions
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)

In [39]:
# Calculate accuracy and print the results
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.8067299396031061
Classification Report:
               precision    recall  f1-score   support

          -1       0.77      0.65      0.71       417
           1       0.82      0.89      0.86       742

    accuracy                           0.81      1159
   macro avg       0.80      0.77      0.78      1159
weighted avg       0.80      0.81      0.80      1159

