In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC

# Load dataset
data = pd.read_csv('/content/judge-1377884607_tweet_product_company.csv',encoding='ISO-8859-1')

# Extract relevant columns
tweets = data['tweet_text']
brands = data['emotion_in_tweet_is_directed_at']
sentiments = data['is_there_an_emotion_directed_at_a_brand_or_product']

# Filter out rows where the brand is not specified or the sentiment is 'no emotion toward brand or product'
data = data[(brands.notnull()) & (sentiments != "No emotion toward brand or product")]

# Encode sentiments as binary labels (0 for negative, 1 for positive)
label_encoder = LabelEncoder()
data['sentiment'] = label_encoder.fit_transform(data['is_there_an_emotion_directed_at_a_brand_or_product'])

# Preprocess text data
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
X = vectorizer.fit_transform(data['tweet_text'])
y = data['sentiment']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train SVM classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)

# Predictions
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

# Evaluation
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)

print("\nClassification Report for Testing Data:")
print(classification_report(y_test, y_pred_test, target_names=label_encoder.classes_))


Training Accuracy: 0.89609375
Testing Accuracy: 0.8671875

Classification Report for Testing Data:
                  precision    recall  f1-score   support

    I can't tell       0.00      0.00      0.00         1
Negative emotion       0.85      0.22      0.35       103
Positive emotion       0.87      0.99      0.93       536

        accuracy                           0.87       640
       macro avg       0.57      0.41      0.43       640
    weighted avg       0.86      0.87      0.83       640



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
