In [51]:
#import 
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics

In [52]:
#load dataset
data = pd.read_csv('tweets.csv')

In [53]:
data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [54]:
data['tweet_text'].fillna('',inplace=True)

In [55]:
data['tweet_text'].isna().sum()

0

In [56]:
data['is_there_an_emotion_directed_at_a_brand_or_product'].isna().sum()

0

In [57]:
data['is_there_an_emotion_directed_at_a_brand_or_product'].unique()

array(['Negative emotion', 'Positive emotion',
       'No emotion toward brand or product', "I can't tell"], dtype=object)

In [58]:
texts = data['tweet_text'].values 
target = data['is_there_an_emotion_directed_at_a_brand_or_product'].values

In [59]:
texts

array(['.@wesley83 I have a 3G iPhone. After 3 hrs tweeting at #RISE_Austin, it was dead!  I need to upgrade. Plugin stations at #SXSW.',
       "@jessedee Know about @fludapp ? Awesome iPad/iPhone app that you'll likely appreciate for its design. Also, they're giving free Ts at #SXSW",
       '@swonderlin Can not wait for #iPad 2 also. They should sale them down at #SXSW.',
       ...,
       "Google's Zeiger, a physician never reported potential AE. Yet FDA relies on physicians. &quot;We're operating w/out data.&quot; #sxsw #health2dev",
       'Some Verizon iPhone customers complained their time fell back an hour this weekend.  Of course they were the New Yorkers who attended #SXSW.',
       '地震中だがこっちもびっくり。RT @mention Google Tests “Check-in Offers” At #SXSW {link}'],
      dtype=object)

In [60]:
target

array(['Negative emotion', 'Positive emotion', 'Positive emotion', ...,
       'No emotion toward brand or product',
       'No emotion toward brand or product',
       'No emotion toward brand or product'], dtype=object)

In [61]:
# Convert labels to numerical values
target_map = {'Negative emotion': -1,'Positive emotion':1, 'No emotion toward brand or product': 0}

In [62]:
y = np.array([target_map.get(label, 0) for label in target])

In [63]:
# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=5000)  
X = vectorizer.fit_transform(texts)

In [66]:
# Split data into training and test sets
X_train, X_test,y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)

In [67]:
# Create and train the SVM model
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

In [68]:
# Make predictions on the test set
predictions = svm_model.predict(X_test)

In [69]:
# Evaluate the model
print(metrics.classification_report(y_test, predictions))

              precision    recall  f1-score   support

          -1       0.59      0.13      0.21       173
           0       0.69      0.88      0.78      1640
           1       0.65      0.43      0.52       915

    accuracy                           0.68      2728
   macro avg       0.65      0.48      0.50      2728
weighted avg       0.67      0.68      0.65      2728

