In [1]:
import pandas as pd

In [2]:
# Load the labelled tweets: the training split first, then the held-out test split.
train, test = (pd.read_csv(csv_path) for csv_path in ('Corona.csv', 'Corona_NLP_test.csv'))

In [3]:
# Which sentiment classes occur in the training split?
train['Sentiment'].unique()

array(['Neutral', 'Positive', 'Extremely Negative', 'Negative',
       'Extremely Positive'], dtype=object)

In [4]:
# Which sentiment classes occur in the test split?
test['Sentiment'].unique()

array(['Extremely Negative', 'Positive', 'Extremely Positive', 'Negative',
       'Neutral'], dtype=object)

In [5]:
# Preview a handful of random rows. Sampling five rows directly avoids
# shuffling the whole frame only to keep its head, and the fixed seed makes
# the preview reproducible on Restart & Run All. `min` keeps the cell working
# on frames with fewer than five rows.
train.sample(n=min(5, len(train)), random_state=42)

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
14103,17902,62854,Jamaica,21-03-2020,"In light of this pandemic, we aim to serve you...",Extremely Positive
39907,43706,88658,"Las Vegas, NV",13-04-2020,"Coronavirus pandemic | OPEC, Russia approve bi...",Positive
26260,30059,75011,Louisville KY,02-04-2020,Y all Only go out if you absolutely need it If...,Positive
25260,29059,74011,"Alberta Beach, Barrhead, Fox Creek, Lac Ste. A...",31-03-2020,Food service operations are seeing the most ad...,Positive
7552,11351,56303,,19-03-2020,Ocado expected to impose rationing on more pro...,Negative


In [6]:
# Drop the tweet date; only the tweet text and its label are used below.
# `errors='ignore'` keeps this cell safe to re-run once the column is gone
# (a bare `del` would raise KeyError on the second execution).
train = train.drop(columns=['TweetAt'], errors='ignore')

In [7]:
# Drop the user identifier; only the tweet text and its label are used below.
# `errors='ignore'` keeps this cell safe to re-run once the column is gone
# (a bare `del` would raise KeyError on the second execution).
train = train.drop(columns=['UserName'], errors='ignore')

In [8]:
# Drop the screen-name column; only the tweet text and its label are used below.
# `errors='ignore'` keeps this cell safe to re-run once the column is gone
# (a bare `del` would raise KeyError on the second execution).
train = train.drop(columns=['ScreenName'], errors='ignore')

In [9]:
# Drop the free-text location; only the tweet text and its label are used below.
# `errors='ignore'` keeps this cell safe to re-run once the column is gone
# (a bare `del` would raise KeyError on the second execution).
train = train.drop(columns=['Location'], errors='ignore')

In [10]:
# Preview a handful of random rows of the trimmed frame. Sampling five rows
# directly avoids shuffling the whole frame only to keep its head, and the
# fixed seed makes the preview reproducible on Restart & Run All. `min` keeps
# the cell working on frames with fewer than five rows.
train.sample(n=min(5, len(train)), random_state=42)

Unnamed: 0,OriginalTweet,Sentiment
10026,endless page after page of greedy people selli...,Negative
25190,Daniel Kaufman of offers tips to avoid scams w...,Negative
21128,Survey%3A%20How%20is%20COVID-19%20Changing%20C...,Neutral
12641,"Posted a few of these this morning, I work 10h...",Positive
12601,You can still get your favorite foods just in ...,Positive


In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [12]:
# Turn raw tweet text into TF-IDF features.
#   min_df=5          -> skip terms that occur in fewer than 5 tweets
#   max_df=0.8        -> skip terms that occur in more than 80% of tweets
#   sublinear_tf=True -> dampen raw term counts logarithmically
vectorizer = TfidfVectorizer(min_df=5, max_df=0.8, sublinear_tf=True, use_idf=True)

# Learn the vocabulary and IDF weights on the training tweets only, then
# project the test tweets with that same fitted vocabulary.
train_vectors = vectorizer.fit_transform(train.OriginalTweet)
test_vectors = vectorizer.transform(test.OriginalTweet)

In [13]:
import time
from sklearn import svm
from sklearn.metrics import classification_report

In [14]:
# Fit a linear-kernel SVM on the TF-IDF matrix, timing training and
# prediction separately.
classifier_linear = svm.SVC(kernel='linear')

t0 = time.time()
classifier_linear.fit(train_vectors, train.Sentiment)
t1 = time.time()  # end of training doubles as the start of prediction
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.time()
time_linear_train, time_linear_predict = t1 - t0, t2 - t1

print("Training time: %fs; Prediction time: %fs" % (time_linear_train, time_linear_predict))

# Per-class precision / recall / f1-score / support on the test split,
# returned as a nested dict keyed by class label.
report = classification_report(test.Sentiment, prediction_linear, output_dict=True)

# (printed heading, key in `report`) pairs, in display order.
for heading, sentiment in (
    ('positive: ', 'Positive'),
    ('negative: ', 'Negative'),
    ('Extremely positive: ', 'Extremely Positive'),
    ('Extremely Negative: ', 'Extremely Negative'),
    ('Neutral : ', 'Neutral'),
):
    print(heading, report[sentiment])

Training time: 1002.320804s; Prediction time: 53.756868s
positive:  {'precision': 0.573414422241529, 'recall': 0.6969376979936642, 'f1-score': 0.6291706387035272, 'support': 947}
negative:  {'precision': 0.5768903993203058, 'recall': 0.6522574447646494, 'f1-score': 0.612263300270514, 'support': 1041}
Extremely positive:  {'precision': 0.7722095671981777, 'recall': 0.5659432387312187, 'f1-score': 0.653179190751445, 'support': 599}
Extremely Negative:  {'precision': 0.7342657342657343, 'recall': 0.5320945945945946, 'f1-score': 0.6170421155729677, 'support': 592}
Neutral :  {'precision': 0.739202657807309, 'recall': 0.7189014539579968, 'f1-score': 0.7289107289107289, 'support': 619}


In [21]:
# Spot-check the fitted pipeline on one hand-written piece of text: vectorize
# it with the already-fitted TF-IDF vocabulary, then classify it.
review = """Do not purchase this product. My cell phone blast when I switched the charger"""
review_vector = vectorizer.transform([review])  # transform expects an iterable of documents
predicted_sentiment = classifier_linear.predict(review_vector)
print(predicted_sentiment)

['Neutral']
