In [1]:
import pandas as pd

In [2]:
# Read the training and test CSV files into DataFrames (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in ('Corona.csv', 'Corona_NLP_test.csv'))

In [3]:
# Distinct values of the Sentiment column in the training frame.
train['Sentiment'].unique()

array(['Neutral', 'Positive', 'Extremely Negative', 'Negative',
       'Extremely Positive'], dtype=object)

In [4]:
# Distinct values of the Sentiment column in the test frame.
test['Sentiment'].unique()

array(['Extremely Negative', 'Positive', 'Extremely Positive', 'Negative',
       'Neutral'], dtype=object)

In [5]:
# Shuffle every row, then show the first five for a random look at the raw columns.
train.sample(frac=1.0).head(n=5)

Unnamed: 0,UserName,ScreenName,Location,TweetAt,OriginalTweet,Sentiment
37382,41181,86133,"Cincinnati,Ohio",10-04-2020,@CDub13_ @JohnMatarese @MrDavidAngelo Heres a...,Neutral
3425,7224,52176,SYDNEY AU,18-03-2020,19 All this has proven to me is that our govt ...,Negative
32657,36456,81408,,07-04-2020,Look at these graphs! They are following each ...,Negative
34588,38387,83339,London,08-04-2020,What do Mail Metro Media readers think about n...,Positive
20788,24587,69539,"New York, NY",25-03-2020,When going for a grocery run means literally...,Neutral


In [6]:
# Columns that the tweet-text model below never reads; only OriginalTweet and
# Sentiment are used for vectorisation and training.
unused_columns = ["UserName", "ScreenName", "Location", "TweetAt"]

# Rebind instead of mutating in place, and tolerate already-removed columns,
# so running this cell a second time does not raise KeyError.
train = train.drop(columns=unused_columns, errors="ignore")
test = test.drop(columns=unused_columns, errors="ignore")

In [7]:
# First five training rows after the column drop.
train.head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
0,@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/i...,Neutral
1,advice Talk to your neighbours family to excha...,Positive
2,Coronavirus Australia: Woolworths to give elde...,Positive
3,My food stock is not the only one which is emp...,Positive
4,"Me, ready to go at supermarket during the #COV...",Extremely Negative


In [8]:
# First five test rows after the column drop.
test.head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
0,TRENDING: New Yorkers encounter empty supermar...,Extremely Negative
1,When I couldn't find hand sanitizer at Fred Me...,Positive
2,Find out how you can protect yourself and love...,Extremely Positive
3,#Panic buying hits #NewYork City as anxious sh...,Negative
4,#toiletpaper #dunnypaper #coronavirus #coronav...,Neutral


In [9]:
# Random look at the reduced training frame: shuffle all rows, keep the first five.
train.sample(frac=1.0).head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
3057,Going to the grocery store just put me in a re...,Extremely Negative
3762,Is there anything more undignified than a supe...,Negative
7297,I was talking to someone and I was like this ...,Neutral
25543,Medical Devices notified as Drugs w e f 1st Ap...,Neutral
11979,We have one delivery per department. It takes ...,Negative


In [10]:
# Random look at the reduced test frame: shuffle all rows, keep the first five.
test.sample(frac=1.0).head(n=5)

Unnamed: 0,OriginalTweet,Sentiment
3453,OK I'm ready to head to the supermarket!\r\r\n...,Extremely Positive
3558,They cant figure out why half the US workforc...,Negative
1647,@Blair_A_Nathan Was thinking- large companies ...,Neutral
2472,Hey fun fact! Posting empty grocery store shel...,Negative
1336,What efforts are being carried out for disadva...,Extremely Positive


# MODELING

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [12]:
# TF-IDF bag-of-words features for the tweet text.
vectorizer = TfidfVectorizer(
    use_idf=True,        # weight terms by inverse document frequency
    sublinear_tf=True,   # replace tf with 1 + log(tf)
    max_df=0.8,          # float: skip terms present in more than 80% of tweets
    min_df=5,            # int: skip terms present in fewer than 5 tweets
)

# Fit the vocabulary and IDF weights on the training tweets only, then reuse
# the fitted vectorizer to encode the test tweets.
train_vectors = vectorizer.fit_transform(train.OriginalTweet)
test_vectors = vectorizer.transform(test.OriginalTweet)

In [13]:
import time
from sklearn import svm
from sklearn.metrics import classification_report

In [14]:
# Fit a linear-kernel SVM on the TF-IDF features and time both phases.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the wall clock and can jump when the
# system time is adjusted, which would corrupt the reported durations.
classifier_linear = svm.SVC(kernel='linear')
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, train['Sentiment'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()
time_linear_train = t1 - t0      # seconds spent in fit()
time_linear_predict = t2 - t1    # seconds spent in predict() on the test vectors

In [15]:
# Smoke test: classify one hand-written sentence with the fitted pipeline.
review = "I hate this pandemic"
review_vector = vectorizer.transform([review])  # transform expects an iterable of documents
predicted_label = classifier_linear.predict(review_vector)
print(predicted_label)

['Extremely Negative']


# EVALUATION

In [16]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

In [17]:
# Load scikit-learn's bundled iris dataset and unpack features and labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [18]:
# Append 200 * n_features columns of Gaussian noise to the iris features.
# X is rebuilt from iris.data rather than from the previous X, so re-running
# this cell always yields the same matrix instead of stacking another batch
# of noise columns onto an already-widened X.
random_state = np.random.RandomState(0)
n_samples, n_features = iris.data.shape
X = np.c_[iris.data, random_state.randn(n_samples, 200 * n_features)]

In [19]:
# Keep only the rows whose label is below 2 (a two-class problem) and split
# them half/half into train and test parts.
binary_rows = y < 2
X_train, X_test, y_train, y_test = train_test_split(
    X[binary_rows],
    y[binary_rows],
    test_size=0.5,
    random_state=random_state,
)

In [20]:
# Fit a LinearSVC on the iris training half; fit() returns the estimator itself.
classifier = svm.LinearSVC(random_state=random_state).fit(X_train, y_train)

# Signed distances to the decision boundary for the held-out half.
y_score = classifier.decision_function(X_test)

In [23]:
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score

In [24]:
# Timing summary for the linear SVM, followed by per-class metrics on the test tweets.
print(f"Training time: {time_linear_train:f}s; Prediction time: {time_linear_predict:f}s")
report = classification_report(test['Sentiment'], prediction_linear, output_dict=True)

# (printed prefix, key in the report dict); prefixes are kept exactly as before.
for prefix, label in (
    ('positive: ', 'Positive'),
    ('negative: ', 'Negative'),
    ('Extremely positive: ', 'Extremely Positive'),
    ('Extremely Negative: ', 'Extremely Negative'),
    ('Neutral : ', 'Neutral'),
):
    print(prefix, report[label])

Training time: 1003.083718s; Prediction time: 52.684860s
positive:  {'precision': 0.573414422241529, 'recall': 0.6969376979936642, 'f1-score': 0.6291706387035272, 'support': 947}
negative:  {'precision': 0.5768903993203058, 'recall': 0.6522574447646494, 'f1-score': 0.612263300270514, 'support': 1041}
Extremely positive:  {'precision': 0.7722095671981777, 'recall': 0.5659432387312187, 'f1-score': 0.653179190751445, 'support': 599}
Extremely Negative:  {'precision': 0.7342657342657343, 'recall': 0.5320945945945946, 'f1-score': 0.6170421155729677, 'support': 592}
Neutral :  {'precision': 0.739202657807309, 'recall': 0.7189014539579968, 'f1-score': 0.7289107289107289, 'support': 619}


In [35]:
# Overall fraction of test tweets whose predicted sentiment matches the label.
accuracy_score(y_true=test['Sentiment'], y_pred=prediction_linear)

0.6419167983149026

In [25]:
# NOTE: y_test and y_score come from the iris LinearSVC cells, so this number
# describes that toy classifier, not the tweet sentiment SVM evaluated above.
average_precision = average_precision_score(y_test, y_score)

print(f'Average precision-recall score: {average_precision:0.2f}')

Average precision-recall score: 0.88
