In [None]:
# importing libraries

from textblob import TextBlob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer


In [None]:
# Fetch the NLTK stop-word corpus that stopwords.words("english") reads in the cleaning cell.
nltk.download('stopwords')

In [None]:
# Load the raw labelled tweets; the file is read with the latin1 codec.
origin = pd.read_csv(
    "Apple-Twitter-Sentiment-DFE.csv",
    encoding='latin1',
)
origin.head()

In [None]:
# (rows, columns) of the raw frame
origin.shape

In [None]:
# Summary statistics of the raw frame
origin.describe()

In [None]:
# Prepping the data
# Drop bookkeeping columns that are not used in the analysis.
# 'sentiment' is deliberately kept: later cells read o2['sentiment'] as the label.

o2 = origin.drop(columns=['_golden','_unit_state','_trusted_judgments',
                    '_last_judgment_at','sentiment:confidence',
                    'id','query','sentiment_gold'])
o2.head()

In [None]:
# (rows, columns) after dropping the unused columns
o2.shape

In [None]:
# Summary statistics of the trimmed frame
o2.describe()

In [None]:
# Cleaning the data
# Removing '@' handle

def remove_pattern(input_txt, pattern):
    """Return ``input_txt`` with every match of ``pattern`` removed.

    Parameters
    ----------
    input_txt : str
        Text to clean.
    pattern : str
        Regular expression whose matches are deleted.

    Returns
    -------
    str
        ``input_txt`` with all non-overlapping matches of ``pattern`` removed.
    """
    # Substitute with the pattern itself, in one pass. Re-using each matched
    # string as a new regex would (a) let a short match such as "@ab" eat the
    # start of a longer one such as "@abc", and (b) misinterpret any regex
    # metacharacters contained in the matched text.
    return re.sub(pattern, '', input_txt)

# Strip '@' handles from every tweet. The pattern is a raw string so that
# "\w" reaches the regex engine instead of being an invalid string escape.
o2['text'] = o2['text'].apply(remove_pattern, args=(r"@[\w]*",))

o2.head()

In [None]:
# Tokenizer models used by nltk.word_tokenize. Newer NLTK releases look for
# 'punkt_tab' rather than 'punkt', so request both.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

In [None]:
# Keep a handle on the tweet-text column and display it
text = o2['text']
text

In [None]:
# Cleaning 'text'
# Builds `corpus`: one cleaned, stop-word-free, stemmed string per tweet.
corpus = []
stop_words = set(stopwords.words("english"))
pst = PorterStemmer()  # one stemmer instance is enough for every tweet

for tweet in o2['text']:
    # Remove links first, while their punctuation still marks where they end.
    cleaned = re.sub(r'https?://\S+|www\.\S+', ' ', tweet)
    # Replace every run of non-alphanumeric characters with a space.
    cleaned = re.sub(r'[^A-Za-z0-9]+', ' ', cleaned)
    cleaned = cleaned.lower()
    # Drop the retweet marker and leftover URL scheme tokens as whole words only,
    # so words that merely contain "rt" (e.g. "start") are left intact.
    cleaned = re.sub(r'\b(?:rt|http|https)\b', ' ', cleaned)
    words = [pst.stem(word) for word in cleaned.split() if word not in stop_words]
    corpus.append(' '.join(words))

In [None]:
X = o2['text']

# word_tokenize works on one string at a time, so tokenize tweet by tweet;
# `tokens` is a Series holding a list of tokens per tweet.
tokens = X.apply(nltk.word_tokenize)
tokens

In [None]:
# Target labels passed to the classifiers' fit calls.
# NOTE(review): relies on o2 still carrying a 'sentiment' column at this point — confirm.
y = o2['sentiment']
y

In [None]:
from textblob import TextBlob

# Score a single example tweet (the first row) to show what TextBlob returns.
tweet = o2['text'].iloc[0]
ts = TextBlob(tweet).sentiment
ts  # exposes .polarity and .subjectivity

In [None]:
# Sentiment Analysis Report
# Count tweets by the sign of their TextBlob polarity score.

pos = 0
neg = 0
neu = 0
# Iterate over the tweet texts themselves; iterating the DataFrame directly
# yields its column names, not its rows.
for tweet_text in o2['text']:
    polarity = TextBlob(tweet_text).sentiment.polarity
    if polarity > 0:
        pos += 1
    elif polarity < 0:
        neg += 1
    else:
        neu += 1

pos, neg, neu

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

MAX_FEATURES = 1500  # vocabulary cap; keeps the dense matrix built for GaussianNB small


def _to_dense(matrix):
    """Convert the vectorizer's sparse output to a dense array (GaussianNB rejects sparse input)."""
    return matrix.toarray()


# The classifiers are fit on raw tweet text, which the bare estimators cannot
# consume. Each model is therefore a pipeline that turns text into TF-IDF
# features first, so .fit / .predict accept the text column directly and the
# vocabulary is learned only from the rows passed to .fit.
NBmodel = make_pipeline(
    TfidfVectorizer(max_features=MAX_FEATURES),
    FunctionTransformer(_to_dense, accept_sparse=True),
    GaussianNB(),
)
SVCmodel = make_pipeline(
    TfidfVectorizer(max_features=MAX_FEATURES),
    SVC(kernel='rbf', random_state = 1),
)
LRmodel = make_pipeline(
    TfidfVectorizer(max_features=MAX_FEATURES),
    LogisticRegression(solver='liblinear', random_state=0),
)


In [None]:
# Fit the Naive Bayes model on all of X / y (nothing is held out in this cell)
NBmodel.fit(X,y)

In [None]:
# Fit the support-vector model on all of X / y (nothing is held out in this cell)
SVCmodel.fit(X,y)

In [None]:
# Fit the logistic-regression model on all of X / y (nothing is held out in this cell)
LRmodel.fit(X,y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for testing; random_state pins the split so the
# reported accuracy is reproducible from run to run.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=0)
NBmodel.fit(Xtrain, ytrain)
prediction = NBmodel.predict(Xtest)

# Naive Bayes accuracy on the held-out rows
score = accuracy_score(ytest, prediction)
score

In [None]:
# Hold out 20% of the rows for testing; random_state pins the split so the
# reported accuracy is reproducible from run to run.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=0)
SVCmodel.fit(Xtrain, ytrain)
prediction2 = SVCmodel.predict(Xtest)

# SVC accuracy on the held-out rows
score2 = accuracy_score(ytest, prediction2)
score2

In [None]:
# Hold out 20% of the rows for testing; random_state pins the split so the
# reported accuracy is reproducible from run to run.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=0)
LRmodel.fit(Xtrain, ytrain)
prediction3 = LRmodel.predict(Xtest)

# Logistic-regression accuracy on the held-out rows
score3 = accuracy_score(ytest, prediction3)
score3

In [None]:
level = ['Positive','Negative','Neutral']
x = range(len(level)) # x-axis: one slot per sentiment class
# Bar heights: the tweet counts tallied from TextBlob polarity, in the same
# order as `level`. (A bar chart needs one height per bar, not one value per tweet.)
counts = [pos, neg, neu]
plt.figure(dpi=100) # create figure

# bar plot
plt.bar(x, counts, width=0.5, color=['b','r','g']) # plot figure
plt.xticks(x, level)
plt.ylabel("Number of tweets")
plt.title("Tweets per TextBlob sentiment class")

# add grid
plt.grid(linestyle="--", alpha=1)
plt.show()

In [None]:
# "seaborn-dark" was renamed to "seaborn-v0_8-dark" in Matplotlib 3.6; use
# whichever of the two names this installation provides.
for style_name in ("seaborn-v0_8-dark", "seaborn-dark"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

plt.figure(figsize=(8,6), dpi=80)

# generate data: TextBlob polarity of every tweet, grouped by sign and sorted
# NOTE(review): the original cell called np.pos()/np.neg()/np.neu(), which do not
# exist; plotting sorted polarity per group is an interpretation — confirm intent.
polarity = np.array([TextBlob(t).sentiment.polarity for t in o2['text']])
P = np.sort(polarity[polarity > 0])
Ne = np.sort(polarity[polarity < 0])
Nu = polarity[polarity == 0]


def _rank_percent(values):
    """Return x positions that spread len(values) points evenly over 0-100."""
    return np.linspace(0, 100, len(values))


# draw a blue curve with customized format
plt.plot(_rank_percent(P), P, color="Blue", linewidth=1.0, label="Positive", linestyle="--")
# draw a green curve with customized format
plt.plot(_rank_percent(Ne), Ne, color="Green", linewidth=1.0, label="Negative", linestyle=":")
# draw an orange curve with customized format
plt.plot(_rank_percent(Nu), Nu, color="Orange", linewidth=1.0, label="Neutral", linestyle=":")

plt.legend()                                    # place a legend on the axes
plt.xlim(0, 100)                                # x-axis: rank within the group, in percent
plt.xticks(np.linspace(0,100,5,endpoint=True))  # set the tick locations on x-axis
plt.ylim(-1.0,1.0)                              # polarity scores lie in [-1, 1]
plt.yticks(np.linspace(-1,1,5,endpoint=True))   # set the tick location on y-axis
plt.xlabel("Rank within sentiment group (%)")
plt.ylabel("TextBlob polarity")
plt.title("Sorted tweet polarity by sentiment group")

plt.show()