###IMPORTING NECESSARY LIBRARIES

In [1]:
import pandas as pd
import string
import nltk
nltk.download("punkt_tab")
nltk.download( "stopwords")
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
data = pd.read_csv("tweets.csv")
pd.set_option('display.max_colwidth', None)
data.head(10)

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1MfQV #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone
1,2,0,Finally a transparant silicon case ^^ Thanks to my uncle :) #yay #Sony #Xperia #S #sonyexperias… http://instagram.com/p/YGEt5JC6JM/
2,3,0,We love this! Would you go? #talk #makememories #unplug #relax #iphone #smartphone #wifi #connect... http://fb.me/6N3LsUpCu
3,4,0,I'm wired I know I'm George I was made that way ;) #iphone #cute #daventry #home http://instagr.am/p/Li_5_ujS4k/
4,5,1,What amazing service! Apple won't even talk to me about a question I have unless I pay them $19.95 for their stupid support!
5,6,1,iPhone software update fucked up my phone big time Stupid iPhones
6,7,0,Happy for us .. #instapic #instadaily #us #sony #xperia #xperiaZ https://instagram.com/p/z9qGfWlvj7/
7,8,0,New Type C charger cable #UK http://www.ebay.co.uk/itm/-/112598674021 … #bay #Amazon #etsy New Year #Rob Cross #Toby Young #EVEMUN #McMafia #Taylor #SPECTRE 2018 #NewYear #Starting 2018 #recipes #technology #SamsungGalaxyS9 #iPhoneX pic.twitter.com/PjIwq59WtC
8,9,0,Bout to go shopping again listening to music #iphone #justme #music #likeforlike #followforfollow… http://instagr.am/p/Vj6bg5tLql/
9,10,0,Photo: #fun #selfie #pool #water #sony #camera #picoftheday #sun #instagood #boy #cute #outdoor... http://tmblr.co/ZAjC0n1ms_Gp8


###DATA PREPROCESSING

In [3]:
## identify null values

data.isnull().sum()

Unnamed: 0,0
id,0
label,0
tweet,0


In [4]:
data["label"].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,5894
1,2026


###### Here positive sentiments are mapped to 0 and negative sentiments are mapped to 1.

In [5]:
## Lowercase Conversion

data['tweet'] = data['tweet'].str.lower()
data.head(10)

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #pregnancy test https://goo.gl/h1mfqv #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone
1,2,0,finally a transparant silicon case ^^ thanks to my uncle :) #yay #sony #xperia #s #sonyexperias… http://instagram.com/p/yget5jc6jm/
2,3,0,we love this! would you go? #talk #makememories #unplug #relax #iphone #smartphone #wifi #connect... http://fb.me/6n3lsupcu
3,4,0,i'm wired i know i'm george i was made that way ;) #iphone #cute #daventry #home http://instagr.am/p/li_5_ujs4k/
4,5,1,what amazing service! apple won't even talk to me about a question i have unless i pay them $19.95 for their stupid support!
5,6,1,iphone software update fucked up my phone big time stupid iphones
6,7,0,happy for us .. #instapic #instadaily #us #sony #xperia #xperiaz https://instagram.com/p/z9qgfwlvj7/
7,8,0,new type c charger cable #uk http://www.ebay.co.uk/itm/-/112598674021 … #bay #amazon #etsy new year #rob cross #toby young #evemun #mcmafia #taylor #spectre 2018 #newyear #starting 2018 #recipes #technology #samsunggalaxys9 #iphonex pic.twitter.com/pjiwq59wtc
8,9,0,bout to go shopping again listening to music #iphone #justme #music #likeforlike #followforfollow… http://instagr.am/p/vj6bg5tlql/
9,10,0,photo: #fun #selfie #pool #water #sony #camera #picoftheday #sun #instagood #boy #cute #outdoor... http://tmblr.co/zajc0n1ms_gp8


In [6]:
## Removing String punctuations

def remove_punctuations(text):
  punctuationfree = "".join([i for i in text if i not in string.punctuation])
  return punctuationfree

In [7]:
## Word Tokenization

def tokenization(text):
  words = nltk.word_tokenize(text)
  return words

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [8]:
## Remove stopwords

stopwords = nltk.corpus.stopwords.words('english')
def remove_stopwords(text):
  output = [i for i in text if i not in stopwords]
  return output

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [9]:
## Lemmatization

from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
wordnet_lemmatizer = WordNetLemmatizer()

def lemmatizer(text):
  lemm_text = [wordnet_lemmatizer.lemmatize(word) for word in text]
  return lemm_text

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
def preprocess(data_col):
  corpus = []
  for item in data_col:
    new_item = remove_punctuations(item)
    new_item = new_item.lower()
    new_item = tokenization(new_item)
    new_item = remove_stopwords(new_item)
    new_item = lemmatizer(new_item)
    corpus.append(' '.join(str(x) for x in new_item))
  return corpus

In [11]:
corpus = preprocess(data["tweet"])

In [12]:
## Count Vectorization

cv = CountVectorizer(ngram_range=(1,2))
vec_data = cv.fit_transform(corpus)
X = vec_data
y = data["label"]

### TRAIN TEST SPLIT

In [16]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

###APPLYING RANDOM FOREST CLASSIFIER

In [17]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators = 100)
clf.fit(X, y)

###ACCURACY CHECKING

In [18]:
from sklearn import metrics
y_pred = clf.predict(X)
metrics.accuracy_score(y_pred, y)

0.9998737373737374

With random forest 99% accuracy gives in sentiment analysis.

###IDENTIFYING WHETHER SENTIMENT ANALYSIS WORKS

In [20]:
def find_sentiment(input):
  input = cv.transform(preprocess(input))
  prediction = clf.predict(input)
  if prediction == 0:
    print('Input statement has Positive sentiment')
  if prediction == 1:
    print('Input statement has Negative sentiment')

In [21]:
input = ["The phone stopped working."]
find_sentiment(input)

Input statement has Negative sentiment
