# AERPS (Analysis of Election & Result Prediction System)

In [5]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings("ignore")

from sklearn.feature_extraction.text import CountVectorizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

import pickle
def savetofile(obj,filename):
    '''Pickle ``obj`` into the file named ``filename + ".p"``.

    Parameters:
      obj      - any picklable object.
      filename - path of the target file without the ".p" extension.

    The file is opened inside a ``with`` block so the handle is flushed and
    closed even if pickling raises.
    '''
    with open(filename + ".p", "wb") as handle:
        pickle.dump(obj, handle)
def openfromfile(filename):
    '''Load and return the object pickled in ``filename + ".p"``.

    Parameters:
      filename - path of the pickle file without the ".p" extension.

    Returns:
      The unpickled object.
    '''
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(filename + ".p", "rb") as handle:
        return pickle.load(handle)

In [2]:
def data_cleaning(sent):
    '''Clean a single piece of text and return it as one normalised string.

       Steps, in the order they are applied:
       1. Replace html tags with a space
       2. Strip URL-like substrings
       3. Replace every character that is not an ASCII letter, a digit or a
          newline with a space, then collapse whitespace runs
       4. Apply lower casing
       5. Drop stopwords (negations listed in `excluding` are kept) and words
          of two characters or fewer
       6. Apply Snowball stemming to the remaining words

       Parameters:
       sent - the text to clean (one string)

       Return value:
       The cleaned words joined by single spaces; an empty string when no
       word survives the filtering.'''
    import re  # Regular Expressions
    import nltk # Natural Language Tool Kit
    from nltk.corpus import stopwords

    # The stopword corpus has to be available locally; the original cell kept
    # a commented-out nltk.download('stopwords') for that purpose.

    # Negation words carry sentiment, so they are removed from the stopword list.
    excluding = {'against','not','don', "don't",'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
                 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't",
                 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't",'shouldn', "shouldn't", 'wasn',
                 "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't", 'no'}

    # Kept as a set so the per-word membership test below is O(1).
    stop_words = set(stopwords.words('english')) - excluding
    sno = nltk.stem.SnowballStemmer('english') #initialising the snowball stemmer

    sent = re.sub(r'<.*?>', ' ', sent) # remove html tags
    sent = re.sub(r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b', '', sent, flags=re.MULTILINE)
    sent = re.sub(r'[^a-zA-Z0-9\n]', ' ', sent) # remove special characters
    sent = re.sub(r'\s+', ' ', sent) # replace multiple spaces with single space
    sent = sent.lower() # convert all characters to lower case

    # Filter on the lower-cased word, then stem what is kept.
    filtered_sent = [sno.stem(word) for word in sent.split()
                     if word not in stop_words and len(word) > 2]
    return " ".join(filtered_sent)

## Analysis of News Headlines Related to Narendra Modi

### Using RNN

In [26]:
# Read the headline file one line at a time, echoing each line as it is read.
# The recorded output contains Devanagari text and a byte-order mark, so the
# encoding is stated explicitly instead of relying on the platform default.
nm = []
with open('narendramodi.txt', 'r', encoding='utf-8') as handle:
    for line in handle:
        nm.append(line)  # lines keep their trailing newline
        print(line)
# Drop the first line; in the recorded run it holds only the byte-order mark.
del nm[0]
print(nm)
print(len(nm))

﻿

The dawn of Modi 2.0

Tribute to Narendra Modi | Narendra Modi Motivational Video | Tum Mujhe Kab Tak Rokoge? | Hindi

PM Modi address to nation LIVE Updates: PM begins his address

Gujarat Forest ....

PM-KISAN Yojana – Can It Help The Farming Community

#परिवार_पूजा

Pulwama attack: Jammu people hold massive protests against Pakistan

Narendra Modi Short Film

PM Narendra Modi addresses public itanagar rally today in Arunachal

How to face fierce drought in Maharashtra..Beed.. Government has ignored us... cattle are about to die.. next farmers will hang himself..

Irked Trump mocks Modi on Afghan library; India rejects jibe

sonone

These 5 Bollywood Actresses Are Big Fan Of Narendra Modi

RAHUL GANDHI AND AMIT SHAH WORD WAR CONTINUOS

Pmp tree plants...

India's bad dream

PM Narendra Modi most followed world leader on Instagram

Pak media praising india

India looks inward on trade

Corrupt Modi

PM MODI CRITICIZED, MJ AKBAR TO TAKE LEGAL ACTION & BOLLYWOOD STEP UP FOR #METOO

M

In [27]:
def clean_rnn(x):
    '''Normalise one text before it is tokenised for the RNN.

    The value is converted with str(), URL-like substrings are stripped, the
    text is lower-cased, every character other than ASCII letters, digits and
    whitespace is deleted, and whitespace runs are collapsed to one space.

    Parameters:
      x - the text to clean; non-string values are converted with str().

    Returns:
      The cleaned string (a leading/trailing single space may remain).
    '''
    x = re.sub(r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b', '', str(x), flags=re.MULTILINE)
    x = x.lower()
    # The class used to read 'A-z', which also let [ \ ] ^ _ ` through.
    # NOTE(review): confirm the pickled tokenizer was not fitted on text that
    # relied on those characters surviving.
    x = re.sub(r'[^a-zA-Z0-9\s]', '', x)
    x = re.sub(r'\s+', ' ', x)

    return x

In [28]:
# Count RNN predictions over the Narendra Modi headlines.
rnnpositive = 0
rnnnegative = 0
i = 0
tokenizer = openfromfile("rnntokenizer")
model = openfromfile("rnnmodel")
# Score at most 235 headlines, but never index past the end of nm.
for i in range(min(235, len(nm))):
    # nm is cleaned in place, so later cells see the cleaned text.
    nm[i] = clean_rnn(nm[i])
    #vectorizing the tweet by the pre-fitted tokenizer instance
    twt = tokenizer.texts_to_sequences([nm[i]])
    #padding the tweet to a fixed length of 40
    twt = pad_sequences(twt, maxlen=40, dtype='int32', value=0)
    sentiment = model.predict(twt,batch_size=1,verbose = 2)[0]
    # Index 0 is counted as negative, index 1 as positive.
    label = np.argmax(sentiment)
    if label == 0:
        rnnnegative+=1
    elif label == 1:
        rnnpositive+=1

print("Negative: ", rnnnegative)
print("Positive: ", rnnpositive)

Negative:  50
Positive:  185


In [29]:
def nb_tf_be(text):
    '''Classify one text with the pickled "nb_tfidf_bern" model.

    The text is cleaned with data_cleaning, vectorised with the pickled
    'tfidf_vectoriser', normalised, and passed to the model.
    (Presumably a Bernoulli Naive Bayes model on TF-IDF features, judging by
    the file names - confirm against the training notebook.)

    Parameters:
      text - raw text to classify.

    Returns:
      1 when the predicted label is '4', 0 when it is '0', and None for any
      other label.
    '''
    import warnings
    warnings.filterwarnings("ignore")
    from sklearn import preprocessing

    text = data_cleaning(text)

    #vectorising the text
    # NOTE: the vectoriser and the model are re-read from disk on every call.
    tfidf = openfromfile('tfidf_vectoriser')
    test = tfidf.transform([text,])
    #Normalize Data
    test = preprocessing.normalize(test)

    model1 = openfromfile("nb_tfidf_bern")
    y_pred = model1.predict(test)
    # Labels are compared as the strings '4' and '0'.
    if y_pred[0] == '4':
        return 1
    elif y_pred[0] == '0':
        return 0
    return None

In [40]:
# Count nb_tf_be predictions over the Narendra Modi headlines.
positive1 = 0
negative1 = 0
i = 0

# Score at most 235 headlines, but never index past the end of nm.
for i in range(min(235, len(nm))):
    text = nm[i]
    res = nb_tf_be(text)
    if res == 0:
        negative1 += 1
    elif res == 1:
        positive1 += 1

print("Negative: ", negative1)
print("Positive: ", positive1)

Negative:  33
Positive:  202


In [11]:
# Show the current loop index and the running nb_tf_be tallies.
print(i)
for caption, tally in (("Negative: ", negative1), ("Positive: ", positive1)):
    print(caption, tally)

61
Negative:  9
Positive:  52


In [41]:
def nb_tf_mu(text):
    '''Classify one text with the pickled "nb_tfidf_mul" model.

    The text is cleaned with data_cleaning, vectorised with the pickled
    'tfidf_vectoriser', normalised, and passed to the model.
    (Presumably a Multinomial Naive Bayes model on TF-IDF features, judging
    by the file names - confirm against the training notebook.)

    Parameters:
      text - raw text to classify.

    Returns:
      1 when the predicted label is '4', 0 when it is '0', and None for any
      other label.
    '''
    import warnings
    warnings.filterwarnings("ignore")
    from sklearn import preprocessing

    text = data_cleaning(text)

    #vectorising the text
    # NOTE: the vectoriser and the model are re-read from disk on every call.
    tfidf = openfromfile('tfidf_vectoriser')
    test = tfidf.transform([text,])
    #Normalize Data
    test = preprocessing.normalize(test)

    model1 = openfromfile("nb_tfidf_mul")
    y_pred = model1.predict(test)
    # Labels are compared as the strings '4' and '0'.
    if y_pred[0] == '4':
        return 1
    elif y_pred[0] == '0':
        return 0
    return None

In [42]:
# Count nb_tf_mu predictions over the Narendra Modi headlines.
positive2 = 0
negative2 = 0
i = 0

# Score at most 235 headlines, but never index past the end of nm.
for i in range(min(235, len(nm))):
    text = nm[i]
    res = nb_tf_mu(text)
    if res == 0:
        negative2 += 1
    elif res == 1:
        positive2 += 1

print("Negative: ", negative2)
print("Positive: ", positive2)

Negative:  58
Positive:  177


In [35]:
def nb_bi_be(text):
    '''Classify one text with the pickled "nb_bigram_bern" model.

    The text is cleaned with data_cleaning, vectorised with the pickled
    'bi_gram_vectoriser', normalised, and passed to the model.
    (Presumably a Bernoulli Naive Bayes model on bi-gram features, judging by
    the file names - confirm against the training notebook.)

    Parameters:
      text - raw text to classify.

    Returns:
      1 when the predicted label is '4', 0 when it is '0', and None for any
      other label.
    '''
    import warnings
    warnings.filterwarnings("ignore")
    from sklearn import preprocessing

    text = data_cleaning(text)

    #vectorising the text
    # NOTE: the vectoriser and the model are re-read from disk on every call.
    bg = openfromfile('bi_gram_vectoriser')
    test = bg.transform([text,])
    #Normalize Data
    test = preprocessing.normalize(test)

    model1 = openfromfile("nb_bigram_bern")
    y_pred = model1.predict(test)
    # Labels are compared as the strings '4' and '0'.
    if y_pred[0] == '4':
        return 1
    elif y_pred[0] == '0':
        return 0
    return None

In [43]:
# Count nb_bi_be predictions over the Narendra Modi headlines.
positive3 = 0
negative3 = 0
i = 0

# Score at most 235 headlines, but never index past the end of nm.
for i in range(min(235, len(nm))):
    text = nm[i]
    res = nb_bi_be(text)
    if res == 0:
        negative3 += 1
    elif res == 1:
        positive3 += 1

print("Negative: ", negative3)
print("Positive: ", positive3)

Negative:  43
Positive:  192


In [37]:
def nb_bi_mu(text):
    '''Classify one text with the pickled "nb_bigram_mul" model.

    The text is cleaned with data_cleaning, vectorised with the pickled
    'bi_gram_vectoriser', normalised, and passed to the model.
    (Presumably a Multinomial Naive Bayes model on bi-gram features, judging
    by the file names - confirm against the training notebook.)

    Parameters:
      text - raw text to classify.

    Returns:
      1 when the predicted label is '4', 0 when it is '0', and None for any
      other label.
    '''
    import warnings
    warnings.filterwarnings("ignore")
    from sklearn import preprocessing

    text = data_cleaning(text)

    #vectorising the text
    # NOTE: the vectoriser and the model are re-read from disk on every call.
    bg = openfromfile('bi_gram_vectoriser')
    test = bg.transform([text,])
    #Normalize Data
    test = preprocessing.normalize(test)

    model1 = openfromfile("nb_bigram_mul")
    y_pred = model1.predict(test)
    # Labels are compared as the strings '4' and '0'.
    if y_pred[0] == '4':
        return 1
    elif y_pred[0] == '0':
        return 0
    return None

In [44]:
# Count nb_bi_mu predictions over the Narendra Modi headlines.
positive4 = 0
negative4 = 0
i = 0

# Score at most 235 headlines, but never index past the end of nm.
for i in range(min(235, len(nm))):
    text = nm[i]
    res = nb_bi_mu(text)
    if res == 0:
        negative4 += 1
    elif res == 1:
        positive4 += 1

print("Negative: ", negative4)
print("Positive: ", positive4)

Negative:  62
Positive:  173


## Analysis of news related to Rahul Gandhi

In [48]:
# Load the Rahul Gandhi news table; a later cell reads its 'news' column.
data = pd.read_csv("rahulgandhi.csv")

In [49]:
# Keep only the first 235 rows (fewer if the table is shorter).
data = data.iloc[:235]

In [59]:
# Count RNN predictions over the Rahul Gandhi news items.
rg = list(data['news'])

rnnpositive2 = 0
rnnnegative2 = 0
i = 0
tokenizer = openfromfile("rnntokenizer")
model = openfromfile("rnnmodel")
# Score at most 235 items, but never index past the end of rg.
for i in range(min(235, len(rg))):
    # rg is cleaned in place, so later cells see the cleaned text.
    rg[i] = clean_rnn(rg[i])
    #vectorizing the tweet by the pre-fitted tokenizer instance
    twt = tokenizer.texts_to_sequences([rg[i]])
    #padding the tweet to a fixed length of 40
    twt = pad_sequences(twt, maxlen=40, dtype='int32', value=0)
    sentiment = model.predict(twt,batch_size=1,verbose = 2)[0]
    # Index 0 is counted as negative, index 1 as positive.
    label = np.argmax(sentiment)
    if label == 0:
        rnnnegative2+=1
    elif label == 1:
        rnnpositive2+=1

print("Negative: ", rnnnegative2)
print("Positive: ", rnnpositive2)

Negative:  52
Positive:  183


In [61]:
# Count nb_tf_be predictions over the Rahul Gandhi news items.
positive5 = 0
negative5 = 0
i = 0

# Score at most 235 items, but never index past the end of rg.
for i in range(min(235, len(rg))):
    text = rg[i]
    res = nb_tf_be(text)
    if res == 0:
        negative5 += 1
    elif res == 1:
        positive5 += 1

print("Negative: ", negative5)
print("Positive: ", positive5)

Negative:  21
Positive:  214


In [64]:
# Count nb_tf_mu predictions over the Rahul Gandhi news items.
positive6 = 0
negative6 = 0
i = 0

# Score at most 235 items, but never index past the end of rg.
for i in range(min(235, len(rg))):
    text = rg[i]
    res = nb_tf_mu(text)
    if res == 0:
        negative6 += 1
    elif res == 1:
        positive6 += 1
    print(i, end =" ")  # progress indicator on a single line

# End the progress line so the summary does not get appended to it.
print('\n')
print("Negative: ", negative6)
print("Positive: ", positive6)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 Negative:  44
Positive:  191


In [66]:
# Count nb_bi_be predictions over the Rahul Gandhi news items.
positive7 = 0
negative7 = 0
i = 0

# Score at most 235 items, but never index past the end of rg.
for i in range(min(235, len(rg))):
    text = rg[i]
    res = nb_bi_be(text)
    if res == 0:
        negative7 += 1
    elif res == 1:
        positive7 += 1
    print(i, end =" ")  # progress indicator on a single line

# End the progress line before printing the summary.
print('\n')
print("Negative: ", negative7)
print("Positive: ", positive7)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 

Negative:  21
Positive:  214


In [68]:
# Count nb_bi_mu predictions over the Rahul Gandhi news items.
positive8 = 0
negative8 = 0
i = 0

# Score at most 235 items, but never index past the end of rg.
for i in range(min(235, len(rg))):
    text = rg[i]
    res = nb_bi_mu(text)
    if res == 0:
        negative8 += 1
    elif res == 1:
        positive8 += 1
    print(i, end =" ")  # progress indicator on a single line

# End the progress line before printing the summary.
print('\n')
print("Negative: ", negative8)
print("Positive: ", positive8)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 

Negative:  45
Positive:  190


## Results

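Percentage of the 235 items per party that each model classified as positive (BJP: Narendra Modi headlines, INC: Rahul Gandhi news).

| Model     | BJP    | INC    |
|-----------|--------|--------|
| LSTM      | 78.723 | 77.872 |
| NB_TF_BE  | 85.957 | 91.063 |
| NB_TF_MU  | 75.319 | 81.276 |
| NB_BI_BE  | 81.702 | 91.063 |
| NB_BI_MU  | 73.617 | 80.851 |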

In [None]:
-