# Final ensemble model

## Running the model to generate predictions on political tweets
---

# 1. Installs and imports

## 1.1. Install all required libraries

In [1]:
# Uncomment line below to install all required libraries
# !pip3 install -r ../requirements.txt -q

## 1.2. Import required libraries

In [2]:
import re
import os
from nltk.corpus import stopwords
import pandas as pd
import pickle
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

## 2. Load political tweets dataset

In [3]:
# Read the pre-cleaned political tweets into a DataFrame.
tweets_csv_path = './data/cleaned_political_tweets.csv'
df = pd.read_csv(tweets_csv_path)

In [4]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

Unnamed: 0,mp,tweet_date,tweet_time,tweet_text,cleaned_tweet,Snowball_Stem
0,rsprasad,2021-03-31,23:30:00,"Please, don't break the trust.\n\n@narendramod...",please break trust,pleas break trust
1,rsprasad,2021-03-31,23:30:00,"MAINTENANCE OF WIVES, CHILDREN AND PARENTS - h...",maintenance wives children parents l c h r read,mainten wive children parent l c h r read
2,rsprasad,2021-03-31,23:30:00,NIA look into SSR case \n\n@AmitShah @narendra...,nia look ssr case,nia look ssr case
3,rsprasad,2021-03-31,23:30:00,@PavanjitMane1 @PMOIndia @CMOMaharashtra @rspr...,new pan allotted per aadhar many pans issued b...,new pan allot per aadhar mani pan issu bef aad...
4,rsprasad,2021-03-31,23:30:00,@rsprasad @narendramodi @PMOIndia @examwarrior...,one worst govt till interest reduced income ta...,one worst govt till interest reduc incom tax c...


In [5]:
# Preview the last rows as well, to see how far the row labels run.
df.tail()

Unnamed: 0,mp,tweet_date,tweet_time,tweet_text,cleaned_tweet,Snowball_Stem
34553,DrSJaishankar,2021-03-30,7:30:00,@RDXThinksThat @HQ_IDS_India @adgpi @IAF_MCC @...,happy holi sir green heart,happi holi sir green heart
34554,DrSJaishankar,2021-03-30,7:30:00,@ANI Dhame on u @ImranKhanPTI\nActually @Govto...,dhame actually renamed nauseated face nauseate...,dhame actual renam nauseat face nauseat face
34555,DrSJaishankar,2021-03-30,7:30:00,"@swati_gs @DrSJaishankar Sir, Please ensure t...",sir please ensure bangladesh foriegn ministry ...,sir pleas ensur bangladesh foriegn ministri an...
34556,DrSJaishankar,2021-03-30,7:30:00,@DrSJaishankar @vijai63 India is proud of our ...,india proud pm shri narendra modi ji hearty co...,india proud pm shri narendra modi ji hearti co...
34557,DrSJaishankar,2021-03-30,7:30:00,🔸As India-UAE relationship grow stronger day b...,small orange diamond india uae relationship gr...,small orang diamond india uae relationship gro...


---

## Drop rows with missing cleaned text

In [6]:
# Count missing values per column before any rows are removed.
df.isna().sum()

mp                 0
tweet_date         0
tweet_time         0
tweet_text         0
cleaned_tweet    160
Snowball_Stem    160
dtype: int64

In [7]:
# Remove every row that has a missing value in any column. Per the counts
# above, only `cleaned_tweet` and `Snowball_Stem` contain NaNs.
# Row labels are not reset, so the index keeps gaps where rows were dropped.
df = df.dropna(axis=0, how='any')

In [8]:
# Re-count missing values to confirm that no NaNs remain after the drop.
df.isna().sum()

mp               0
tweet_date       0
tweet_time       0
tweet_text       0
cleaned_tweet    0
Snowball_Stem    0
dtype: int64

In [9]:
# `boom` is the text series fed to the vectorizer and to both tokenizers below.
# It holds the `Snowball_Stem` column (stemmed tweet text, judging by the
# column name and the preview; the stemming itself is not done in this notebook).
boom = df['Snowball_Stem']

In [10]:
# Quick look at the text that will be passed to the models.
boom.head()

0                                    pleas break trust
1            mainten wive children parent l c h r read
2                                    nia look ssr case
3    new pan allot per aadhar mani pan issu bef aad...
4    one worst govt till interest reduc incom tax c...
Name: Snowball_Stem, dtype: object

## Loading the models

In [11]:
# Locations of the saved model artifacts, grouped by the model that uses them.

# Classical models and the vectorizer whose output both of them consume
vectorizer_path = './UnigramBigram_vectorizer.pickle'
svm_path = './SVM_UnigramBigram_75.pickle'
nb_path = './NB_UnigramBigram_78.pickle'

# LSTM network and its tokenizer
lstm_path = './LSTM_train_75_val_78_test_79_acc.h5'
lstm_token_path = './LSTM_tokenizer.pickle'

# BLSTM network and its tokenizer
Blstm_path = './B_LSTM_train_76_26_val_77_86_test_78_57_acc.h5'
blstm_token_path = './BLSTM_tokenizer.pickle'

In [12]:
# Load the pickled SVM classifier. The `with` block closes the file handle as
# soon as unpickling finishes; a bare open() would leave it open.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(svm_path, 'rb') as svm_file:
    svm = pickle.load(svm_file)

In [13]:
# Load the pickled NB classifier. The `with` block closes the file handle as
# soon as unpickling finishes; a bare open() would leave it open.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(nb_path, 'rb') as nb_file:
    nb = pickle.load(nb_file)

In [14]:
# Load the pickled tokenizer that pairs with the LSTM model. The `with` block
# closes the file handle once unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(lstm_token_path, 'rb') as lstm_tok_file:
    lstm_tok = pickle.load(lstm_tok_file)

In [15]:
# Load the saved LSTM network from the .h5 file at `lstm_path`.
lstm = load_model(lstm_path)

In [16]:
# Load the pickled tokenizer that pairs with the BLSTM model. The `with` block
# closes the file handle once unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(blstm_token_path, 'rb') as blstm_tok_file:
    blstm_tok = pickle.load(blstm_tok_file)

In [17]:
# Load the pickled text vectorizer whose output feeds the NB and SVM models.
# The `with` block closes the file handle once unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(vectorizer_path, 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

In [39]:
# Load the saved BLSTM network (presumably a bidirectional LSTM, going by the
# file name) from the .h5 file at `Blstm_path`.
blstm = load_model(Blstm_path)

## Tokenizing and Vectorizing

In [19]:
# Convert the stemmed text into the feature matrix consumed by NB and SVM.
# Only transform() is called, so the loaded vectorizer is applied as-is and is
# not refit on these tweets.
vector = vectorizer.transform(boom)

In [20]:
# Fixed length that every token sequence is padded/truncated to for both
# LSTM-based models.
# NOTE(review): presumably this must equal the sequence length used when the
# networks were trained — confirm against the training notebook.
max_seq_length = 20

In [21]:
# Encode each tweet as a sequence of token ids using the LSTM's own tokenizer.
lstm_seq = lstm_tok.texts_to_sequences(boom)

In [22]:
# Bring every LSTM sequence to exactly `max_seq_length` tokens: shorter ones
# are padded at the end, longer ones are cut at the end.
lstm_pad = pad_sequences(
    lstm_seq,
    maxlen=max_seq_length,
    padding="post",
    truncating="post",
)

In [23]:
# Encode each tweet as a sequence of token ids using the BLSTM's own tokenizer.
blstm_seq = blstm_tok.texts_to_sequences(boom)

In [24]:
# Bring every BLSTM sequence to exactly `max_seq_length` tokens: shorter ones
# are padded at the end, longer ones are cut at the end.
blstm_pad = pad_sequences(
    blstm_seq,
    maxlen=max_seq_length,
    padding="post",
    truncating="post",
)

## Running the models

In [25]:
# Class predictions from the NB model on the vectorized text.
nb_pred = nb.predict(vector)

In [26]:
# Store the NB predictions next to the tweets they belong to.
df['NB_predictions'] = nb_pred

In [29]:
# Raw LSTM outputs for the padded sequences; they are thresholded at 0.5 when
# written to the `LSTM_predictions` column.
lstm_pred = lstm.predict(lstm_pad)

In [36]:
# Convert the LSTM's raw output into a hard 0/1 label: 1 when the first output
# value of a row is >= 0.5, otherwise 0. The comparison is done on the whole
# array at once instead of calling a Python lambda per row.
df['LSTM_predictions'] = (lstm_pred[:, 0] >= 0.5).astype('int64')

In [40]:
# Raw BLSTM outputs for the padded sequences; they are thresholded at 0.5 when
# written to the `BLSTM_predictions` column.
blstm_pred = blstm.predict(blstm_pad)

In [41]:
# Convert the BLSTM's raw output into a hard 0/1 label: 1 when the first output
# value of a row is >= 0.5, otherwise 0. The comparison is done on the whole
# array at once instead of calling a Python lambda per row.
df['BLSTM_predictions'] = (blstm_pred[:, 0] >= 0.5).astype('int64')

In [43]:
# Class predictions from the SVM model on the same vectorized text used for NB.
svm_pred = svm.predict(vector)

In [44]:
# Store the SVM predictions next to the tweets they belong to.
df['SVM_predictions'] = svm_pred

In [45]:
# Inspect the four per-model prediction columns side by side.
df.head()

Unnamed: 0,mp,tweet_date,tweet_time,tweet_text,cleaned_tweet,Snowball_Stem,NB_predictions,LSTM_predictions,BLSTM_predictions,SVM_predictions
0,rsprasad,2021-03-31,23:30:00,"Please, don't break the trust.\n\n@narendramod...",please break trust,pleas break trust,0,1,0,1
1,rsprasad,2021-03-31,23:30:00,"MAINTENANCE OF WIVES, CHILDREN AND PARENTS - h...",maintenance wives children parents l c h r read,mainten wive children parent l c h r read,0,0,0,1
2,rsprasad,2021-03-31,23:30:00,NIA look into SSR case \n\n@AmitShah @narendra...,nia look ssr case,nia look ssr case,1,1,1,1
3,rsprasad,2021-03-31,23:30:00,@PavanjitMane1 @PMOIndia @CMOMaharashtra @rspr...,new pan allotted per aadhar many pans issued b...,new pan allot per aadhar mani pan issu bef aad...,1,1,1,1
4,rsprasad,2021-03-31,23:30:00,@rsprasad @narendramodi @PMOIndia @examwarrior...,one worst govt till interest reduced income ta...,one worst govt till interest reduc incom tax c...,0,0,0,0


## Running the Ensemble

In [47]:
# Per-model voting weights, consumed by position in the weighting step:
# index 0 -> SVM, 1 -> NB, 2 -> LSTM, 3 -> BLSTM. They sum to ~1.0, so the
# weighted vote of a tweet lies between 0 and 1.
class_weights = [
    0.24713656140170648,  # SVM
    0.2487545817826636,   # NB
    0.25291761338783453,  # LSTM
    0.2511912434277955,   # BLSTM
]

In [51]:
# Scale each model's 0/1 prediction column by that model's voting weight.
# The weights are looked up by position in `class_weights`.
svm_weighted_pred = class_weights[0] * df['SVM_predictions']
nb_weighted_pred = class_weights[1] * df['NB_predictions']
lstm_weighted_pred = class_weights[2] * df['LSTM_predictions']
blstm_weighted_pred = class_weights[3] * df['BLSTM_predictions']

In [52]:
# Total weighted vote per tweet: the sum of the four weighted model predictions.
weighted_pred_sum = (
    svm_weighted_pred
    + nb_weighted_pred
    + lstm_weighted_pred
    + blstm_weighted_pred
)

In [53]:
# Display the weighted vote for each tweet.
weighted_pred_sum

0        0.500054
1        0.247137
2        1.000000
3        1.000000
4        0.000000
           ...   
34553    1.000000
34554    0.247137
34555    1.000000
34556    1.000000
34557    0.501672
Length: 34398, dtype: float64

In [55]:
# Final ensemble label: 1 when the weighted vote reaches 0.5, otherwise 0.
# The comparison runs on the whole Series at once instead of calling a Python
# lambda per row. `weighted_pred_sum` is built from `df` columns, so it shares
# `df`'s index and the assignment lines up row for row.
df['Ensemble_predictions'] = (weighted_pred_sum >= 0.5).astype('int64')

In [58]:
# Inspect the ensemble label next to the individual model predictions.
df.head()

Unnamed: 0,mp,tweet_date,tweet_time,tweet_text,cleaned_tweet,Snowball_Stem,NB_predictions,LSTM_predictions,BLSTM_predictions,SVM_predictions,Ensemble_predictions
0,rsprasad,2021-03-31,23:30:00,"Please, don't break the trust.\n\n@narendramod...",please break trust,pleas break trust,0,1,0,1,1
1,rsprasad,2021-03-31,23:30:00,"MAINTENANCE OF WIVES, CHILDREN AND PARENTS - h...",maintenance wives children parents l c h r read,mainten wive children parent l c h r read,0,0,0,1,0
2,rsprasad,2021-03-31,23:30:00,NIA look into SSR case \n\n@AmitShah @narendra...,nia look ssr case,nia look ssr case,1,1,1,1,1
3,rsprasad,2021-03-31,23:30:00,@PavanjitMane1 @PMOIndia @CMOMaharashtra @rspr...,new pan allotted per aadhar many pans issued b...,new pan allot per aadhar mani pan issu bef aad...,1,1,1,1,1
4,rsprasad,2021-03-31,23:30:00,@rsprasad @narendramodi @PMOIndia @examwarrior...,one worst govt till interest reduced income ta...,one worst govt till interest reduc incom tax c...,0,0,0,0,0


## Saving to CSV

In [59]:
# Persist the tweets together with every per-model and ensemble prediction.
# NOTE: to_csv writes the DataFrame index as an unnamed first column by
# default. The row labels (which have gaps after the earlier dropna) therefore
# end up in the file; pass index_col=0 when reading it back.
df.to_csv('./data/Final_Predictions.csv')