In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Embedding
from tensorflow.keras.models import Model
import torch

In [None]:
# Length every tokenized tweet is padded/truncated to before it is fed to the model.
MAX_SEQUENCE_LENGTH = 300
# Vocabulary cap handed to the Keras Tokenizer.
MAX_NUM_WORDS = 50000

# NOTE(review): no fit_on_texts() call or saved-tokenizer load is visible in this
# notebook. An unfitted Tokenizer has an empty word index, so texts_to_sequences()
# would presumably yield empty sequences — confirm the training-time tokenizer is
# restored before predictions are trusted.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)

In [None]:
# Load the saved Keras model used by predict_proba (presumably the trained CNN
# classifier, judging by the file name).
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine without editing it.
from keras.models import load_model
model = load_model('C:/Users/user/Downloads/Project/Tweet/CNN/CNN.h5')



In [None]:
def predict_proba(arr):
    """Return the model's raw predictions for a batch of texts.

    The texts are converted to integer sequences with the module-level
    ``tokenizer``, padded to ``MAX_SEQUENCE_LENGTH`` and passed to
    ``model.predict`` with progress output silenced. This is the callable
    handed to LIME as the classifier function.
    """
    padded = pad_sequences(
        tokenizer.texts_to_sequences(arr),
        maxlen=MAX_SEQUENCE_LENGTH,
    )
    return model.predict(padded, verbose=0)

In [None]:
def predict_party(text):
    """Classify a single text as "Democrat" or "Republican".

    The text is wrapped in a one-element array, scored with ``predict_proba``,
    and the label is chosen from the highest-scoring output index:
    index 0 means "Democrat", anything else means "Republican".
    """
    scores = predict_proba(np.array([text]))[0]
    if np.argmax(scores) == 0:
        return "Democrat"
    return "Republican"

In [None]:
# Sanity check of predict_party on a sample tweet (text kept verbatim, including
# its garbled characters and truncation).
tweet = "Hurricane Maria left approx $90 billion in damages, yet only $1 billion was allocated for rebuilding grid. No surprâ€¦ https://t.co/2kU8BcKwUh"
print(predict_party(tweet))

In [None]:
# Sanity check of predict_party on a second sample tweet.
tweet = "Check out my op-ed on need for End Executive Overreach Act: The White House is crippling our economy https://t.co/XCmjLB8Qyd via @DCExaminer"
print(predict_party(tweet))

In [None]:
%pip install lime

In [None]:
from lime.lime_text import LimeTextExplainer
from operator import itemgetter
from tqdm import tqdm
import csv

In [None]:
# Label order follows predict_party: output index 0 -> Democrat, otherwise Republican.
class_names = ['Democrat', 'Republican']
# Shared LIME text explainer used by every explain_instance call below.
explainer = LimeTextExplainer(class_names=class_names)

In [None]:
# Render the LIME explanation for the first sample tweet, using predict_proba
# as the classifier function.
tweet = "Hurricane Maria left approx $90 billion in damages, yet only $1 billion was allocated for rebuilding grid. No surprâ€¦ https://t.co/2kU8BcKwUh"
explainer.explain_instance(tweet, predict_proba).show_in_notebook(text=True)

In [None]:
# Render the LIME explanation for the second sample tweet.
tweet = "Check out my op-ed on need for End Executive Overreach Act: The White House is crippling our economy https://t.co/XCmjLB8Qyd via @DCExaminer"
explainer.explain_instance(tweet, predict_proba).show_in_notebook(text=True)

In [None]:
def sort_tuples_array_by_second_item(tuples):
    """Return a new list of the given tuples ordered ascending by their second element.

    The input is not modified; ties keep their original relative order.
    """
    ordered = list(tuples)
    ordered.sort(key=lambda entry: entry[1])
    return ordered

In [None]:
def _record_strong_word(entry, label):
    """Store one (word, lime_score, position) entry in the global accumulators.

    A word seen for the first time under ``label`` gets a new ``words`` entry
    and one row in ``wordsForCSV``; a repeated word only has its score appended.
    The stored position is always overwritten with the latest one.
    """
    word, score, position = entry
    key = (word, label)
    if key in words:
        words[key]['lime_score'].append(score)
    else:
        words[key] = {'lime_score': [score]}
        wordsForCSV.append([word, label, score])
    words[key]['position'] = position


def get_max_explained_words(txt, explainer_num_samples=100):
    """Record the two words LIME weights most strongly toward the predicted party.

    The text is classified with ``predict_party`` and explained with the
    module-level LIME ``explainer``. Explanation weights are taken for label 1
    (Republican): negative weights push toward Democrat (label 0), positive
    weights push toward Republican (label 1).

    Args:
        txt: The text to classify and explain.
        explainer_num_samples: Number of perturbed samples LIME generates.

    Returns:
        The module-level ``words`` dict and ``wordsForCSV`` list, both updated
        in place. ``words`` maps ``(word, label)`` to a dict with the list of
        ``lime_score`` values and the last ``position`` (the feature id from
        ``exp.local_exp[1]``). Nothing is recorded unless at least two words
        support the predicted party.
    """
    # The original called an undefined predict_male_or_female(); the classifier
    # defined in this notebook is predict_party.
    prediction_label = 0 if predict_party(txt) == "Democrat" else 1

    exp = explainer.explain_instance(txt, predict_proba, num_samples=explainer_num_samples)
    # Pair each (feature_id, weight) from local_exp with the matching
    # (word, weight) from as_list() to get (word, weight, feature_id).
    exp_list = [
        (word, score, feature_id)
        for (feature_id, _), (word, score) in zip(exp.local_exp[1], exp.as_list())
    ]

    if prediction_label == 0:
        # Negative scores argue for Democrat; ascending sort puts the most
        # negative (strongest) first.
        democrat_list = sort_tuples_array_by_second_item(
            [entry for entry in exp_list if entry[1] < 0]
        )
        strongest = democrat_list[:2] if len(democrat_list) > 1 else []
    else:
        # Positive scores argue for Republican; ascending sort puts the
        # strongest last, so take the final two in reverse order.
        republican_list = sort_tuples_array_by_second_item(
            [entry for entry in exp_list if entry[1] > 0]
        )
        strongest = republican_list[-2:][::-1] if len(republican_list) > 1 else []

    for entry in strongest:
        _record_strong_word(entry, prediction_label)

    return words, wordsForCSV

In [None]:
def load_data(data_file):
    """Read the tweets CSV and return its texts with binary party labels.

    Missing values are replaced with empty strings and duplicate rows are
    dropped. Returns a tuple ``(tweets, labels)`` where ``tweets`` is the list
    of values in the 'Tweet' column and ``labels`` holds 0 for every row whose
    'Party' is "Democrat" and 1 for every other value.
    """
    frame = pd.read_csv(data_file)
    frame = frame.fillna("").drop_duplicates()

    tweets = frame['Tweet'].tolist()
    labels = [0 if party == "Democrat" else 1 for party in frame['Party'].tolist()]

    return tweets, labels

In [None]:
# Load all tweets and their 0/1 party labels (0 = Democrat, 1 = anything else).
# NOTE(review): `original_genders` actually holds party labels, and the path is
# absolute and machine-specific.
original_comments, original_genders = load_data('C:/Users/user/Downloads/Project/Tweet/Tweets Dataset.csv')

In [None]:
# Split the dataset into consecutive chunks so each can be explained in its own
# cell. The chunk size is the row count floor-divided by NUM_CHUNKS, so any
# remainder ends up in one extra, shorter final chunk.
NUM_CHUNKS = 7
# Keep the chunk size at 1 or more: with fewer rows than NUM_CHUNKS it would be
# 0, and range() raises ValueError for a zero step.
n = max(1, len(original_comments) // NUM_CHUNKS)
x = [original_comments[i:i + n] for i in range(0, len(original_comments), n)]
y = [original_genders[i:i + n] for i in range(0, len(original_genders), n)]

In [None]:
import csv
# Global accumulators that get_max_explained_words reads and updates:
# `words` maps (word, label) to its collected LIME scores and last position,
# `wordsForCSV` holds one [word, label, score] row per first-seen (word, label).
words = {}
wordsForCSV = []

# Column names for the CSV export; they mirror the rows in wordsForCSV.
header=["word", "label", "limescore"]
# NOTE(review): this is a '/content/drive/...' path while the model and dataset
# are loaded from 'C:/Users/...' paths — confirm which environment is intended.
file_path = '/content/drive/Shareddrives/Posting_Comments_On_Social_Networks/CNN/extracted_strong_words_by_cnn.csv'

In [None]:
# Explain every tweet in the first chunk; results accumulate in the shared
# `words` / `wordsForCSV` objects that get_max_explained_words returns.
for comment in tqdm(x[0], total = len(x[0])):
    words, wordsForCSV = get_max_explained_words(comment)

# Write (or overwrite) the export file: header row first, then all rows collected so far.
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)

In [None]:
# Explain every tweet in the second chunk; results keep accumulating in the
# shared `words` / `wordsForCSV` objects.
for comment in tqdm(x[1], total = len(x[1])):
    words, wordsForCSV = get_max_explained_words(comment)

# Mode 'w' truncates the file, so the header has to be written again before
# dumping all rows collected so far (wordsForCSV is cumulative across chunks).
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)