In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the model and dataset paths used further down are local Windows
# paths (C:/Users/...), not Drive paths — confirm which environment this notebook targets.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense , Input , LSTM , Embedding
from tensorflow.keras.models import Model, Sequential

In [None]:
# Preprocessing hyperparameters shared by the cells below.
# Vocabulary cap handed to Tokenizer(num_words=...).
top_words = 50000
# Length passed as `maxlen` to pad_sequences before calling the model.
max_comment_length = 300
# Not referenced anywhere else in the visible notebook; presumably the embedding
# size used when the model was trained — TODO confirm or remove.
embedding_vecor_length = 768

In [None]:
# NOTE(review): this tokenizer is created fresh, and no fit_on_texts() call or
# saved-tokenizer load is visible in this notebook. Unless it is fitted elsewhere,
# its vocabulary is empty and every text would presumably encode to an empty
# sequence — confirm, and load the tokenizer that was used to train the model.
tokenizer = Tokenizer(num_words=top_words)

In [None]:
# Load the saved model from an HDF5 file.
# NOTE(review): this imports from standalone `keras` while the import cell uses
# `tensorflow.keras`; the absolute local path also ties the notebook to one machine.
from keras.models import load_model
model = load_model("C:/Users/ADMIN PC/Desktop/Tweet/RNN/RNN.h5")

In [None]:
def predict_party(comment):
    """Classify a single tweet as "Republican" or "Democrat".

    The text is encoded with the module-level ``tokenizer``, padded to
    ``max_comment_length`` and passed to the module-level ``model``.

    Args:
        comment: Raw tweet text (one string).

    Returns:
        "Republican" when the model's first output value is >= 0.5,
        otherwise "Democrat".
    """
    padded = pad_sequences(
        tokenizer.texts_to_sequences([comment]),
        maxlen=max_comment_length,
    )
    # Single sample, single output unit: take the scalar score.
    republican_score = model.predict(padded)[0][0]

    if republican_score >= 0.5:
        return "Republican"
    return "Democrat"

In [None]:
def predict_probability(comments):
    """Return per-class scores for a batch of tweets, in the form LIME expects.

    Args:
        comments: Iterable of raw tweet strings.

    Returns:
        Array with one row per comment and two columns:
        column 0 = 1 - model output (Democrat), column 1 = model output (Republican).
    """
    sequences = tokenizer.texts_to_sequences(comments)
    padded = pad_sequences(sequences, maxlen=max_comment_length)

    republican = model.predict(padded)
    democrat = 1 - republican

    # Column order must match class_names: [Democrat, Republican]
    return np.column_stack([democrat, republican])

In [None]:
# Smoke test: classify one sample tweet and print the predicted party.
tweet = "Hurricane Maria left approx $90 billion in damages, yet only $1 billion was allocated for rebuilding grid. No surprâ€¦ https://t.co/2kU8BcKwUh"
print(predict_party(tweet))

In [None]:
# Second sample tweet; the predicted party is shown as the cell's output value.
tweet = "Check out my op-ed on need for End Executive Overreach Act: The White House is crippling our economy https://t.co/XCmjLB8Qyd via @DCExaminer"
predict_party(tweet)

In [None]:
from lime.lime_text import LimeTextExplainer, IndexedString
from operator import itemgetter
from tqdm import tqdm
import csv

In [None]:
# Class order must match the columns returned by predict_probability:
# index 0 = Democrat, index 1 = Republican.
class_names = ['Democrat', 'Republican']
explainer = LimeTextExplainer(class_names=class_names)

In [None]:
# Explain the model's prediction for one sample tweet with LIME and render it inline.
tweet = "Hurricane Maria left approx $90 billion in damages, yet only $1 billion was allocated for rebuilding grid. No surprâ€¦ https://t.co/2kU8BcKwUh"
explainer.explain_instance(tweet, predict_probability).show_in_notebook(text=True)

In [None]:
# Same LIME explanation, for the second sample tweet.
tweet = "Check out my op-ed on need for End Executive Overreach Act: The White House is crippling our economy https://t.co/XCmjLB8Qyd via @DCExaminer"
explainer.explain_instance(tweet, predict_probability).show_in_notebook(text=True)

In [None]:
def sort_tuples_array_by_second_item(tuples):
    """Return a new list with the items of ``tuples`` in ascending order of element [1].

    The input is not modified; items with equal keys keep their relative order.
    """
    ordered = list(tuples)
    ordered.sort(key=lambda item: item[1])
    return ordered

In [None]:
def _record_strong_word(feature, label):
    """Store one LIME feature for ``label`` in the module-level accumulators.

    Args:
        feature: ``(word, lime_score, position)`` tuple.
        label: 0 for Democrat, 1 for Republican.
    """
    word, score, position = feature
    key = (word, label)
    if key in words:
        # Word already seen for this class: keep every score, refresh the position.
        words[key]['lime_score'].append(score)
        words[key]['position'] = position
    else:
        words[key] = {'lime_score': [score], 'position': position}
        # Only the first occurrence of a (word, label) pair becomes a CSV row.
        wordsForCSV.append([word, label, score])


def get_max_explained_words(txt, explainer_num_samples=100):
    """Record the two words that most strongly support the predicted party of ``txt``.

    The tweet is classified with ``predict_party`` and explained with LIME
    (weights are read for class index 1, Republican). The two features with the
    strongest weight in the direction of the predicted class are added to the
    module-level ``words`` dict and ``wordsForCSV`` list. Nothing is recorded
    when the text has no words, LIME returns no features, or fewer than two
    features point toward the predicted class.

    Args:
        txt: Raw tweet text.
        explainer_num_samples: ``num_samples`` passed to ``explain_instance``.

    Returns:
        The module-level ``(words, wordsForCSV)`` pair (mutated in place).
    """
    # Check if text is empty or has no recognized words before spending a
    # model forward pass on it.
    indexed_string = IndexedString(txt, bow=explainer.bow, split_expression=explainer.split_expression,
                                   mask_string=explainer.mask_string)  # settings taken from the explainer object
    if indexed_string.num_words() == 0:
        print(f"Warning: Skipping tweet with no recognized words: {txt}")
        return words, wordsForCSV  # Skip this tweet

    prediction_label = 0 if predict_party(txt) == "Democrat" else 1

    exp = explainer.explain_instance(txt, predict_probability, num_samples=explainer_num_samples)

    named_weights = exp.as_list()
    if not named_weights:  # empty list means no features were found
        print(f"Warning: Skipping tweet with no explainable features: {txt}")
        return words, wordsForCSV

    # Pair each (word, weight) with the feature id from local_exp[1]; both are
    # read for class index 1 and are zipped positionally.
    exp_list = [
        (word, weight, feature_id)
        for (feature_id, _), (word, weight) in zip(exp.local_exp[1], named_weights)
    ]

    if prediction_label == 0:
        # Negative weights push away from class 1, i.e. toward Democrat.
        supporting = sort_tuples_array_by_second_item([f for f in exp_list if f[1] < 0])
        # Ascending order: the most negative (strongest) come first.
        strongest_two = supporting[:2]
    else:
        # Positive weights push toward class 1, i.e. Republican.
        supporting = sort_tuples_array_by_second_item([f for f in exp_list if f[1] > 0])
        # Ascending order: the strongest are the last two; take last, then second-to-last.
        strongest_two = supporting[-1:-3:-1]

    # Only record when at least two supporting words exist.
    if len(supporting) > 1:
        for feature in strongest_two:
            _record_strong_word(feature, prediction_label)

    return words, wordsForCSV

In [None]:
def load_data(data_file):
  """Read the tweet CSV and return its texts and binary party labels.

  Missing values are replaced by empty strings and duplicate rows are dropped.

  Args:
    data_file: Path to a CSV file with 'Tweet' and 'Party' columns.

  Returns:
    (comments, genders): list of tweet texts and a parallel list of labels,
    0 for "Democrat" and 1 for every other 'Party' value.
  """
  frame = pd.read_csv(data_file).fillna("").drop_duplicates()

  comments = frame['Tweet'].tolist()
  genders = []
  for party in frame['Party'].tolist():
    genders.append(0 if party == "Democrat" else 1)

  return comments, genders

In [None]:
# Load dataset: tweet texts plus their 0/1 party labels
# (load_data maps "Democrat" to 0 and every other Party value to 1).
# NOTE(review): absolute local path — the notebook only runs on this machine as written.
original_comments, original_genders = load_data('C:/Users/ADMIN PC/Desktop/Tweet/Tweets Dataset.csv')

In [None]:
# The cells below process x[0] .. x[4], so the data must be split into exactly
# five chunks. Slicing with step int(len / 5) produced a sixth, never-processed
# chunk whenever the length was not a multiple of 5, and failed for < 5 rows.
NUM_CHUNKS = 5


def _split_into_chunks(items, num_chunks):
    """Split ``items`` into exactly ``num_chunks`` contiguous slices whose sizes differ by at most one."""
    base, extra = divmod(len(items), num_chunks)
    chunks = []
    start = 0
    for idx in range(num_chunks):
        # The first `extra` chunks take one additional item so nothing is left over.
        stop = start + base + (1 if idx < extra else 0)
        chunks.append(items[start:stop])
        start = stop
    return chunks


# Base chunk size, kept under its original name in case other cells read it.
n = len(original_comments) // NUM_CHUNKS
x = _split_into_chunks(original_comments, NUM_CHUNKS)
y = _split_into_chunks(original_genders, NUM_CHUNKS)

In [None]:
# Accumulators read and mutated as globals by get_max_explained_words.
# Re-running this cell resets everything collected so far.
import csv
# (word, label) -> {'lime_score': [scores...], 'position': feature id}
words = {}
# Rows of [word, label, lime score]; written to the CSV by the cells below.
wordsForCSV = []

# Column names for the output CSV.
header=["word", "label", "limescore"]
# NOTE(review): absolute local path — consider making this configurable.
file_path = 'C:/Users/ADMIN PC/Desktop/Tweet/RNN/extracted_strong_word.csv'

In [None]:
# Explain every tweet in the first chunk; each call adds to the shared
# `words` / `wordsForCSV` accumulators and returns them.
for comment in tqdm(x[0], total = len(x[0])):
    words, wordsForCSV = get_max_explained_words(comment)

# Checkpoint: write the header plus all rows collected so far
# ('w' mode replaces any existing file).
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)

In [None]:
# Explain every tweet in chunk x[1]; rows are added to the shared
# `wordsForCSV` list, which already holds the rows from earlier chunks.
for comment in tqdm(x[1], total = len(x[1])):
    words, wordsForCSV = get_max_explained_words(comment)

# 'w' mode rewrites the whole file from the cumulative list, so the header has
# to be written again here — otherwise this checkpoint drops it.
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)

In [None]:
# Explain every tweet in chunk x[2]; rows are added to the shared
# `wordsForCSV` list, which already holds the rows from earlier chunks.
for comment in tqdm(x[2], total = len(x[2])):
    words, wordsForCSV = get_max_explained_words(comment)

# 'w' mode rewrites the whole file from the cumulative list, so the header has
# to be written again here — otherwise this checkpoint drops it.
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)

In [None]:
# Explain every tweet in chunk x[3]; rows are added to the shared
# `wordsForCSV` list, which already holds the rows from earlier chunks.
for comment in tqdm(x[3], total = len(x[3])):
    words, wordsForCSV = get_max_explained_words(comment)

# 'w' mode rewrites the whole file from the cumulative list, so the header has
# to be written again here — otherwise this checkpoint drops it.
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)

In [None]:
# Explain every tweet in chunk x[4]; rows are added to the shared
# `wordsForCSV` list, which already holds the rows from earlier chunks.
for comment in tqdm(x[4], total = len(x[4])):
    words, wordsForCSV = get_max_explained_words(comment)

# 'w' mode rewrites the whole file from the cumulative list, so the header has
# to be written again here — otherwise the final CSV has no header row.
with open(file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(wordsForCSV)