In [49]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model
from gensim.models import Doc2Vec
import numpy as np
import tensorflow as tf
import pickle
import nltk
import re

In [50]:
# Load the pre-trained artefacts used for inference in the cells below.
# All paths are relative to the notebook's working directory.
vec_model = Doc2Vec.load('./models/Doc2Vec/article_bias_doc2vec.model')

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# Only load these pickles if they come from a trusted source.
# `with` guarantees the file handles are closed once the models are loaded
# (the bare `pickle.load(open(...))` form leaves them open).
with open('./models/RandomForest/rf_bias.pickle', 'rb') as rf_file:
    rf_model = pickle.load(rf_file)
with open('./models/SVM/svc_bias.pickle', 'rb') as svm_file:
    svm_model = pickle.load(svm_file)

article_nn = load_model('./models/TensorFlow/article_bias_nn.h5')

In [51]:
# Republican article (Breitbart).
# Raw sample text used as the "republican" test input; it is cleaned,
# tokenized and embedded at the end of this cell (see rep_embeddings).
rep_article = """Republicans had, through 11 ballots over three days, been unable to elect a speaker because of roughly 20 holdouts, many of whom had concerns about McCarthy and sought structural changes to the House and other commitments from the leader before they would vote for him.

Gaetz, however, firmly placed himself in a small crowd of “never Kevin” voters weeks ago. Gaetz told Steve Bannon’s War Room in an interview in December that McCarthy did not have the requisite majority of votes nor did he have even 200 votes.

“There’s a reason that the true conservatives are going to stand against him and deprive him the speakership,” Gaetz said at the time.

Those members, many of whom are in the conservative House Freedom Caucus (Gaetz is not in the caucus but frequently aligns with them on issues), did stand against McCarthy up until Friday morning as intense negotiations between the holdouts and McCarthy took place this week.

The negotiations included, among what appears to be many items, bringing the motion to vacate down to a one-member threshold and putting more members of the Freedom Caucus on the Rules Committee, as well as budgetary provisions and vows to bring votes on certain legislation to the floor.

Those agreements appeared to be overwhelmingly successful when 14 holdouts voted for McCarthy on Friday. The burst of momentum has led to several members, including McCarthy himself, showing optimism that by having all Republicans present — two were absent Friday morning — and flipping a couple of the remaining holdouts to McCarthy or “present” votes, McCarthy could clinch the speakership, and do so as early as Friday night.

Gaetz was asked on Friday by CNN’s Manu Raju if he would “concede” that McCarthy was going to win following the vote flips.

“It’s looking like it’s heading that way,” Gaetz said.

In a change of tune from December, Gaetz also told reporters, “I think that the rules and personnel changes in the House that we’ve been talking about will do a lot to democratize power to the membership” and will allow members to be in a “far stronger position.”

One reporter asked Gaetz if he would go as far as to consider voting for McCarthy at this stage, to which Gaetz replied, “Hope springs eternal.”

He added, “I’m very excited about the changes that we’ve made and that we’re still negotiating on spending and on the rules, and we’ll see how it goes tonight.”

A 14th round of voting is expected to begin at 10:00 p.m. Friday.
"""

# Democratic article: raw sample text used as the "democratic" test input;
# it is cleaned, tokenized and embedded at the end of this cell (see dem_embeddings).
# NOTE(review): the publisher is not recorded here, unlike rep_article — add attribution.
dem_article = """From same-sex marriage protections to veterans’ aid, Joe Biden’s party used its thin majority to deliver many campaign promises

In January, Democrats will lose their unified control of Capitol Hill, ending a remarkable legislative streak that saw the party deliver on many of their campaign promises.

While Joe Biden and his party did not accomplish everything they set out to do, Democrats in Congress spent the last two years marshalling their thin majorities to pass consequential legislation that touches nearly every aspect of American life from water quality to marriage equality. Some of the most notable measures even earned Republican support.

The US president tweeted at midnight on New Year’s Eve: “I think it’s going to be a great year. Why? Because we get to start implementing a lot of the things we passed last year.”

Biden and top administration officials will open a new year of divided government by fanning out across the country to talk about how the economy is benefiting from bipartisan efforts.

Biden and Senate Republican leader Mitch McConnell will make a rare joint appearance in McConnell’s home state of Kentucky on Wednesday to highlight nearly $1tn in infrastructure spending that lawmakers approved on a bipartisan basis in 2021.

As the new era of split control dawns in Washington, with Republicans set to take control of the House on 3 January, here’s a look at what Democrats accomplished during the 117th Congress.
"""

def clean(text):
    """Normalize raw article text before stop-word removal and tokenization.

    Steps, in order:
      1. every literal ``|||`` separator becomes a single space;
      2. the characters ``„``, ``“``, ``"``, ``'`` and ``-`` are deleted;
      3. the result is lower-cased.

    Note that the closing curly quote (``”``) and the curly apostrophe
    (``’``) are NOT removed.
    NOTE(review): presumably this mirrors the preprocessing used when the
    embedding model was trained — confirm before changing the character set.

    Args:
        text: the raw article as a ``str``.

    Returns:
        The normalized, lower-cased string.
    """
    # Translation table that maps each unwanted character to "deleted".
    dropped_chars = str.maketrans('', '', '„“"\'-')
    separated = re.sub(r'\|\|\|', r' ', text)
    return separated.translate(dropped_chars).lower()

def remove_stopwords(text):
    """Drop English stop words from whitespace-separated text.

    The text is split on whitespace, every word that exactly matches
    (case-sensitively) an entry of NLTK's ``'english'`` stop-word list is
    discarded, and the survivors are re-joined with single spaces.

    Args:
        text: input string (expected to be already cleaned/lower-cased by
            the caller, as done in this notebook).

    Returns:
        A single string containing the remaining words separated by spaces.
    """
    engl_stops = set(stopwords.words('english'))
    kept_words = []
    for candidate in text.split():
        if candidate in engl_stops:
            continue
        kept_words.append(candidate)
    return ' '.join(kept_words)

def tokenize(text):
    """Split text into NLTK word tokens, keeping only tokens longer than 3 characters.

    Args:
        text: the string to tokenize.

    Returns:
        A list of the tokens produced by ``word_tokenize`` whose length is
        at least 4, in their original order.
    """
    return [token for token in word_tokenize(text) if len(token) > 3]

# Run both sample articles through the same pipeline
# (clean -> remove_stopwords -> tokenize -> Doc2Vec inference).
# The generator is consumed in order, so the Republican article is embedded
# first and the Democratic one second.
rep_embeddings, dem_embeddings = (
    vec_model.infer_vector(tokenize(remove_stopwords(clean(article_text))))
    for article_text in (rep_article, dem_article)
)

In [63]:
# Maps the integer class returned by the classifiers to a readable label
# (index 0 -> "democratic", index 1 -> "republican").
predictions = ["democratic", "republican"]

# Print order: random forest then SVM for the Republican sample,
# followed by random forest then SVM for the Democratic sample.
for embedding in (rep_embeddings, dem_embeddings):
    for classifier in (rf_model, svm_model):
        # predict() is given a one-sample batch, so read that sample's label
        # explicitly. (np.max only happened to work because the batch had a
        # single element; on a larger batch it would return the largest
        # label instead of a per-sample prediction.)
        predicted_class = np.ravel(classifier.predict([embedding]))[0]
        print(predictions[predicted_class])

republican
republican
democratic
democratic


[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished


In [68]:
# The network's output positions are mapped through the reversed label list
# (original note: "reverse because of one-hot encoding strategy"), i.e.
# output index 0 -> "republican", index 1 -> "democratic".
nn_labels = list(reversed(predictions))

# Stack both embeddings into one batch: row 0 = Republican sample,
# row 1 = Democratic sample.
tf_rep = np.asarray(rep_embeddings)
tf_dem = np.asarray(dem_embeddings)
nn_batch = np.asarray([tf_rep, tf_dem])

nn_predictions = article_nn.predict(nn_batch)

# One printed label per input row: the class with the highest score.
for nn_pred in nn_predictions:
    best_index = np.argmax(nn_pred)
    print(nn_labels[best_index])

republican
democratic
