In [4]:
 
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from collections import Counter
import os
import getEmbeddings2
import matplotlib.pyplot as plt




Using TensorFlow backend.


In [5]:

# Run configuration shared by the preprocessing and training cells.
batch_size = 64   # passed to model.fit as batch_size
epoch_num = 5     # passed to model.fit as epochs
top_words = 5000  # vocabulary budget: sizes the word bank and the Embedding input

def plot_cmat(yte, ypred):
    '''Plot a confusion matrix and display it.

    Both arguments are forwarded unchanged to
    ``skplt.plot_confusion_matrix(yte, ypred)``; the figure is then shown
    with ``plt.show()``. Nothing is returned.

    yte   -- presumably the true labels (TODO confirm against callers)
    ypred -- presumably the predicted labels (TODO confirm against callers)
    '''
    # NOTE(review): `skplt` is not imported in any of the visible cells, so on
    # a fresh kernel this call would raise NameError unless another cell
    # provides it (presumably `import scikitplot as skplt` -- confirm).
    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()

# The four pre-shuffled split files, listed in the order they are unpacked below.
_split_files = (
    './xtr_shuffled.npy',
    './xte_shuffled.npy',
    './ytr_shuffled.npy',
    './yte_shuffled.npy',
)

# When any of the files is missing, run getEmbeddings2.clean_data() first
# (presumably it writes these files -- TODO confirm against that module).
if not all(os.path.isfile(path) for path in _split_files):
    getEmbeddings2.clean_data()

# Texts (xtr / xte) and labels (y_train / y_test) for the two splits.
xtr, xte, y_train, y_test = [np.load(path) for path in _split_files]

# Split every training text on whitespace, then tally how often each token occurs.
x_train = [text.split() for text in xtr]
cnt = Counter()
for tokens in x_train:
    cnt.update(tokens)

# Storing most common words: the (top_words + 1) most frequent tokens are
# numbered 1, 2, 3, ... from most to least frequent, so id 0 is never
# assigned to a word.
most_common = cnt.most_common(top_words + 1)
word_bank = {token: rank for rank, (token, _) in enumerate(most_common, start=1)}

# Encode the training sentences: every token is swapped for its word_bank id
# and tokens missing from word_bank are dropped. Each sentence is rebuilt in a
# single pass and written back through a slice, so the list objects held in
# x_train are updated in place without the element shifting that a repeated
# `del news[i]` causes on long sentences.
for news in x_train:
    news[:] = [word_bank[token] for token in news if token in word_bank]

# Labels become plain Python lists so they can be filtered alongside the texts.
y_train = list(y_train)
y_test = list(y_test)

# Delete the short news: a training item is kept only when its encoded length
# is strictly greater than short_news_limit. Only the training split is
# filtered here.
short_news_limit = 10

# Texts and labels are filtered pairwise, so they must line up one-to-one;
# fail loudly here rather than silently mis-pairing them.
if len(x_train) != len(y_train):
    raise ValueError(
        "x_train and y_train differ in length: %d vs %d"
        % (len(x_train), len(y_train))
    )

# One linear pass over the pairs instead of deleting from both lists item by item.
kept_pairs = [
    (news, label)
    for news, label in zip(x_train, y_train)
    if len(news) > short_news_limit
]
x_train = [news for news, _ in kept_pairs]
y_train = [label for _, label in kept_pairs]

# Generating test data: each test text is split on whitespace and encoded with
# the word_bank built from the training texts; tokens that are not in
# word_bank are dropped. Building every sentence in one pass avoids the
# element shifting of deleting unknown tokens one at a time.
x_test = [
    [word_bank[token] for token in text.split() if token in word_bank]
    for text in xte
]


# Truncate and pad both splits to a common length of max_review_length.
max_review_length = 500
X_train, X_test = (
    sequence.pad_sequences(encoded, maxlen=max_review_length)
    for encoded in (x_train, x_test)
)

# The label lists are turned into numpy arrays for training and evaluation.
y_train, y_test = np.array(y_train), np.array(y_test)


In [None]:
print("start")
# Create the model: Embedding -> LSTM(100) -> single sigmoid unit, trained
# with binary cross-entropy.
embedding_vecor_length = 32  # (sic) spelling kept in case other cells use this name
model = Sequential()
# word_bank ids start at 1 and go up to at most top_words + 1, and id 0 is
# never assigned to a word, hence top_words + 2 embedding rows.
model.add(Embedding(top_words+2, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the cell output.
model.summary()
# NOTE(review): the test split serves as validation data here and is also the
# split evaluated below -- confirm that no separate hold-out set is intended.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epoch_num, batch_size=batch_size)

# Final evaluation of the model; with metrics=['accuracy'] the accuracy is
# read from scores[1].
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy= %.2f%%" % (scores[1]*100))



In [1]:
# Sample article text about the Daily Telegraph kept as one multi-line string;
# the bracketed numbers such as [11] are citation markers left in the text.
# No visible cell reads this variable.
# NOTE(review): the opening looks spliced -- a lone "In" on the second line,
# and the paragraph after the February 2015 one starts mid-sentence with
# " July 2014" -- confirm whether the paragraphs were pasted out of order.
wikitext = """Accusation of news coverage influence by advertisers
In
In February 2015 the chief political commentator of the Daily Telegraph, Peter Oborne resigned. Oborne accused the paper of a "form of fraud on its readers"[11] for its coverage of the bank HSBC in relation to a Swiss tax-dodging scandal that was widely covered by other news media. He alleged that editorial decisions about news content had been heavily influenced by the advertising arm of the newspaper because of commercial interests.[12] Professor Jay Rosen at New York University stated that Oborne's resignation statement was "one of the most important things a journalist has written about journalism lately".[12]
 July 2014, the Daily Telegraph was criticised for carrying links on its website to pro-Kremlin articles supplied by a Russian state-funded publication that downplayed any Russian involvement in the downing of the passenger jet Malaysia Airlines Flight 17.[58] These had featured on its website as part of a commercial deal, but were later removed.[59] The paper is paid £900,000 a year to include the supplement Russia Beyond the Headlines, a publication sponsored by the Rossiyskaya Gazeta, the Russian government's official newspaper. It is paid a further £750,000 a year for a similar arrangement with the Chinese state in relation to the pro-Beijing China Watch advertising supplement.[60][61]

Oborne cited other instances of advertising strategy influencing the content of articles, linking the refusal to take an editorial stance on the repression of democratic demonstrations in Hong Kong to the Telegraph's support from China. Additionally, he said that favourable reviews of the Cunard cruise liner Queen Mary II appeared in the Telegraph, noting: "On 10 May last year The Telegraph ran a long feature on Cunard's Queen Mary II liner on the news review page. This episode looked to many like a plug for an advertiser on a page normally dedicated to serious news analysis. I again checked and certainly Telegraph competitors did not view Cunard's liner as a major news story. Cunard is an important Telegraph advertiser."[11] In response, the Telegraph called Oborne's statement an "astonishing and unfounded attack, full of inaccuracy and innuendo".[12]

In January 2017, the Telegraph Media Group had a higher number of upheld complaints than any other UK newspaper by its regulator IPSO.[62] Most of these findings pertained to inaccuracy, as with other UK newspapers.[63]

In October 2017, a number of major western news organisations whose coverage has irked Beijing were excluded from Xi Jinping's speech event launching new politburo. However, the Daily Telegraph, which regularly publishes Communist party propaganda in the UK in an advert section as part of a reported £800,000 annual contract with Beijing’s China Daily, has been granted an invitation to the event.[64]
"""
