In [78]:
#imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [79]:
#get data and format it
df = pd.read_csv('articles.csv', names = ["Claps", "Title","Text"])
claps = df.pop('Claps').values
features = df.pop("Text").values
train_features, test_features, train_claps, test_claps = train_test_split(features, claps)

In [80]:
#reusable formatting function
def preprocess(arr):
    #tokenize
    tokenizer = Tokenizer(num_words=15_000, oov_token='<UNK>')
    tokenizer.fit_on_texts(arr)

    #index
    word_index = tokenizer.word_index

    #sequence
    sequences = tokenizer.texts_to_sequences(arr)

    #find max sequence length
    maxlen = max([len(x) for x in sequences])

    #padding 
    train_padded = pad_sequences(sequences, padding='post', truncating='post', maxlen=maxlen)
    print (train_padded)
    
    return train_padded

In [81]:
#format train and test data
train_features = preprocess(train_features.tolist())
test_features = preprocess(test_features.tolist())

[[  599    19  2158 ...     0     0     0]
 [    4   503  4633 ...     0     0     0]
 [   32 11340    10 ...     0     0     0]
 ...
 [ 3375 11302  2632 ...     0     0     0]
 [  190     1  4280 ...     0     0     0]
 [ 2750   221     1 ...     0     0     0]]
[[5492 5493 5494 ...    0    0    0]
 [ 937   85    3 ...    0    0    0]
 [  30  192  654 ...    0    0    0]
 ...
 [  29   11 2286 ...    0    0    0]
 [ 775 2593  475 ...    0    0    0]
 [  12    5  646 ...    0    0    0]]


In [82]:
#Neural Network Structure
model = keras.Sequential()
model.add(keras.layers.Embedding(15_000, 20))
#model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.LSTM(128))
#model.add(keras.layers.Dense(20, activation = "softmax"))
#model.add(keras.layers.Dense(20, activation = "relu"))
model.add(keras.layers.Dense(1, activation = "relu"))

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['accuracy'])

In [83]:
#train
fitmodel = model.fit(train_features, train_claps, epochs = 5, batch_size = 50, validation_split=0.3)

Train on 140 samples, validate on 60 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [84]:
#results 
print(model.evaluate(test_features, test_claps))

[0.6120317667277891, 0.7164179]


In [85]:
#model.save('viralOrNahNoTitles.h5')

In [141]:
predict = keras.models.load_model('viralOrNahNoTitles.h5')

In [150]:
#get input
print("Will your article go viral? Enter your article content to find out: ")
article = [str(input())]

#format it 
userFeatures = preprocess(article)
#predict
prediction = model.predict(userFeatures)
prediction_adjusted = prediction/0.125
prediction_adjusted_percent = ((prediction[0][0]-0.1)/0.15)*100
print(prediction)
#give results
if prediction_adjusted[0][0] >= 1:
    print("Your article seems to have a ", prediction_adjusted_percent, "% chance of getting at least 5 thousand claps" )
    print("congrats! looks like you're gonna go viral! Of course, this is just content-wise, make sure to share your story and promote it wherever you can!")
elif prediction_adjusted[0][0] < 1: 
    print("Your article seems to have a ", prediction_adjusted_percent, "% chance of getting at least 5 thousand claps" )
    print("sorry, this doesn't look like viral material. make some edits and try again")
else: 
    print("I have no idea what is going on right now")

Will your article go viral? Enter your article content to find out: 
So the US is hitting a wall in vaccine uptake. Practically everyone who wants one is getting one. But way too many have chosen not to get vaccinated. Their choice is devastating to defeating this pandemic. Slow vaccine uptake will give the virus more opportunity to mutate. Mutations will mean we never achieve herd immunity. That this issue has been politicized is deeply frustrating. I would have thought the Trump Republicans would have been celebrating the vaccine — didn’t President Trump claim to be responsible for it happening so quickly? But it is almost as if they are willing this process to fail, so as to gain the political advantage from that failure. (Each time I think cynicism can’t sink lower …) But regardless, what we need now is an aggressive experiment to see what would get the reluctant to get vaccinated. Here are three experiments that we should launch immediately: In one population, we explain to the pu