In [12]:
import warnings
# Suppress every Python warning for the rest of the session (keeps the
# notebook output clean, but also hides deprecation notices).
warnings.filterwarnings('ignore')

from numpy.random import seed
# Seed NumPy's global random generator. No TensorFlow seed is set in the
# visible cells.
seed(1)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from nltk.tokenize import sent_tokenize,word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import nltk
from nltk.corpus import stopwords

from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout
from keras.layers import Conv1D, MaxPooling1D, GlobalMaxPooling1D
from keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler

import pickle
from tqdm import tqdm

# Fetch the NLTK resources used below: tokenizer data for word_tokenize and
# the stop-word corpus read via stopwords.words('english').
nltk.download('punkt')
nltk.download('stopwords')
# NOTE(review): random_state is not referenced by any visible cell --
# presumably left over from the training notebook; confirm before removing.
random_state = 7
# Number of token positions per sentence fed to the model (sentence_to_vec
# builds exactly `cutoff` rows; shorten_sentence truncates to this length).
cutoff = 100
# Length of each word vector (width of the matrix built in sentence_to_vec
# and of the zero vector used for tokens without an embedding).
vec_length = 300

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [19]:
# Load the previously saved Keras model from an HDF5 file located in the
# current working directory; who_wrote() calls cnn_model.predict on it.
cnn_model = tf.keras.models.load_model('author_identification_multiconv_model.h5')

In [20]:
# Tokens removed before embedding: NLTK's English stop words plus the
# punctuation and quote tokens listed here.
stop_words = {
    *stopwords.words('english'),
    ',', '.', '?', '!', ':', ';',
    "'", '"', '-', "''", "`", "``",
}

# Author initials -> class index.
author_dict = dict(EAP=0, HPL=1, MWS=2)

# Class index -> display name printed by who_wrote().
reverse_author_dict = dict(enumerate(['Edgar Allan Poe', 'HP Lovecraft', 'Mary Shelley']))

In [None]:
# NOTE(review): this cell carries no execution count in the saved notebook,
# yet sentence_to_vec() reads `fasttext_vectors` -- it must be run on a fresh
# kernel before any prediction cell.
# `train` and `test` are not referenced by any visible cell.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# SECURITY: pickle.load can execute arbitrary code; only load a file that was
# produced by a trusted source.
# The loaded object is indexed as `fasttext_vectors.wv[token]` later on --
# presumably a trained gensim FastText model; confirm against the training code.
with open("fasttext_vectors.pickle", 'rb') as f:
    fasttext_vectors = pickle.load(f)

In [23]:
def shorten_sentence(s,length):
    """Tokenize ``s`` and return its leading non-stop-word tokens.

    The text is lower-cased and split with ``word_tokenize``; every token
    found in the module-level ``stop_words`` set is discarded, and the
    surviving tokens are truncated with ``[:length]``.

    Args:
        s: sentence text.
        length: slice bound applied to the filtered token list.

    Returns:
        list of token strings, in their original order.
    """
    kept_tokens = []
    for token in word_tokenize(s.lower()):
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return kept_tokens[:length]

In [24]:
def sentence_to_vec(s, vec_length, cutoff):
    """Embed a sentence as a fixed-size matrix of word vectors.

    The sentence is reduced by ``shorten_sentence`` to at most ``cutoff``
    tokens, a single-space separator token is appended, and that sequence is
    repeated cyclically until exactly ``cutoff`` positions are filled. Each
    position holds ``fasttext_vectors.wv[token]``; a token whose lookup
    raises ``KeyError`` is represented by a zero vector.

    Args:
        s: sentence text.
        vec_length: length of one word vector.
        cutoff: number of token positions in the output.

    Returns:
        float64 array of shape ``(1, vec_length, cutoff)`` -- a batch of one
        sample with the vector dimension before the position dimension.
    """
    # Build the repeating token cycle once (it does not change per position).
    token_cycle = shorten_sentence(s, cutoff) + [' ']
    cycle_length = len(token_cycle)

    # Positions 0..cutoff-1 only ever index the first min(cycle_length, cutoff)
    # entries of the cycle, so each of those is looked up exactly once.
    distinct_count = min(cycle_length, cutoff)
    cycle_vectors = np.empty([distinct_count, vec_length])
    for i in range(distinct_count):
        try:
            cycle_vectors[i] = fasttext_vectors.wv[token_cycle[i]]
        except KeyError:
            # No embedding available for this token.
            cycle_vectors[i] = np.zeros(vec_length)

    # Tile the cycle across all positions: row p is token p % cycle_length.
    add_vectors = cycle_vectors[np.arange(cutoff) % cycle_length]

    # Transpose to (vec_length, cutoff) and wrap in a leading batch axis.
    return np.array([add_vectors.T])

In [25]:
def who_wrote(s,print_percentages=False):
    """Print which author the model attributes the sentence ``s`` to.

    The sentence is embedded with ``sentence_to_vec`` (using the module-level
    ``vec_length`` and ``cutoff``) and passed to ``cnn_model.predict``. The
    class with the highest score is translated to a name through
    ``reverse_author_dict``.

    Args:
        s: sentence text.
        print_percentages: when true, first print every author's score as a
            percentage rounded to two decimals, then the winner prefixed with
            ``Result: ``; otherwise print only the winner's name.

    Returns:
        None; the result is written to stdout.
    """
    result = cnn_model.predict(sentence_to_vec(s, vec_length, cutoff))

    if not print_percentages:
        print(reverse_author_dict[np.argmax(result)])
        return

    breakdown = []
    for i in range(3):
        percent = round(result[0][i] * 100,2)
        # str() is kept explicit rather than an f-string so the printed digits
        # stay exactly as before for NumPy scalar scores.
        breakdown.append(reverse_author_dict[i] + ': ' + str(percent) + '%')
    print(', '.join(breakdown))
    print('Result: ', reverse_author_dict[np.argmax(result)])

In [26]:
# Demo: classify a short line; print_percentages=True prints the per-author
# breakdown followed by the winning author.
who_wrote('Take this kiss upon the brow!',True)

Edgar Allan Poe: 89.73%, HP Lovecraft: 0.03%, Mary Shelley: 10.24%
Result:  Edgar Allan Poe


In [27]:
# Demo: a full prose sentence, again with the percentage breakdown.
who_wrote('If a fire wanted fanning, it could readily be fanned with a newspaper.',True)

Edgar Allan Poe: 2.51%, HP Lovecraft: 32.9%, Mary Shelley: 64.59%
Result:  Mary Shelley


In [28]:
# Demo: two sentences in one input. NOTE(review): the text contains a stray,
# unmatched double-quote character before "A few days"; it is passed to the
# tokenizer as-is.
who_wrote('He shall find that I can feel my injuries; he shall learn to dread my revenge" A few days after he arrived.',True)

Edgar Allan Poe: 0.47%, HP Lovecraft: 5.76%, Mary Shelley: 93.77%
Result:  Mary Shelley


In [29]:
# Demo: a fragment without terminal punctuation.
who_wrote('What though their hireling Greaser bands',True)

Edgar Allan Poe: 6.08%, HP Lovecraft: 93.4%, Mary Shelley: 0.53%
Result:  HP Lovecraft


In [30]:
# Demo: single-word input. sentence_to_vec fills all `cutoff` positions by
# cycling through the token(s) plus a ' ' separator, so the model sees this
# word repeated.
who_wrote('Cthulhu',True)

Edgar Allan Poe: 14.94%, HP Lovecraft: 84.55%, Mary Shelley: 0.51%
Result:  HP Lovecraft


In [31]:
# Demo: a very short phrase; any token present in stop_words is dropped by
# shorten_sentence before embedding.
who_wrote('Time and time again',True)

Edgar Allan Poe: 95.74%, HP Lovecraft: 2.84%, Mary Shelley: 1.43%
Result:  Edgar Allan Poe


In [32]:
# Demo: a line with commas and an exclamation mark; punctuation tokens listed
# in stop_words are removed before embedding.
who_wrote("Oh, come to me in dreams, my love!",True)

Edgar Allan Poe: 1.7%, HP Lovecraft: 8.03%, Mary Shelley: 90.27%
Result:  Mary Shelley


In [33]:
# NOTE(review): this call repeats the previous cell's input verbatim and the
# recorded output is identical -- consider removing one of the two cells.
who_wrote("Oh, come to me in dreams, my love!",True)

Edgar Allan Poe: 1.7%, HP Lovecraft: 8.03%, Mary Shelley: 90.27%
Result:  Mary Shelley


In [34]:
# Demo: a long multi-clause sentence; at most `cutoff` tokens that survive
# stop-word filtering are embedded.
who_wrote("His soul overflowed with ardent affections, and his friendship was of that devoted and wondrous nature that the world minded teach us to look for only in the imagination.",True)

Edgar Allan Poe: 0.6%, HP Lovecraft: 0.2%, Mary Shelley: 99.2%
Result:  Mary Shelley


In [36]:
# Demo: one more prose sentence with the percentage breakdown.
who_wrote("These bizarre attempts at explanation were followed by others equally bizarre.",True)

Edgar Allan Poe: 9.86%, HP Lovecraft: 89.88%, Mary Shelley: 0.26%
Result:  HP Lovecraft
