In [16]:

from nltk.corpus import stopwords
import csv
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics, svm
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
import pandas, xgboost, numpy, textblob, string
from keras.preprocessing import text, sequence
from keras import layers, models, optimizers

In [2]:
# load the pre-trained word-embedding vectors 
embeddings_index = {}
for i, line in enumerate(open('auto_scraper/results/wiki-news-300d-1M.vec',  encoding='utf8')):
    values = line.split()
    embeddings_index[values[0]] = numpy.asarray(values[1:], dtype='float32')

In [3]:
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
ps = PorterStemmer()

stop_words = set(stopwords.words('english')) 

In [4]:
# from pycontractions import Contractions
# cont = Contractions(api_key="glove-twitter-100")

In [10]:
def preprocess_text(sentence):
    sentence = sentence.lower()
    word_tokens = word_tokenize(sentence)
    sentence = [w for w in word_tokens if not w in stop_words]
    sentence = [ps.stem(w) for w in word_tokens]
    return " ".join(sentence)

preprocess_text("Here’s my rolling list of all the places that")

'here ’ s my roll list of all the place that'

In [11]:
data = open('auto_scraper/results/dataset_no_marked_words.csv', 'r', encoding='utf8').read()
labels, texts = [], []
for i, line in enumerate(data.split("\n")):
    content = line.split('|')
    labels.append(content[0])
    sentence = " ".join(content[1:])
    texts.append(preprocess_text(text))

# create a dataframe using texts and lables
trainDF = pandas.DataFrame()
trainDF['text'] = texts
trainDF['label'] = labels
trainDF.head()

Unnamed: 0,text,label
0,here ’ s my roll list of all the place that ar...,0
1,"restaur detail : websit , ig i final made it h...",1
2,"restaur detail : websit , ig i am so glad to f...",0
3,"restaur detail : websit , ig it ’ s now been o...",0
4,other visit : nov 2018 • april 2018 • nov 2017...,1


In [12]:
# split the dataset into training and validation datasets 
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(trainDF['text'], trainDF['label'])

# label encode the target variable 
encoder = preprocessing.LabelEncoder()
train_y = encoder.fit_transform(train_y)
valid_y = encoder.fit_transform(valid_y)

In [13]:
# create a count vectorizer object 
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}')
count_vect.fit(trainDF['text'])

# transform the training and validation data using count vectorizer object
xtrain_count =  count_vect.transform(train_x)
xvalid_count =  count_vect.transform(valid_x)

In [14]:
# word level tf-idf
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', max_features=5000)
tfidf_vect.fit(trainDF['text'])
xtrain_tfidf =  tfidf_vect.transform(train_x)
xvalid_tfidf =  tfidf_vect.transform(valid_x)

# ngram level tf-idf 
tfidf_vect_ngram = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}', ngram_range=(2,3), max_features=5000)
tfidf_vect_ngram.fit(trainDF['text'])
xtrain_tfidf_ngram =  tfidf_vect_ngram.transform(train_x)
xvalid_tfidf_ngram =  tfidf_vect_ngram.transform(valid_x)

# characters level tf-idf
tfidf_vect_ngram_chars = TfidfVectorizer(analyzer='char', token_pattern=r'\w{1,}', ngram_range=(2,3), max_features=5000)
tfidf_vect_ngram_chars.fit(trainDF['text'])
xtrain_tfidf_ngram_chars =  tfidf_vect_ngram_chars.transform(train_x) 
xvalid_tfidf_ngram_chars =  tfidf_vect_ngram_chars.transform(valid_x) 



In [17]:
# create a tokenizer 
token = text.Tokenizer()
token.fit_on_texts(trainDF['text'])
word_index = token.word_index

# convert text to sequence of tokens and pad them to ensure equal length vectors 
train_seq_x = sequence.pad_sequences(token.texts_to_sequences(train_x), maxlen=70)
valid_seq_x = sequence.pad_sequences(token.texts_to_sequences(valid_x), maxlen=70)

# create token-embedding mapping
embedding_matrix = numpy.zeros((len(word_index) + 1, 300))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

array([[   0,    0,    0, ...,   37,   80, 2259],
       [ 280,  204,    3, ...,   22, 3364,  444],
       [  25,   69,    1, ..., 5304, 5305,   76],
       ...,
       [   0,    0,    0, ..., 1080,    6, 1830],
       [ 328,  592,  186, ...,  139,   39, 1357],
       [   0,    0,    0, ...,  321,   22, 2072]])

In [19]:
trainDF['char_count'] = trainDF['text'].apply(len)
trainDF['word_count'] = trainDF['text'].apply(lambda x: len(x.split()))
# trainDF['word_density'] = trainDF['char_count'] / (trainDF['word_count']+1)
# trainDF['punctuation_count'] = trainDF['text'].apply(lambda x: len("".join(_ for _ in x if _ in string.punctuation))) 
trainDF['title_word_count'] = trainDF['text'].apply(lambda x: len([wrd for wrd in x.split() if wrd.istitle()]))
trainDF['upper_case_word_count'] = trainDF['text'].apply(lambda x: len([wrd for wrd in x.split() if wrd.isupper()]))

In [30]:
pos_family = {
    'noun' : ['NN','NNS','NNP','NNPS'],
    'pron' : ['PRP','PRP$','WP','WP$'],
    'verb' : ['VB','VBD','VBG','VBN','VBP','VBZ'],
    'adj' :  ['JJ','JJR','JJS'],
    'adv' : ['RB','RBR','RBS','WRB']
}

# function to check and get the part of speech tag count of a words in a given sentence
def check_pos_tag(x, flag):
    cnt = 0
    try:
        wiki = textblob.TextBlob(x)
        for tup in wiki.tags:
            ppo = list(tup)[1]
            print(tup)
            if ppo in pos_family[flag]:
                cnt += 1
            break
    except:
        pass
    return cnt

trainDF['noun_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'noun'))
trainDF['verb_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'verb'))
trainDF['adj_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'adj'))
trainDF['adv_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'adv'))
trainDF['pron_count'] = trainDF['text'].apply(lambda x: check_pos_tag(x, 'pron'))

('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn', 'JJ')
('mani', 'NN')
('final', 'JJ')
('chri', 'NN')
('corbin', 'NN')
('the', 'DT')
('cult', 'NN')
('harden', 'JJ')
('a', 'DT')
('natur', 'JJ')
('nami', 'JJ')
('troubl', 'RB')
('west', 'JJ')
('llama', 'JJ')
('from', 'IN')
('golden', 'JJ')
('the', 'DT')
('le', 'JJ')
('cleveland', 'NN')
('here', 'RB')
('as', 'IN')
('here', 'RB')
('2120', 'CD')
('1426', 'CD')
('1739', 'CD')
('265', 'CD')
('1725', 'CD')
('221', 'CD')
('291', 'CD')
('the', 'DT')
('forget', 'VB')
('land', 'NN')
('cava', 'NN')
('a', 'DT')
('you', 'PRP')
('a', 'DT')
('parma', 'NN')
('and', 'CC')
('tofu', 'NN')
('enter', 'NN')
('a', 'DT')
('the', 'DT')
('i', 'NN')
('new', 'JJ')
('you', 'PRP')
('there', 'RB')
('today', 'NN')
('these', 'DT')
('and', 'CC')
('which', 'WDT')
('will', 'MD')
('tom', 'NN')
('if', 'IN')
('tom', 'NN')
('so', 'RB')
('scotch', 'NN')
('mani', 'NN')
('london', 'NN')
('he', 'PRP')
('and',

('….and', 'NN')
('and', 'CC')
('on', 'IN')
('it', 'PRP')
('fortun', 'NN')
('that', 'DT')
('start', 'NN')
('jayadmir', 'NN')
('hisonli', 'NN')
('‘', 'NN')
('turn', 'VB')
('matthealey', 'NN')
('the', 'DT')
('the', 'DT')
('ventur', 'JJ')
('visit', 'NN')
('visit', 'NN')
('repeat', 'NN')
('from', 'IN')
('“', 'JJ')
('he', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('from', 'IN')
('be', 'VB')
('servic', 'JJ')
('from', 'IN')
('we', 'PRP')
('if', 'IN')
('on', 'IN')
('that', 'DT')
('in', 'IN')
('which', 'WDT')
('by', 'IN')
('but', 'CC')
('second', 'JJ')
('thi', 'NN')
('a', 'DT')
('we', 'PRP')
('guy', 'NN')
('i', 'NN')
('i', 'NN')
('thi', 'NN')
('i', 'NN')
('the', 'DT')
('ok', 'JJ')
('thi', 'NN')
('it', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('ha

('whole', 'JJ')
('i', 'JJ')
('we', 'PRP')
('thi', 'NN')
('i', 'NN')
('meng', 'NN')
('as', 'IN')
('from', 'IN')
('we', 'PRP')
('like', 'IN')
('2', 'CD')
('the', 'DT')
('the', 'DT')
('and', 'CC')
('it', 'PRP')
('just', 'RB')
('‘', 'JJ')
('hi', 'NN')
('a', 'DT')
('i', 'NN')
('a', 'DT')
('is', 'VBZ')
('restaur', 'NN')
('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn', 'JJ')
('mani', 'NN')
('final', 'JJ')
('here', 'RB')
('i', 'NN')
('i', 'NN')
('i', 'NN')
('one', 'CD')
('and', 'CC')
('even', 'RB')
('san', 'JJ')
('my', 'PRP$')
('hi', 'NN')
('my', 'PRP$')
('a', 'DT')
('i', 'NNS')
('i', 'JJ')
('my', 'PRP$')
('hi', 'NN')
('ti', 'IN')
('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn', 'JJ')
('mani', 'NN')
('final', 'JJ')
('when', 'WRB')
('i', 'NN')
('tarama', 'NN')
('eat', 'NN')
('cork', 'NN')
('the', 'DT')
('i', 'NN')
('moan', 'VB')
('as', 'IN')
('tahara', 'NN')
('the'

('hi', 'NN')
('after', 'IN')
('wa', 'NN')
('wa', 'NN')
('the', 'DT')
('none', 'NN')
('grace', 'NN')
('tri', 'NN')
('hand', 'NN')
('he', 'PRP')
('she', 'PRP')
('it', 'PRP')
('“', 'VB')
('“', 'NN')
('it', 'PRP')
('eat', 'NN')
('the', 'DT')
('on', 'IN')
('all', 'DT')
('a', 'DT')
('it', 'PRP')
('the', 'DT')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('a', 'DT')
('london', 'NN')
('but', 'CC')
('if', 'IN')
('so', 'RB')
('it', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('he', 'PRP')
('in', 'IN')
('co-found', 'NN')
('so', 'RB')
('the', 'DT')
('insid', 'NN')
('the', 'DT')
('onc', 'NN')
('just', 'RB')
('review', 'NN')
('until', 'IN')
('gilestook', 'NN')
('ha', 'NN')
('“', 'NN')
('ani', 'RB')
('who', 'WP')
('as', 'IN')
('the', 'DT')
('alway', 'RB')
('the', 'DT')
('crab', 'NN')
('and', 'CC')

('with', 'IN')
('the', 'DT')
('the', 'DT')
('in', 'IN')
('we', 'PRP')
('a', 'DT')
('the', 'DT')
('we', 'PRP')
('i', 'JJ')
('i', 'NN')
('so', 'RB')
('veri', 'JJ')
('wild', 'JJ')
('an', 'DT')
('oh', 'MD')
('http', 'NN')
('sonoma', 'NN')
('thi', 'NN')
('with', 'IN')
('entir', 'NN')
('i', 'NN')
('if', 'IN')
('kang', 'NN')
('tweet', 'NN')
('hello', 'NN')
('the', 'DT')
('with', 'IN')
('the', 'DT')
('the', 'DT')
('in', 'IN')
('we', 'PRP')
('a', 'DT')
('the', 'DT')
('we', 'PRP')
('i', 'JJ')
('i', 'NN')
('so', 'RB')
('veri', 'JJ')
('wild', 'JJ')
('an', 'DT')
('oh', 'MD')
('http', 'NN')
('sonoma', 'NN')
('thi', 'NN')
('it', 'PRP')
('tweet', 'NN')
('atari-ya', 'NN')
('daniel', 'NN')
('when', 'WRB')
('thi', 'NN')
('it', 'PRP')
('we', 'PRP')
('sam', 'JJ')
('one', 'CD')
('i', 'NN')
('there', 'RB')
('onli', 'IN')
('it', 'PRP')
('get', 'VB')
('there', 'EX')
('roka', 'NN')
('i', 'JJ')
('for', 'IN')
('i', 'JJ')
('for', 'IN')
('i', 'JJ')
('for', 'IN')
('i', 'JJ')
('for', 'IN')
('i', 'JJ')
('for', 'IN')
(

('each', 'DT')
('a', 'DT')
('today', 'NN')
('“', 'NN')
('i', 'NNS')
('i', 'NN')
('perfect', 'NN')
('who', 'WP')
('thi', 'NN')
('£46', 'NN')
('all', 'DT')
('claim', 'NN')
('can', 'MD')
('much', 'JJ')
('what', 'WP')
('the', 'DT')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn', 'JJ')
('mani', 'NN')
('final', 'JJ')
('chri', 'NN')
('corbin', 'NN')
('the', 'DT')
('cult', 'NN')
('harden', 'JJ')
('a', 'DT')
('natur', 'JJ')
('nami', 'JJ')
('troubl', 'RB')
('west', 'JJ')
('llama', 'JJ')
('from', 'IN')
('golden', 'JJ')
('the', 'DT')
('le', 'JJ')
('cleveland', 'NN')
('here', 'RB')
('as', 'IN')
('here', 'RB')
('2120', 'CD')
('1426', 'CD')
('1739', 'CD')
('265', 'CD')
('1725', 'CD')
('221', 'CD')
('291', 'CD')
('the', 'DT')
('forget', 'VB')
('land', 'NN')
('

('it', 'PRP')
('….and', 'NN')
('and', 'CC')
('on', 'IN')
('it', 'PRP')
('fortun', 'NN')
('that', 'DT')
('start', 'NN')
('jayadmir', 'NN')
('hisonli', 'NN')
('‘', 'NN')
('turn', 'VB')
('matthealey', 'NN')
('the', 'DT')
('the', 'DT')
('ventur', 'JJ')
('visit', 'NN')
('visit', 'NN')
('repeat', 'NN')
('from', 'IN')
('“', 'JJ')
('he', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('from', 'IN')
('be', 'VB')
('servic', 'JJ')
('from', 'IN')
('we', 'PRP')
('if', 'IN')
('on', 'IN')
('that', 'DT')
('in', 'IN')
('which', 'WDT')
('by', 'IN')
('but', 'CC')
('second', 'JJ')
('thi', 'NN')
('a', 'DT')
('we', 'PRP')
('guy', 'NN')
('i', 'NN')
('i', 'NN')
('thi', 'NN')
('i', 'NN')
('the', 'DT')
('ok', 'JJ')
('thi', 'NN')
('it', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harde

('updat', 'JJ')
('424', 'CD')
('209', 'CD')
('1220', 'CD')
('though', 'IN')
('the', 'DT')
('leo', 'NN')
('work', 'NN')
('it', 'PRP')
('there', 'EX')
('they', 'PRP')
('the', 'DT')
('a', 'DT')
('whole', 'JJ')
('i', 'JJ')
('we', 'PRP')
('thi', 'NN')
('i', 'NN')
('meng', 'NN')
('as', 'IN')
('from', 'IN')
('we', 'PRP')
('like', 'IN')
('2', 'CD')
('the', 'DT')
('the', 'DT')
('and', 'CC')
('it', 'PRP')
('just', 'RB')
('‘', 'JJ')
('hi', 'NN')
('a', 'DT')
('i', 'NN')
('a', 'DT')
('is', 'VBZ')
('restaur', 'NN')
('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn', 'JJ')
('mani', 'NN')
('final', 'JJ')
('here', 'RB')
('i', 'NN')
('i', 'NN')
('i', 'NN')
('one', 'CD')
('and', 'CC')
('even', 'RB')
('san', 'JJ')
('my', 'PRP$')
('hi', 'NN')
('my', 'PRP$')
('a', 'DT')
('i', 'NNS')
('i', 'JJ')
('my', 'PRP$')
('hi', 'NN')
('ti', 'IN')
('here', 'RB')
('restaur', 'NN')
('restaur', 'NN')
('restaur', 'NN')
('other', 'JJ')
('have', 'VB')
('it', 'PRP')
('bjorn

('from', 'IN')
('the', 'DT')
('a', 'DT')
('a', 'DT')
('head', 'NN')
('the', 'DT')
('kala', 'NN')
('he', 'PRP')
('muse', 'NN')
('review', 'NN')
('the', 'DT')
('also', 'RB')
('magazin', 'NN')
('hi', 'NN')
('after', 'IN')
('wa', 'NN')
('wa', 'NN')
('the', 'DT')
('none', 'NN')
('grace', 'NN')
('tri', 'NN')
('hand', 'NN')
('he', 'PRP')
('she', 'PRP')
('it', 'PRP')
('“', 'VB')
('“', 'NN')
('it', 'PRP')
('eat', 'NN')
('the', 'DT')
('on', 'IN')
('all', 'DT')
('a', 'DT')
('it', 'PRP')
('the', 'DT')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('a', 'DT')
('london', 'NN')
('but', 'CC')
('if', 'IN')
('so', 'RB')
('it', 'PRP')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('harden', 'JJ')
('he', 'PRP')
('in', 'IN')
('co-found', 'NN')
('so', 'RB')
('the', 'DT')
('insid', 'NN')
('the', 'DT')
('onc', 'NN')
('just', 'RB')
(

In [28]:
xtrain_count

<717x8341 sparse matrix of type '<class 'numpy.int64'>'
	with 35872 stored elements in Compressed Sparse Row format>

In [21]:
def train_model(classifier, feature_vector_train, label, feature_vector_valid, is_neural_net=False):
    # fit the training dataset on the classifier
    classifier.fit(feature_vector_train, label)
    
    # predict the labels on validation dataset
    predictions = classifier.predict(feature_vector_valid)
    
    if is_neural_net:
        predictions = predictions.argmax(axis=-1)
    
    return metrics.accuracy_score(predictions, valid_y)

In [22]:
# Linear Classifier on Count Vectors
accuracy = train_model(linear_model.LogisticRegression(), xtrain_count, train_y, xvalid_count)
print ("LR, Count Vectors: ", accuracy)

# Linear Classifier on Word Level TF IDF Vectors
accuracy = train_model(linear_model.LogisticRegression(), xtrain_tfidf, train_y, xvalid_tfidf)
print ("LR, WordLevel TF-IDF: ", accuracy)

# Linear Classifier on Ngram Level TF IDF Vectors
accuracy = train_model(linear_model.LogisticRegression(), xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram)
print ("LR, N-Gram Vectors: ", accuracy)

# Linear Classifier on Character Level TF IDF Vectors
accuracy = train_model(linear_model.LogisticRegression(), xtrain_tfidf_ngram_chars, train_y, xvalid_tfidf_ngram_chars)
print ("LR, CharLevel Vectors: ", accuracy)

LR, Count Vectors:  0.02092050209205021
LR, WordLevel TF-IDF:  0.0041841004184100415
LR, N-Gram Vectors:  0.0041841004184100415
LR, CharLevel Vectors:  0.0041841004184100415


In [24]:
# Naive Bayes on Count Vectors
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_count, train_y, xvalid_count)
print ("NB, Count Vectors: ", accuracy)

# Naive Bayes on Word Level TF IDF Vectors
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_tfidf, train_y, xvalid_tfidf)
print ("NB, WordLevel TF-IDF: ", accuracy)

# Naive Bayes on Ngram Level TF IDF Vectors
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram)
print ("NB, N-Gram Vectors: ", accuracy)

# Naive Bayes on Character Level TF IDF Vectors
accuracy = train_model(naive_bayes.MultinomialNB(), xtrain_tfidf_ngram_chars, train_y, xvalid_tfidf_ngram_chars)
print ("NB, CharLevel Vectors: ", accuracy)

NB, Count Vectors:  0.16317991631799164
NB, WordLevel TF-IDF:  0.0041841004184100415
NB, N-Gram Vectors:  0.008368200836820083
NB, CharLevel Vectors:  0.012552301255230125


In [33]:
import spacy
sp = spacy.load('en_core_web_sm')

sen = sp(u'Manchester United is looking to sign Harry Kane for $90 million')

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory.