In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

In [2]:
# Load our data: one "<headline><TAB><label>" record per line.
# headlines and labels end up as two parallel tuples (zip yields tuples).
with open("clickbait.txt", "r", encoding="UTF-8") as f:
    raw_lines = f.read().strip().split("\n")

lines = []
for raw_line in raw_lines:
    if not raw_line.strip():
        # Skip blank lines instead of letting them break the unpacking below
        continue
    # Split on the LAST tab only, so a tab inside a headline cannot push the
    # label out of the second column.
    fields = raw_line.rsplit("\t", 1)
    if len(fields) != 2:
        raise ValueError(
            f"clickbait.txt: expected '<headline>\\t<label>', got {raw_line!r}"
        )
    lines.append(fields)

if not lines:
    # zip(*[]) would yield nothing and the unpacking would fail obscurely
    raise ValueError("clickbait.txt contains no records")
headlines, labels = zip(*lines)

In [3]:
# Peek at the first five headlines to sanity-check the parsing
headlines[:5]

("Egypt's top envoy in Iraq confirmed killed",
 'Carter: Race relations in Palestine are worse than apartheid',
 'After Years Of Dutiful Service, The Shiba Who Ran A Tobacco Shop Retires',
 'In Books on Two Powerbrokers, Hints of the Future',
 'These Horrifyingly Satisfying Photos Of "Baby Foot" Will Haunt You')

In [4]:
# First five labels, position-aligned with the headlines; they stay strings
# exactly as read from the file.
# NOTE(review): '1' appears to mark clickbait and '0' regular news, judging by
# the sample headlines — confirm against the dataset description.
labels[:5]

('0', '0', '1', '0', '1')

In [5]:
# Total number of records loaded
len(headlines)

10000

In [6]:
# Break dataset into train and test sets: the first 80% of the rows train the
# model, the remaining 20% are held out for evaluation.
# The cut point is derived from the dataset size instead of being hard-coded,
# so the split stays 80/20 if the file grows or shrinks (8000 for 10000 rows).
# NOTE(review): the split is positional, not shuffled — this assumes the file
# is not ordered by label; confirm.
split_index = len(headlines) * 4 // 5

train_headlines = headlines[:split_index]
test_headlines = headlines[split_index:]

train_labels = labels[:split_index]
test_labels = labels[split_index:]

In [7]:
# Build the two pipeline stages with their default settings:
# a linear SVM classifier and the TF-IDF vectorizer that will feed it.
svm = LinearSVC()
vectorizer = TfidfVectorizer()

In [8]:
# Transform our text data into numerical vectors.
# The vectorizer is fitted on the training headlines only; the test headlines
# are then transformed with that same fitted vectorizer (no refit on test data).
train_vectors = vectorizer.fit_transform(train_headlines)
test_vectors = vectorizer.transform(test_headlines)

In [9]:
# Train the classifier on the training vectors and their labels
# (prediction on the test set happens in the next cell)
svm.fit(train_vectors, train_labels)

LinearSVC()

In [10]:
# Predict a label for every held-out test headline
predictions = svm.predict(test_vectors)

In [11]:
# First five test headlines, for eyeballing against the predicted labels
test_headlines[0:5]

('The Earliest I\'ve Said "I Love You"',
 "Stop What You're Doing And Worship These Matt Bomer Pictures",
 '23 Of The Funniest "Nancy Drew" Game Memes',
 'Policeman killed in football-related violence in Italy',
 'Do You Remember Which Disney Star Sang These Lyrics')

In [12]:
# Predicted labels for those first five test headlines
predictions[:5]

array(['1', '1', '1', '0', '1'], dtype='<U1')

In [13]:
# True labels for the same five test headlines, to compare with the predictions
test_labels[:5]

('1', '1', '1', '0', '1')

In [14]:
# Share of test headlines whose predicted label equals the true label
accuracy_score(test_labels, predictions)

0.961

In [15]:
# Try the trained model on two ad-hoc headlines
new_headlines = [
    "10 Cities That Every Hipster Will Be Moving To Soon",
    'Vice President Mike Pence Leaves NFL Game Saying Players Showed "Disrespect" Of Anthem, Flag',
]
# Reuse the already-fitted vectorizer (transform only), then classify
new_vectors = vectorizer.transform(new_headlines)
new_predictions = svm.predict(new_vectors)

In [16]:
# Show the predicted label for each new headline, in input order
new_predictions

array(['1', '0'], dtype='<U1')

In [17]:
#!pip freeze

In [18]:
# A second batch of ad-hoc headlines to probe the classifier
new_headlines = [
    "Trump kills People",
    "10 Things you need to know!",
    "Basketball team loses the NBA Finals",
    "Intro to Mobile Development",
]
new_vectors = vectorizer.transform(new_headlines)  # same fitted vectorizer, no refit
new_predictions = svm.predict(new_vectors)

In [19]:
# One predicted label per headline of the batch, in the order they were given
new_predictions

array(['1', '1', '0', '0'], dtype='<U1')

In [20]:
# Single-word inputs: check how the model labels a bare name
new_headlines = [
    "Biden",
    "Trump",
]
new_vectors = vectorizer.transform(new_headlines)  # vectorize with the fitted vocabulary
new_predictions = svm.predict(new_vectors)

In [21]:
# Predicted labels for the two single-word inputs, in input order
new_predictions

array(['0', '1'], dtype='<U1')

In [22]:
# Two more ad-hoc headlines, this time built around large money figures
new_headlines = [
    "I spent 1 trillion on Pokemon Cards",
    "This Mansion cost 1 billion DOLLARS!",
]
new_vectors = vectorizer.transform(new_headlines)  # transform only; the vectorizer is not refitted
new_predictions = svm.predict(new_vectors)

In [23]:
# Predicted labels for the two money-themed headlines, in input order
new_predictions

array(['0', '0'], dtype='<U1')

In [24]:
# import pickle
# pickle.dump(svm, open("svm.pkl", "wb"))
# pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))

In [1]:
import requests
import json

In [2]:
# !pip install requests

In [3]:
# Endpoint of the deployed prediction service
url = 'https://ml-tube.herokuapp.com/' # change to your url
# Sample headlines, serialized to a JSON array string for the request body
sample_headlines = ["Biden", "Trump"]
data = json.dumps(sample_headlines)

In [4]:
# POST the JSON payload to the prediction endpoint; send_request holds the
# Response object despite its name.
# A timeout is set explicitly because requests waits indefinitely by default,
# which would hang this cell if the server never answers.
send_request = requests.post(url, data, timeout=30)
# Printing the Response object shows only its status code
print(send_request)

<Response [200]>


In [5]:
# !pip freeze > requirements.txt

In [6]:
# Decode the response body as JSON and print the resulting object
print(send_request.json())

{'results': {'new_predictions': ['0', '1']}}


In [7]:
# Check which Python type the decoded JSON body has
type(send_request.json())

dict