This notebook serves a pre-trained sentiment analysis pipeline (a regex pre-processing step, tokenization, n-gram computation, and a logistic regression model) as a RESTful API.

In [2]:
import cPickle
import json

import pandas as pd
import sklearn
import requests

### Import the trained model

In [None]:
# Download the pickled, pre-trained sentiment classifier.
# The timeout stops the kernel from hanging indefinitely if the host stalls
# (requests applies no timeout by default).
resp = requests.get(
    "https://raw.githubusercontent.com/crawles/gpdb_sentiment_analysis_twitter_model/master/twitter_sentiment_model.pkl",
    timeout=30,
)
# Fail loudly on a 4xx/5xx answer instead of trying to unpickle an error page.
resp.raise_for_status()
# SECURITY NOTE: unpickling executes arbitrary code embedded in the payload.
# Only load this file from a source you trust; consider pinning the URL to a
# specific commit and verifying a checksum before deserializing.
cl = cPickle.loads(resp.content)

### Import data pre-processing function

In [2]:
def regex_preprocess(raw_tweets):
    """Normalize raw tweet text before it is handed to the sentiment model.

    Three regex substitutions are applied, in this order:

    1. ``@`` mentions (an ``@`` at the start of the text or after a character
       outside ``[a-zA-Z0-9-_.]``, followed by a letter and at least one more
       alphanumeric character) are replaced with the token ``USERNAME``.
    2. ``http://`` / ``https://`` / ``www.`` links are replaced with ``URL``.
    3. Runs of three or more identical characters are collapsed to two.

    Parameters
    ----------
    raw_tweets : list-like of str
        Anything accepted by ``pd.Series``.

    Returns
    -------
    pd.Series
        The cleaned text, one entry per input entry.
    """
    pp_text = pd.Series(raw_tweets)

    # Raw strings keep the backslashes literal; the pattern text is unchanged.
    user_pat = r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9]+)'
    http_pat = r'(https?:\/\/(?:www\.|(?!www))[^\s\.]+\.[^\s]{2,}|www\.[^\s]+\.[^\s]{2,})'
    repeat_pat, repeat_repl = r"(.)\1\1+", r'\1\1'

    # regex=True is explicit because the default of ``Series.str.replace``
    # changed from regex to literal matching in pandas 2.0 (the keyword itself
    # exists since pandas 0.23).
    pp_text = pp_text.str.replace(pat=user_pat, repl='USERNAME', regex=True)
    pp_text = pp_text.str.replace(pat=http_pat, repl='URL', regex=True)
    # The result must be assigned back: ``str.replace`` returns a new Series,
    # so without the assignment the repeated-character step has no effect.
    pp_text = pp_text.str.replace(pat=repeat_pat, repl=repeat_repl, regex=True)
    return pp_text

# Set up the API

Jupyter Kernel Gateway utilizes a global REQUEST JSON string that will be replaced on each invocation of the API.

In [3]:
# Placeholder request with empty 'path' and 'args' sections, serialized to a
# JSON string so the handler cells can json.loads() it.
REQUEST = json.dumps(dict(path={}, args={}))

### Compute sentiment using trained model and serve using POST

With the kernel gateway, a code cell becomes an HTTP handler when its first line is a single-line comment naming the HTTP verb and path (for example, `# POST /polarity_compute`). Handlers support the common HTTP verbs (GET, POST, DELETE, etc.). For more information, see the <a href="https://jupyter-kernel-gateway.readthedocs.io/en/latest/http-mode.html">docs</a>.

In [None]:
# POST /polarity_compute
# Handler cell: reads the tweets from REQUEST['body']['data'], cleans them with
# regex_preprocess, and prints column 1 of the classifier's predict_proba output.
# NOTE(review): column 1 is presumably the positive-sentiment class; confirm
# against the trained model's class order.
# NOTE(review): if predict_proba returns a NumPy array, print() emits its repr,
# which is not JSON; confirm that API clients expect this format.
payload = json.loads(REQUEST)
raw_tweets = payload['body']['data']
class_probabilities = cl.predict_proba(regex_preprocess(raw_tweets))
print(class_probabilities[:, 1])

# Predict timepoint API

In [4]:
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import Adam
from keras.layers.advanced_activations import PReLU

In [8]:
from keras.models import model_from_json
import pickle

# Load the JSON architecture description and rebuild the model from it.
# The context manager closes the file even if reading raises.
with open("model.json", 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Load weights into new model
model.load_weights("model.h5")

# Compile model
model.compile(loss='mse', optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8))

# Load scaler; the file handle is closed as soon as the object is unpickled.
# SECURITY NOTE: pickle.load executes code embedded in the file, so only load
# a scaler.sav that you produced yourself or otherwise trust.
with open('scaler.sav', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [None]:
# POST /predict_timepoint

# Handler cell: reads one feature vector from REQUEST['body']['data'] and
# prints the model's prediction on the original scale.
payload = json.loads(REQUEST)

# Wrap the payload in an outer list so the model receives a batch of one.
features = np.array([payload['body']['data']], dtype=float)

# Predict, undo the scaler, then apply exp(.) - 1.
# NOTE(review): presumably the training target was min-max scaled log(1 + y);
# confirm against the training code.
scaled_log_prediction = model.predict(features, batch_size=1)
log_prediction = scaler.inverse_transform(scaled_log_prediction)
prediction = np.exp(log_prediction) - 1

print(prediction)