## Review Sentiment Operationalization

### Schema Generation

In [1]:
# This script generates the scoring and schema files
# necessary to operationalize the review sentiment prediction sample
# (the init and run functions).

from azureml.api.schema.dataTypes import DataTypes
from azureml.api.schema.sampleDefinition import SampleDefinition
from azureml.api.realtime.services import generate_schema

In [2]:
import pandas as pd
import string

In [3]:
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import PorterStemmer

from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [4]:
# Prepare the web service definition by authoring
# init() and run() functions. Test the functions
# before deploying the web service.

def init():
    """Load the trained model from ``model_30.pkl`` into the global ``model``.

    ``run`` reads that global, so this has to be called before ``run``.
    """
    from sklearn.externals import joblib

    global model
    model_path = 'model_30.pkl'
    # Deserialize the persisted model once; later calls to run() reuse it.
    model = joblib.load(model_path)

In [5]:
def run(input_df):
    """Predict with the loaded model for the first review in ``input_df``.

    :param input_df: single-column pandas DataFrame of raw review text. It is
        only read; the caller's frame is left untouched.
    :return: ``json.dumps`` of the string form of the prediction for row 0.
    :raises ValueError: if ``input_df`` does not have exactly one column.

    ``init`` must have been called first so that the global ``model`` exists.
    """
    import json

    # Read the text out instead of renaming/overwriting the column in place:
    # the previous in-place edits leaked into the DataFrame the caller passed.
    if input_df.shape[1] != 1:
        raise ValueError('run() expects a DataFrame with exactly one column, got %d'
                         % input_df.shape[1])
    raw_reviews = input_df.iloc[:, 0].tolist()

    # Stop words: the "Col1" column of StopWords.csv plus every single
    # lowercase letter except "i".
    stop_words = set(pd.read_csv('StopWords.csv')["Col1"].tolist())
    stop_words.update(letter for letter in string.ascii_lowercase if letter != "i")

    # Lowercase each review, keep only \w+ tokens and drop the stop words.
    # The tokenizer is built once rather than once per row.
    word_tokenizer = RegexpTokenizer(r'\w+')
    cleaned_reviews = []
    for line in raw_reviews:
        lowered = (token.lower() for token in word_tokenizer.tokenize(line))
        cleaned_reviews.append(" ".join(word for word in lowered if word not in stop_words))

    stemmer = PorterStemmer()

    def tokenize(text):
        '''
        :param text: cleaned review text
        :return: the space-separated tokens of ``text``, each one stemmed
        '''
        return [stemmer.stem(token) for token in text.strip().split(" ")]

    # NOTE(review): the vectorizer is fit on the request itself, so its columns
    # come from this request's own vocabulary, and only the first 30 of them
    # are handed to the model. Confirm this lines up with the features
    # model_30.pkl was trained on; a review yielding fewer than 30 columns
    # gives the model a narrower matrix.
    tfidf = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_df=160000,
                            min_df=1, norm="l2", use_idf=True)
    tfs = tfidf.fit_transform(cleaned_reviews)

    pred = model.predict(tfs[0, :30])
    return json.dumps(str(pred[0]))
print('executed')

executed


In [6]:
# Single-row sample frame; it is used further down as the example input when the schema is generated.
df1 = pd.DataFrame(data=[["I absolutely love my bank. There's a reason this bank's customer base is so strong--their customer service actually acts like people and not robots. I love that anytime my card is swiped, I'm instantly notified. And the built in budgeting app is something that really makes life easier. The biggest setback is not being able to deposit cash (you have to get a money order), and if you have another, non-simple bank account, transferring money between accounts can take a few days, which frankly isn't acceptable with most ACH taking a business day or less. Overall, it's a great bank, and I would recommend it to anyone."]], columns=['review'])
df1.dtypes  # not the last expression of the cell, so this value is not displayed
df1

Unnamed: 0,review
0,I absolutely love my bank. There's a reason th...


In [7]:
# Load the model into the global `model`, then build a one-row frame to pass to run().
init()
input1 = pd.DataFrame(data=[["I absolutely love my bank. There's a reason this bank's customer base is so strong--their customer service actually acts like people and not robots. I love that anytime my card is swiped, I'm instantly notified. And the built in budgeting app is something that really makes life easier. The biggest setback is not being able to deposit cash (you have to get a money order), and if you have another, non-simple bank account, transferring money between accounts can take a few days, which frankly isn't acceptable with most ACH taking a business day or less. Overall, it's a great bank, and I would recommend it to anyone."]], columns=['review'])
input1.head()



Unnamed: 0,review
0,I absolutely love my bank. There's a reason th...


In [8]:
# Try the scoring function on the sample review; run() returns a JSON-encoded string.
run(input1)

'"0"'

In [9]:
# Describe run()'s single argument, input_df, as a pandas DataFrame using df1 as the sample.
inputs = {"input_df": SampleDefinition(DataTypes.PANDAS, df1)}

# generate_schema is given the run function, the input definition and the
# path of the schema file (senti_service_schema.json) to write.
# NOTE(review): the earlier wording also mentioned a scoring file (main.py);
# nothing in this cell names one -- confirm where it is produced.

generate_schema(run_func=run, inputs=inputs, filepath='senti_service_schema.json')

{'input': {'input_df': {'internal': 'gANjYXp1cmVtbC5hcGkuc2NoZW1hLnBhbmRhc1V0aWwKUGFuZGFzU2NoZW1hCnEAKYFxAX1xAihYCgAAAHNjaGVtYV9tYXBxA31xBFgGAAAAcmV2aWV3cQVjbnVtcHkKZHR5cGUKcQZYAgAAAE84cQdLAEsBh3EIUnEJKEsDWAEAAAB8cQpOTk5K/////0r/////Sz90cQtic1gMAAAAY29sdW1uX3R5cGVzcQxdcQ1oCWFYBQAAAHNoYXBlcQ5LAUsBhnEPWAwAAABjb2x1bW5fbmFtZXNxEF1xEWgFYXViLg==',
   'swagger': {'example': [{'review': "I absolutely love my bank. There's a reason this bank's customer base is so strong--their customer service actually acts like people and not robots. I love that anytime my card is swiped, I'm instantly notified. And the built in budgeting app is something that really makes life easier. The biggest setback is not being able to deposit cash (you have to get a money order), and if you have another, non-simple bank account, transferring money between accounts can take a few days, which frankly isn't acceptable with most ACH taking a business day or less. Overall, it's a great bank, and I would recommend it to anyo

### Scoring Function

In [10]:
import pandas as pd
import string

In [11]:
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import PorterStemmer

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [13]:
def init():
    """Unpickle the trained model from ``./model_30.pkl`` into the global ``model``.

    ``run`` reads that global, so this has to be called before ``run``.
    """
    # These imports are not referenced below. NOTE(review): presumably kept so
    # the libraries the pickled model needs are loaded up front -- confirm.
    import numpy
    import scipy
    from sklearn.linear_model import LogisticRegression
    import pickle

    global model
    # pickle.load can execute arbitrary code; only use a model file you trust.
    # The with-block closes the file even when unpickling raises.
    with open('./model_30.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

In [14]:
def run(inputString):
    """Score one JSON-encoded review with the model loaded by ``init``.

    :param inputString: JSON text that ``pandas.DataFrame`` can turn into a
        single-cell frame, e.g. ``'[["some review text"]]'``.
    :return: a ``"bad input: ..."`` message when the payload is not valid
        JSON, is not of shape (1, 1) or does not hold a string; otherwise
        ``json.dumps`` of the string form of the model's prediction.

    ``init`` must have been called first so that the global ``model`` exists.
    """
    import json

    shape_error = 'bad input: expecting a JSON encoded list of lists of shape (1,1).'

    try:
        payload = json.loads(inputString)
    except (ValueError, TypeError):
        # ValueError: malformed JSON. TypeError: the argument is not text.
        return "bad input: expecting a JSON encoded list of lists."

    # Building the frame can itself fail on a malformed payload (a bare
    # scalar, rows with more than one column, ...). Report that as bad input
    # instead of letting the exception escape ahead of the shape check.
    # AssertionError is included because some pandas versions use it for a
    # column-count mismatch.
    try:
        input_df = pd.DataFrame(payload, columns=['review'])
    except (ValueError, TypeError, AssertionError):
        return shape_error
    if input_df.shape != (1, 1):
        return shape_error

    review = input_df.iloc[0, 0]
    if not isinstance(review, str):
        # The tokenizer below only works on text.
        return 'bad input: expecting the review to be a string.'

    # Stop words: the "Col1" column of StopWords.csv plus every single
    # lowercase letter except "i".
    stop_words = set(pd.read_csv('StopWords.csv')["Col1"].tolist())
    stop_words.update(letter for letter in string.ascii_lowercase if letter != "i")

    # Lowercase the review, keep only \w+ tokens and drop the stop words.
    lowered = (token.lower() for token in RegexpTokenizer(r'\w+').tokenize(review))
    cleaned_reviews = [" ".join(word for word in lowered if word not in stop_words)]

    stemmer = PorterStemmer()

    def tokenize(text):
        '''
        :param text: cleaned review text
        :return: the space-separated tokens of ``text``, each one stemmed
        '''
        return [stemmer.stem(token) for token in text.strip().split(" ")]

    # NOTE(review): the vectorizer is fit on the request itself, so its columns
    # come from this request's own vocabulary, and only the first 30 of them
    # are handed to the model. Confirm this lines up with the features
    # model_30.pkl was trained on; a review yielding fewer than 30 columns
    # gives the model a narrower matrix.
    tfidf = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_df=160000,
                            min_df=1, norm="l2", use_idf=True)
    tfs = tfidf.fit_transform(cleaned_reviews)

    pred = model.predict(tfs[0, :30])
    return json.dumps(str(pred[0]))
print('executed')

executed


In [15]:
# When executed as a script: load the model, then score one JSON-encoded sample review and print the result.
if __name__ == '__main__':
    import json
    init()
    print (run(json.dumps([["I absolutely love my bank. There's a reason this bank's customer base is so strong--their customer service actually acts like people and not robots. I love that anytime my card is swiped, I'm instantly notified. And the built in budgeting app is something that really makes life easier. The biggest setback is not being able to deposit cash (you have to get a money order), and if you have another, non-simple bank account, transferring money between accounts can take a few days, which frankly isn't acceptable with most ACH taking a business day or less. Overall, it's a great bank, and I would recommend it to anyone."]])))

"0"
