In [38]:
import pickle
import random

import joblib
import numpy as np
import pandas as pd
import spacy
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

### Define data (LOAD AND CREATE)

In [27]:
# Names of the context classes.
# NOTE(review): not referenced by any other cell visible in this notebook, and the
# last entry ends with a space — confirm whether anything depends on that exact text.
my_tags = [
    'mycampaign-main_page',
    'mycampaign-mediamix',
    'mycampaign-planning',
    'mycampaign-trafficking-adserver',
    'mycampaign-trafficking-analytics',
    'mycampaign-goals-and-progres ',
]


# Load the labelled commands and derive the training frame in one chain:
#   1. lower-case every command,
#   2. keep only rows whose context label is > 0,
#   3. shift the remaining context labels down by one.
df_train = (
    pd.read_csv('csv_data/nlp_models_data.csv')
    .assign(command=lambda frame: frame['command'].map(lambda text: text.lower()))
    .loc[lambda frame: frame['context'] > 0]
    .assign(context=lambda frame: frame['context'] - 1)
)


# Features and the two label columns used by the context / action models.
X_train = df_train['command']
y_train_context = df_train['context']
y_train_action = df_train['action']


df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 190 entries, 0 to 189
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   command  190 non-null    object
 1   context  190 non-null    int64 
 2   action   190 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 5.9+ KB


In [28]:
# Create test values:
# one (command, context, action) triple per row. Contexts are written 1-based
# here and shifted down by one when the frame is built.
test_rows = [
    ("do a new campaign", 1, 1),
    ("show channel mix", 2, 0),
    ("add 26 mediarows", 3, 1),
    ("integration adserver", 4, 0),
    ("show google analtyics trafficking", 5, 0),
    ("show campaign performance", 6, 0),
]

# Unzip the rows into the three column lists.
command, context, action = (list(column) for column in zip(*test_rows))

df_test = pd.DataFrame(
    {'command': command, 'context': context, 'action': action}
).assign(context=lambda frame: frame['context'] - 1)

X_test = df_test['command']
y_test_context = df_test['context']
y_test_action = df_test['action']

df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   command  6 non-null      object
 1   context  6 non-null      int64 
 2   action   6 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 272.0+ bytes


In [40]:
# PLANNING SPECIFIC DATAFRAME
# Six hand-written commands, all labelled context 3 (1-based) and action 1.

command = [
        "start a new campaign",
        "create new campaign with search, social and dem channels",
        "add a new mediarow for facebook and dv360 ",
        "add new media plan",
        "generate new campaign",
        "create new campaign with facebook, amazon, zemanta and google ads"
        ]

# Every command shares the same labels, so derive them from the command count.
context = [3] * len(command)
action = [1] * len(command)

df_ner = pd.DataFrame(
    {'command': command,
     'context': context,
     'action': action}
)

# Shift this frame's OWN context labels to 0-based. Reading another frame's
# context column here would replace the labels (all 3) with unrelated values.
df_ner.context = df_ner.context - 1

X_test_NER = df_ner.command
# Keep context and action labels under separate names so neither overwrites the other.
y_test_NER_context = df_ner.context
y_test_NER_action = df_ner.action
# Backward-compatible alias: `y_test_NER` has always ended up holding the action labels.
y_test_NER = y_test_NER_action

df_ner.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   command  6 non-null      object
 1   context  6 non-null      int64 
 2   action   6 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 272.0+ bytes


# CONTEXT RECOGNITION MODEL

In [30]:
### BASELINE MODEL: NAIVE BAYES

# Token counts -> TF-IDF weighting -> multinomial Naive Bayes, fitted on the
# context labels.
nb_steps = [
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
]
nb = Pipeline(nb_steps)
nb.fit(X_train, y_train_context)

# Predict the context of each test command.
y_pred = nb.predict(X_test)

nb_test_accuracy = accuracy_score(y_pred, y_test_context)
print('accuracy', nb_test_accuracy)
print(classification_report(y_test_context, y_pred))

accuracy 0.8333333333333334
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      1.00      1.00         1
           2       0.50      1.00      0.67         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1

    accuracy                           0.83         6
   macro avg       0.75      0.83      0.78         6
weighted avg       0.75      0.83      0.78         6



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
# Stochastic Gradient Descent model (hinge loss, i.e. a linear SVM) for CONTEXT

sgd_context = Pipeline([('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=10000, tol=None)),
               ])
sgd_context.fit(X_train, y_train_context)


y_pred_context = sgd_context.predict(X_test)
y_train_pred_context = sgd_context.predict(X_train)

# Score THIS model's predictions (`y_pred_context`). The generic `y_pred` name
# belongs to the Naive Bayes baseline cell and would silently report that
# model's metrics here.
print('accuracy %s' % accuracy_score(y_test_context, y_pred_context))
print('accuracy on train %s' % accuracy_score(y_train_context, y_train_pred_context))
print(classification_report(y_test_context, y_pred_context))
print(classification_report(y_train_context, y_train_pred_context))

accuracy 0.8333333333333334
accuracy on train 1.0
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      1.00      1.00         1
           2       0.50      1.00      0.67         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1

    accuracy                           0.83         6
   macro avg       0.75      0.83      0.78         6
weighted avg       0.75      0.83      0.78         6

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        35
           2       1.00      1.00      1.00        44
           3       1.00      1.00      1.00        23
           4       1.00      1.00      1.00        17
           5       1.00      1.00      1.00        29
           6       1.00     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# ACTION RECOGNITION MODEL

In [32]:
# Stochastic Gradient Descent model (hinge loss, i.e. a linear SVM) for ACTION

sgd_action = Pipeline([('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=10000, tol=None)),
               ])
sgd_action.fit(X_train, y_train_action)


y_pred_action = sgd_action.predict(X_test)
y_train_pred_action = sgd_action.predict(X_train)

# Metrics are called as (y_true, y_pred), the same order classification_report uses.
print('accuracy %s' % accuracy_score(y_test_action, y_pred_action))
print('accuracy on train %s' % accuracy_score(y_train_action, y_train_pred_action))
print(classification_report(y_test_action, y_pred_action))
print(classification_report(y_train_action, y_train_pred_action))

accuracy 1.0
accuracy on train 0.9947368421052631
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6

              precision    recall  f1-score   support

           0       1.00      0.99      1.00       129
           1       0.98      1.00      0.99        61

    accuracy                           0.99       190
   macro avg       0.99      1.00      0.99       190
weighted avg       0.99      0.99      0.99       190



# CUSTOM NER MODEL

In [33]:
# Load custom trained model for M1 NER
ner_model = spacy.load('NER/output/model-best')

# Printed after each command to visually separate the per-command output.
NER_SEPARATOR = '-------------------------------------------------'

# Run NER on every planning command and list each entity with its label.
for cmd in X_test_NER:
    print(f'ORIGINAL COMMAND: {cmd}')
    print()

    doc = ner_model(cmd)
    for ent in doc.ents:
        print(ent.text, ent.label_)

    print(NER_SEPARATOR)
    

ORIGINAL COMMAND: start a new campaign

start CHANNEL
campaign GRANULARITY
-------------------------------------------------
ORIGINAL COMMAND: create new campaign with search, social and dem channels

create ACTION
campaign GRANULARITY
search CHANNEL
social CHANNEL
dem CHANNEL
channels GRANULARITY
-------------------------------------------------
ORIGINAL COMMAND: add a new mediarow for facebook and dv360 

add ACTION
mediarow GRANULARITY
facebook PLATFORM
dv360 PLATFORM
-------------------------------------------------
ORIGINAL COMMAND: add new media plan

add ACTION
media plan GRANULARITY
-------------------------------------------------
ORIGINAL COMMAND: generate new campaign

generate ACTION
campaign GRANULARITY
-------------------------------------------------
ORIGINAL COMMAND: create new campaign with facebook, amazon, zemanta and google ads

create ACTION
campaign GRANULARITY
facebook PLATFORM
amazon PLATFORM
zemanta PLATFORM
google ads PLATFORM
---------------------------------

# FULL ASSISTANT PIPELINE (DEMO)

In [34]:
# Interaction actions list, grouped by category.
# NOTE(review): this and the next two lists are not read by any cell visible
# in this notebook — confirm where they are consumed.
interact_actions = {
    'create': [
        'implement', 'create', 'generate', 'do',
        'build', 'make', 'start', 'initialize',
    ],
    'modify': [
        'add', 'expand', 'remove', 'copy',
        'cut', 'duplicate', 'allocate',
    ],
    'decision': ['export', 'accept', 'reject', 'decline'],
}

# Showing actions list
show_action = [
    'display', 'exhibit', 'show', 'expose',
    'go', 'reveal', 'unveil', 'what',
    "what's", 'how', "how's", 'see',
]

# Trigger actions list
trigger_actions = ['launch', 'trigger', 'run', 'suggest']


# Integer context label -> context name; the position in the list is the label.
context_mapping = dict(enumerate([
    'mycampaign-main_page',
    'mycampaign-mediamix',
    'mycampaign-planning',
    'mycampaign-trafficking-adserver',
    'mycampaign-trafficking-analytics',
    'mycampaign-goals-and-progres ',
    'notifications-recommendations',
]))


# Integer action label -> action name.
action_mapping = dict(enumerate(['show', 'interact']))

In [36]:
# Demo: push one free-text command through context model, action model and NER.
cmd = "Create new mediarow with facebook".lower()

# Wrap the lower-cased command in a one-element Series for the two pipelines.
X = pd.Series(cmd)

# Context and action model outputs
y_context = sgd_context.predict(X)
y_action = sgd_action.predict(X)

# NER tagging: collect (entity text, entity label) pairs
tags = []
for entity in ner_model(cmd).ents:
    tags.append((entity.text, entity.label_))

# Translate the single predicted label of each model into its string name
predicted_context, predicted_action = y_context[0], y_action[0]
context_str = context_mapping[predicted_context]
action_str = action_mapping[predicted_action]

output = dict(context=context_str, action=action_str, tag_mapping=tags)

print(output)

{'context': 'mycampaign-planning', 'action': 'interact', 'tag_mapping': [('create', 'ACTION'), ('mediarow', 'GRANULARITY'), ('facebook', 'PLATFORM')]}


# SAVE MODELS

In [39]:
# Persist both fitted pipelines with joblib, one file per model.

# Context Recognition
joblib.dump(sgd_context, "ContextRecognition/sgd_context.joblib")

# Action Recognition — dump the ACTION pipeline here; writing `sgd_context`
# to this path would store a second copy of the context model under the
# action model's file name.
joblib.dump(sgd_action, "ActionRecognition/sgd_action.joblib")



['ActionRecognition/sgd_action.joblib']