In [35]:
import pandas as pd
import numpy as np


from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
import gensim
print(f'gensim: {gensim.__version__}')


from nltk.tokenize import word_tokenize 
from nltk.tokenize import TweetTokenizer
from gensim.test.utils import common_texts
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.test.utils import get_tmpfile


from tempfile import mkdtemp
import pickle
import joblib


import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="ticks", color_codes=True)


import os
import yaml
import collections
import scattertext as st
import math


from tqdm.notebook import tqdm
tqdm.pandas()


processed_inbound = pd.read_pickle('objects/processed_inbound_extra.pkl')
processed = pd.read_pickle('objects/processed.pkl')


with open(r'objects/intents.yml') as file:
    intents = yaml.load(file, Loader=yaml.FullLoader)


print(f'\nintents:\n{intents}')
print(f'\nprocessed:\n{processed.head()}')

gensim: 4.1.2

intents:
{'battery': ['battery', 'power', 'charge'], 'forgot_password': ['password', 'account', 'login'], 'payment': ['credit', 'card', 'payment', 'pay', 'transaction'], 'repair': ['repair', 'fix', 'broken', 'not working'], 'update': ['update']}

processed:
                                    Processed Inbound  \
0   [new, update, i️, make, sure, download, yester...   
6   [hey, anyone, else, upgraded, io, issue, capit...   
12  [hello, internet, someone, explain, symbol, ke...   
13  [get, screenshot, say, iphonex, reserve, email...   
15  [thank, update, phone, even, slow, barely, wor...   

                                         Real Inbound  \
0   @AppleSupport The newest update. I️ made sure ...   
6   Hey @AppleSupport and anyone else who upgraded...   
12  Hello, internet. Can someone explain why this ...   
13  @AppleSupport I’ve got a screenshot saying my ...   
15  Thank you @AppleSupport I updated my phone and...   

                                        R

In [36]:

ideal = {'Greeting': 'hi hello yo hey whats up howdy morning',
        'Update': 'have problem with update'}

ideal = {'battery': 'battery power charge', 
         'forgot_password': 'password account login',
         'payment': 'credit card payment pay transaction',
         'update': 'update upgrade',
         'info': 'info information know',
         'repair': 'repar fix broken',
         'lost_replace': 'replace lost gone missing trade'
         ,'location': 'nearest  location store'
        }

def add_extra(current_tokenized_data, extra_tweets):
    ''' Adding extra tweets to current tokenized data'''
    
    
    extra_tweets = pd.Series(extra_tweets)

    
    print('Converting to string...')
    string_processed_data = current_tokenized_data.progress_apply(" ".join)

    
    string_processed_data = pd.concat([string_processed_data, extra_tweets], axis = 0)

    
    tknzr = TweetTokenizer(strip_handles = True, reduce_len = True)

    return string_processed_data



In [37]:
processed_inbound_extra = add_extra(processed['Processed Inbound'], list(ideal.values()))


processed_inbound_extra.to_pickle('objects/processed_inbound_extra.pkl')

processed_inbound_extra

Converting to string...


  0%|          | 0/76062 [00:00<?, ?it/s]

0            new update i️ make sure download yesterday
6     hey anyone else upgraded io issue capital i️ m...
12    hello internet someone explain symbol keep app...
13    get screenshot say iphonex reserve email say h...
15    thank update phone even slow barely work thank...
                            ...                        
3                                        update upgrade
4                                 info information know
5                                      repar fix broken
6                       replace lost gone missing trade
7                               nearest  location store
Length: 76070, dtype: object

In [38]:
processed_inbound_extra[-7:]

1                 password account login
2    credit card payment pay transaction
3                         update upgrade
4                  info information know
5                       repar fix broken
6        replace lost gone missing trade
7                nearest  location store
dtype: object

In [39]:
processed_inbound_extra.shape

(76070,)

In [40]:
ideal

{'battery': 'battery power charge',
 'forgot_password': 'password account login',
 'payment': 'credit card payment pay transaction',
 'update': 'update upgrade',
 'info': 'info information know',
 'repair': 'repar fix broken',
 'lost_replace': 'replace lost gone missing trade',
 'location': 'nearest  location store'}

In [41]:
processed.shape

(76062, 3)

In [24]:
def train_doc2vec(string_data, max_epochs, vec_size, alpha):
    
    tagged_data = [TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) 
                   for i, _d in enumerate(string_data)]
    
    
    model = Doc2Vec(vector_size=vec_size, alpha=alpha, min_alpha=0.00025, min_count=1, dm =1)

    model.build_vocab(tagged_data)

    for epoch in range(max_epochs):
        print('iteration {0}'.format(epoch))
        model.train(tagged_data, total_examples = model.corpus_count, epochs= model.epochs)
        
        model.alpha -= 0.0002
        
        model.min_alpha = model.alpha

    
    model.save("models/d2v.model")
    print("Model Saved")
    

train_doc2vec(processed_inbound_extra, max_epochs = 100, vec_size = 20, alpha = 0.025)

iteration 0
iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9
iteration 10
iteration 11
iteration 12
iteration 13
iteration 14
iteration 15
iteration 16
iteration 17
iteration 18
iteration 19
iteration 20
iteration 21
iteration 22
iteration 23
iteration 24
iteration 25
iteration 26
iteration 27
iteration 28
iteration 29
iteration 30
iteration 31
iteration 32
iteration 33
iteration 34
iteration 35
iteration 36
iteration 37
iteration 38
iteration 39
iteration 40
iteration 41
iteration 42
iteration 43
iteration 44
iteration 45
iteration 46
iteration 47
iteration 48
iteration 49
iteration 50
iteration 51
iteration 52
iteration 53
iteration 54
iteration 55
iteration 56
iteration 57
iteration 58
iteration 59
iteration 60
iteration 61
iteration 62
iteration 63
iteration 64
iteration 65
iteration 66
iteration 67
iteration 68
iteration 69
iteration 70
iteration 71
iteration 72
iteration 73
iteration 74
iteration 75
iteration 76
iteration

In [43]:

model = Doc2Vec.load("models/d2v.model")


inbound_d2v = np.array([model.dv[i] for i in range(processed_inbound_extra.shape[0])])


with open('objects/inbound_d2v.pkl', 'wb') as f:
    pickle.dump(inbound_d2v, f)

inbound_d2v

array([[ 1.9251872 ,  0.49455148,  4.5869226 , ..., -0.6890016 ,
        -0.4554145 , -2.1529074 ],
       [ 1.226154  , -1.6674168 ,  9.571457  , ..., -1.301341  ,
        -5.237004  ,  2.0875382 ],
       [-3.392355  , -0.05314038,  1.3120148 , ..., -5.62228   ,
        -0.6627095 , -8.871721  ],
       ...,
       [ 0.56815815, -0.4034965 ,  1.0311732 , ..., -1.6326569 ,
        -0.09723683, -2.1891975 ],
       [-1.3643456 , -0.05787605,  0.829869  , ..., -2.088197  ,
         0.7148511 , -4.470675  ],
       [-4.6119013 , -2.492875  , -1.2945342 , ..., -0.87204   ,
         0.2645559 , -0.40892842]], dtype=float32)

In [44]:
inbound_d2v.shape

(76070, 20)

In [45]:

intents_ideal = {'app': ['app', 'prob']}
inferred_vectors = []

for keywords in intents_ideal.values():
    inferred_vectors.append(model.infer_vector(keywords))
    
inferred_vectors

[array([-0.16594614, -0.07483621,  0.00748736,  0.08855573, -0.07582132,
         0.16455427, -0.08649034, -0.06104358, -0.0829056 ,  0.07724822,
         0.04761351,  0.15359229,  0.06676537, -0.0114236 , -0.03692095,
         0.01588625, -0.05768625, -0.21430911, -0.27805433, -0.1633449 ],
       dtype=float32)]

In [28]:
'hi hello yo hey whats up'.split()

['hi', 'hello', 'yo', 'hey', 'whats', 'up']

In [46]:
ideal

{'battery': 'battery power charge',
 'forgot_password': 'password account login',
 'payment': 'credit card payment pay transaction',
 'update': 'update upgrade',
 'info': 'info information know',
 'repair': 'repar fix broken',
 'lost_replace': 'replace lost gone missing trade',
 'location': 'nearest  location store'}

In [30]:

intents_repr = {'Battery': ['io', 'drain', 'battery', 'iphone', 'twice', 'fast', 'io', 'help'],
    'Update': ['new', 'update', 'i️', 'make', 'sure', 'download', 'yesterday'],
    'iphone': ['instal', 'io', 'make', 'iphone', 'slow', 'work', 'properly', 'help'],
    'app': ['app', 'still', 'longer', 'able', 'control', 'lockscreen'],
    'mac': ['help','mac','app','store','open','can','not','update','macbook','pro','currently','run','o','x',
  'yosemite'], 'greeting': ['hi', 'hello', 'yo', 'hey', 'whats', 'up']
    }


In [47]:

tknzr = TweetTokenizer(strip_handles = True, reduce_len = True)

intents_repr = {k:tknzr.tokenize(v) for k, v in ideal.items()}
print(intents_repr)


with open('objects/intents_repr.yml', 'w') as outfile:
    yaml.dump(intents_repr, outfile, default_flow_style=False)


tags = []

tokenized_processed_inbound = processed_inbound.apply(tknzr.tokenize)

def report_index_loc(tweet, intent_name):
    ''' Takes in the Tweet to find the index for and returns a report of that Tweet index along with what the 
    representative Tweet looks like'''
    try:
        tweets = []
        for i,j in enumerate(tokenized_processed_inbound):
            if j == tweet:
                tweets.append((i, True))
            else:
                tweets.append((i, False))
        index = []
        get_index = [index.append(i[0]) if i[1] == True else False for i in tweets] # Comprehension saves space

        preview = processed_inbound.iloc[index]

        
        tags.append(str(index[0]))
    except IndexError as e:
        print('Index not in list, move on')
        return
        
    return intent_name, str(index[0]), preview


print('TAGGED INDEXES TO LOOK FOR')
for j,i in intents_repr.items():
    try:
        print('\n{} \nIndex: {}\nPreview: {}'.format(*report_index_loc(i,j)))
    except Exception as e:
        print('Index ended')


intents_tags = dict(zip(intents_repr.keys(), tags))
intents_tags

{'battery': ['battery', 'power', 'charge'], 'forgot_password': ['password', 'account', 'login'], 'payment': ['credit', 'card', 'payment', 'pay', 'transaction'], 'update': ['update', 'upgrade'], 'info': ['info', 'information', 'know'], 'repair': ['repar', 'fix', 'broken'], 'lost_replace': ['replace', 'lost', 'gone', 'missing', 'trade'], 'location': ['nearest', 'location', 'store']}
TAGGED INDEXES TO LOOK FOR

battery 
Index: 76062
Preview: 0    battery power charge
dtype: object

forgot_password 
Index: 76063
Preview: 1    password account login
dtype: object

payment 
Index: 76064
Preview: 2    credit card payment pay transaction
dtype: object

update 
Index: 76065
Preview: 3    update upgrade
dtype: object

info 
Index: 76066
Preview: 4    info information know
dtype: object

repair 
Index: 76067
Preview: 5    repar fix broken
dtype: object

lost_replace 
Index: 76068
Preview: 6    replace lost gone missing trade
dtype: object
Index not in list, move on
Index ended


{'battery': '76062',
 'forgot_password': '76063',
 'payment': '76064',
 'update': '76065',
 'info': '76066',
 'repair': '76067',
 'lost_replace': '76068'}

In [48]:
similar_doc = model.docvecs.most_similar('76062',topn = 1000)

similar_doc[:5]

  similar_doc = model.docvecs.most_similar('76062',topn = 1000)


[('16164', 0.8222564458847046),
 ('72493', 0.8113806247711182),
 ('43190', 0.8078210353851318),
 ('51486', 0.8040231466293335),
 ('21504', 0.7991458773612976)]

In [49]:
similar_doc = model.docvecs.most_similar('76065',topn = 1000)
similar_doc

  similar_doc = model.docvecs.most_similar('76065',topn = 1000)


[('5958', 0.8507489562034607),
 ('45949', 0.8449529409408569),
 ('3184', 0.8214262127876282),
 ('68627', 0.8106209635734558),
 ('17547', 0.789264440536499),
 ('17454', 0.7865416407585144),
 ('40038', 0.7848769426345825),
 ('46586', 0.7825417518615723),
 ('1889', 0.7801563739776611),
 ('54699', 0.7779409885406494),
 ('20451', 0.7763447761535645),
 ('7536', 0.7715131044387817),
 ('19306', 0.7710950970649719),
 ('38733', 0.7690646648406982),
 ('9773', 0.7603034973144531),
 ('7695', 0.7587434649467468),
 ('19519', 0.7574315071105957),
 ('128', 0.7560111284255981),
 ('56747', 0.7526546120643616),
 ('12661', 0.7515263557434082),
 ('12424', 0.7512544989585876),
 ('13578', 0.7489392757415771),
 ('45181', 0.7476258873939514),
 ('30744', 0.7471412420272827),
 ('41226', 0.7463936805725098),
 ('16170', 0.7447767853736877),
 ('72279', 0.744543731212616),
 ('14809', 0.7399324178695679),
 ('50699', 0.7391109466552734),
 ('56597', 0.7387629747390747),
 ('27417', 0.7382145524024963),
 ('35341', 0.73810

In [50]:
import nltk
from nltk.corpus import stopwords
stopwords.words('english').index('to')

86

In [51]:
intents_tags

{'battery': '76062',
 'forgot_password': '76063',
 'payment': '76064',
 'update': '76065',
 'info': '76066',
 'repair': '76067',
 'lost_replace': '76068'}

In [52]:
model.docvecs.most_similar('10')

  model.docvecs.most_similar('10')


[('18887', 0.8343899846076965),
 ('59897', 0.8246354460716248),
 ('17809', 0.7873603105545044),
 ('32949', 0.7789666652679443),
 ('45434', 0.7785418629646301),
 ('52100', 0.7737976312637329),
 ('67298', 0.7731215953826904),
 ('64355', 0.76493239402771),
 ('19964', 0.7628946304321289),
 ('73649', 0.761847734451294)]

In [53]:
intents_tags

{'battery': '76062',
 'forgot_password': '76063',
 'payment': '76064',
 'update': '76065',
 'info': '76066',
 'repair': '76067',
 'lost_replace': '76068'}

prompt the user for update or broken.

In [54]:
vals = [word_tokenize(tweet) for tweet in list(processed_inbound.iloc[[10,1]].values)]
vals

[['need', 'something', 'battery', 'life', 'suck', 'as'],
 ['hey',
  'anyone',
  'else',
  'upgraded',
  'io',
  'issue',
  'capital',
  'i️',
  'mail',
  'app',
  'put']]

In [55]:
train = pd.DataFrame()
intent_indexes = {}


def generate_intent(target, itag):
    similar_doc = model.dv.most_similar(itag,topn = target)
    
    indexes = [int(i[0]) for i in similar_doc]
    intent_indexes[intent_name] = indexes

    return [word_tokenize(tweet) for tweet in list(processed_inbound.iloc[indexes].values)]


for intent_name, itag in intents_tags.items():
    train[intent_name] = generate_intent(1000, itag)


manually_added_intents = {
    'speak_representative': [['talk','human','please','person','someone real'],
                             ['let','me','talk','to','apple','support'], 
                             ['can','i','speak','agent','person']], 
    'greeting': [['hi'],['hello'], ['whats','up'], ['good','morning'],
                 ['good','evening'], ['good','night'],['yo'],['hii']],
    'goodbye': [['goodbye'],['bye'],['thank'],['thanks'], ['done'],['byeee']], 
    'challenge_robot': [['robot','human'], ['are','you','robot'],
                       ['who','are','you'],['I','do not','like','you']]
}



def insert_manually(target, prototype):
    ''' Taking a prototype tokenized document to repeat until
    you get length target'''
    factor = math.ceil(target / len(prototype))
    content = prototype * factor
    return [content[i] for i in range(target)]


for intent_name in manually_added_intents.keys():
    train[intent_name] = insert_manually(1000, [*manually_added_intents[intent_name]])



hybrid_intents = {'update':(300,700,[['want','update'], ['update','not','working'], 
                                     ['phone','need','update']], 
                            intents_tags['update']),
                  'info': (800,200, [['need','information'], 
                                       ['want','to','know','about'], ['what','are','macbook','stats'],
                                    ['any','info','next','release','?'],['tell','me','about','the','new']], 
                             intents_tags['info']),
                  'payment': (300,700, [['payment','not','through'], 
                                       ['iphone', 'apple', 'pay', 'but', 'not', 'arrive'],
                                       ['how','pay','for', 'this'],
                                       ['can','i','pay','for','this','first']], 
                             intents_tags['payment']),
                  'forgot_password': (600,400, [['forgot','my','pass'], ['forgot','my','login'
                                ,'details'], ['cannot','log','in','password'],['lost','account','recover','password']], 
                             intents_tags['forgot_password'])
                 }

def insert_hybrid(manual_target, generated_target, prototype, itag):
    return insert_manually(manual_target, prototype) + list(generate_intent(generated_target, itag))


for intent_name, args in hybrid_intents.items():
    train[intent_name] = insert_hybrid(*args)


neat_train = pd.DataFrame(train.T.unstack()).reset_index().iloc[:,1:].rename(columns={'level_1':'Intent', 0: 'Utterance'})

neat_train = neat_train[['Utterance','Intent']]


neat_train.to_pickle('objects/train.pkl')

show = lambda x: x.head(10).style.set_properties(**{'background-color': 'black',                                                   
                                    'color': 'lawngreen',                       
                                    'border-color': 'white'})\
.applymap(lambda x: f"color: {'lawngreen' if isinstance(x,str) else 'red'}")\
.background_gradient(cmap='Blues')

print(train.shape)
show(train)

(1000, 11)


Unnamed: 0,battery,forgot_password,payment,update,info,repair,lost_replace,speak_representative,greeting,goodbye,challenge_robot
0,"['charge', 'iphone', 'still', 'power', 'percentage', 'decrease']","['forgot', 'my', 'pass']","['payment', 'not', 'through']","['want', 'update']","['need', 'information']","['fix', 'box', 'right', 'neoooww', 'please', '️']","['hi', 'shipment', 'beatsx', 'broken', 'claim', 'warantee']","['talk', 'human', 'please', 'person', 'someone real']",['hi'],['goodbye'],"['robot', 'human']"
1,"['phone', 'charge', 'short', 'period', 'time', 'power', 'go', 'phone', 'version']","['forgot', 'my', 'login', 'details']","['iphone', 'apple', 'pay', 'but', 'not', 'arrive']","['update', 'not', 'working']","['want', 'to', 'know', 'about']","['systematix', 'medium', 'apple', 'premium', 'reseller', 'quest', 'mall', 'park', 'circus', 'kolkata', 'india', 'prob', 'defective', 'piece', 'req', 'help']","['fu', 'wit', 'phone', 'b', 'replace', 'question', 'mark', 'around', 'box', 'real', 'nixxas', 'need', 'answer']","['let', 'me', 'talk', 'to', 'apple', 'support']",['hello'],['bye'],"['are', 'you', 'robot']"
2,"['io', 'update', 'cause', 'phone', 'charge', 'charger', 'get', 'resolve']","['cannot', 'log', 'in', 'password']","['how', 'pay', 'for', 'this']","['phone', 'need', 'update']","['what', 'are', 'macbook', 'stats']","['hey', 'myspacebarwasstuckanditriedtogetitworkagian', 'howeverwhenitriedtogetitmoreloseitpoppedoutofmykeyboardandcantgetitbackin', 'canyouguyshelpmeou', 'ihaveaprojecttomakeforschoolbuticantmakeanyprogressthiswaypleasehelp', 'itssoooanoying']","['course', 'get', 'broke', 'one', 'suggestion', 'iphonex']","['can', 'i', 'speak', 'agent', 'person']","['whats', 'up']",['thank'],"['who', 'are', 'you']"
3,"['know', 'battery', 'stayed', 'bad', 'update', 'io', 'please', 'help', 'barely', 'use', 'phone']","['lost', 'account', 'recover', 'password']","['can', 'i', 'pay', 'for', 'this', 'first']","['want', 'update']","['any', 'info', 'next', 'release', '?']","['box', 'question', 'mark', 'exclamation', 'point', 'steve', 'job', 'wya']","['phone', 'tech', 'support', 'told', 'go', 'walgreens', 'buy', 'gift', 'card', 'site', 'working.also', 'want', 'dob']","['talk', 'human', 'please', 'person', 'someone real']","['good', 'morning']",['thanks'],"['I', 'do not', 'like', 'you']"
4,"['new', 'io', 'phone', 'run', 'battery', 'couple', 'hour']","['forgot', 'my', 'pass']","['payment', 'not', 'through']","['update', 'not', 'working']","['tell', 'me', 'about', 'the', 'new']","['please', 'fix', 'keyboard', 'glitch', 'awesome', 'thx', '✌🏼', '️']","['trade', 'iphone', 'apple', 'possible', 'use', 'credit', 'gift', 'card', 'pay', 'part', 'battery', 'replacement', 'fee', 'macbook']","['let', 'me', 'talk', 'to', 'apple', 'support']","['good', 'evening']",['done'],"['robot', 'human']"
5,"['heating', 'machine', 'hour', 'battery', 'total', 'waste', 'money']","['forgot', 'my', 'login', 'details']","['iphone', 'apple', 'pay', 'but', 'not', 'arrive']","['phone', 'need', 'update']","['need', 'information']","['keyboard', 'glitch', 'ah-noyyy-innnggg', 'get', 'shit', 'together']","['hey', 'myspacebarwasstuckanditriedtogetitworkagian', 'howeverwhenitriedtogetitmoreloseitpoppedoutofmykeyboardandcantgetitbackin', 'canyouguyshelpmeou', 'ihaveaprojecttomakeforschoolbuticantmakeanyprogressthiswaypleasehelp', 'itssoooanoying']","['can', 'i', 'speak', 'agent', 'person']","['good', 'night']",['byeee'],"['are', 'you', 'robot']"
6,"['diagnose', 'iphone', 'plus', 'think', 'battery', 'go', 'apple', 'care', 'still']","['cannot', 'log', 'in', 'password']","['how', 'pay', 'for', 'this']","['want', 'update']","['want', 'to', 'know', 'about']","['omg', 'thx', 'fam', 'muchluv', 'teamiphone', 'appleofmyeye']","['macbook', 'pro', 'w', 'tb', 'month', 'old', 'sticky', 'key', 'today', 'i', 'be', 'aware', 'compress', 'air', 'thing', 'month', 'old', 'come']","['talk', 'human', 'please', 'person', 'someone real']",['yo'],['goodbye'],"['who', 'are', 'you']"
7,"['run', 'io', 'youtube', 'heating', 'bad', 'battery', 'life']","['lost', 'account', 'recover', 'password']","['can', 'i', 'pay', 'for', 'this', 'first']","['update', 'not', 'working']","['what', 'are', 'macbook', 'stats']","['focus', 'vertically', 'upward', 'equates', 'horizontal', 'dist', 'focus', 'horizontally', 'equates', 'vertically', 'downward']","['congratulation', 'building', 'feather', 'delicate', 'phone', 'vanity', 'case', 'need', 'handle']","['let', 'me', 'talk', 'to', 'apple', 'support']",['hii'],['bye'],"['I', 'do not', 'like', 'you']"
8,"['iphone', 'get', 'ridiculously', 'hot', 'charge', 'seem', 'get', 'bad']","['forgot', 'my', 'pass']","['payment', 'not', 'through']","['phone', 'need', 'update']","['any', 'info', 'next', 'release', '?']","['apple', 'really', 'annoy', 'box', '🤬', 'dhrvafdxwwxhtas']","['warranty', 'international', 'buy', 'iphone', 'usa', 'cover', 'brazil', 'thx']","['can', 'i', 'speak', 'agent', 'person']",['hi'],['thank'],"['robot', 'human']"
9,"['use', 'operating', 'phone', 'month', 'old', 'half', 'battery', 'life', 'update']","['forgot', 'my', 'login', 'details']","['iphone', 'apple', 'pay', 'but', 'not', 'arrive']","['want', 'update']","['tell', 'me', 'about', 'the', 'new']","['highly', 'tout', 'update', 'make', 'ipad', 'iphone', 'experience', 'miserable', 'flat', 'miserable', 'thankyou']","['case', 'slowly', 'crumble', 'away', 'lose', 'chunk', 'silicon', 'coroner', 'phone', 'uncovered']","['talk', 'human', 'please', 'person', 'someone real']",['hello'],['thanks'],"['are', 'you', 'robot']"


In [56]:
print(neat_train.shape)
show(neat_train)

(11000, 2)


Unnamed: 0,Utterance,Intent
0,"['charge', 'iphone', 'still', 'power', 'percentage', 'decrease']",battery
1,"['forgot', 'my', 'pass']",forgot_password
2,"['payment', 'not', 'through']",payment
3,"['want', 'update']",update
4,"['need', 'information']",info
5,"['fix', 'box', 'right', 'neoooww', 'please', '️']",repair
6,"['hi', 'shipment', 'beatsx', 'broken', 'claim', 'warantee']",lost_replace
7,"['talk', 'human', 'please', 'person', 'someone real']",speak_representative
8,['hi'],greeting
9,['goodbye'],goodbye


In [57]:
neat_train.tail(44)

Unnamed: 0,Utterance,Intent
10956,"[io, iphone, plus, bad, battery, life, ever, e...",battery
10957,"[itunes, open, keep, get, login, box, pop, eve...",forgot_password
10958,"[reserve, iphone, x, screenshots, prove, accou...",payment
10959,"[legal, release, update, make, product, virtua...",update
10960,"[hi, want, view, u, app, store, i, be, uk, one...",info
10961,"[concert, good, phone, broke, lmfao, please, s...",repair
10962,"[hello, see, yellow, stain, line, low, left, s...",lost_replace
10963,"[talk, human, please, person, someone real]",speak_representative
10964,"[good, evening]",greeting
10965,[goodbye],goodbye


In [58]:
processed.head(5)

Unnamed: 0,Processed Inbound,Real Inbound,Real Outbound
0,"[new, update, i️, make, sure, download, yester...",@AppleSupport The newest update. I️ made sure ...,@115854 Lets take a closer look into this issu...
6,"[hey, anyone, else, upgraded, io, issue, capit...",Hey @AppleSupport and anyone else who upgraded...,"@115856 Hey, let's work together to figure out..."
12,"[hello, internet, someone, explain, symbol, ke...","Hello, internet. Can someone explain why this ...",@115861 You're in the right place; we'll do al...
13,"[get, screenshot, say, iphonex, reserve, email...",@AppleSupport I’ve got a screenshot saying my ...,@115863 Go ahead and send us a DM please. Let ...
15,"[thank, update, phone, even, slow, barely, wor...",Thank you @AppleSupport I updated my phone and...,"@115864 We'd like to help, but we'll need more..."
