# Creation of Intents

#### Loading Data

In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_colwidth', None)

In [3]:
data = pd.read_csv("question_responce.csv").drop(columns='Unnamed: 0')
data = data.drop_duplicates()
data.head(5)

Unnamed: 0,responce,question
0,@115904 We'll be sure to pass along your kind words! #AATeam,@AmericanAir Erica on the lax team is amazing give her a raise ty
1,@115904 Our apologies for the delay in responding to you. Have you made it to LAX? Let us know if you still need assistance.,@AmericanAir Could you have someone on your lax team available to guide me to my gate ASAP
2,"@115905 Aww, that's definitely a future pilot in the making! #HappyHalloween",Ben Tennyson and an American Airlines pilot. 🎃 #trunkortreat #halloween #2017 #diycostume #parenting @americanair … https://t.co/f1nNHQ0iLa https://t.co/lDViDkRdB1
3,@115906 We're sorry for your frustration.,"@AmericanAir Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program."
4,@115909 We're glad you got to kick back and enjoy a show while flying! Thanks for your kind words.,"Thank you, @AmericanAir for playing #ThisIsUs and for having great flight attendants on my flight back home!"


#### Preprocessing 

**Steps :**

- Tokenization: Split the text into sentences and the sentences into words. Lowercase the words and remove punctuation.
- All stopwords are removed.
- All words starting by '@' and '#' are removed
- Words are lemmatized — words in third person are changed to first person and verbs in past and future tenses are changed into present.
- bags of words are created

In [4]:
import gensim
import nltk
import re
import numpy as np
import json
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.corpus import stopwords, wordnet
from nltk.stem.porter import *
nltk.download('words')

lemmatizer = nltk.stem.WordNetLemmatizer()
stemmer = SnowballStemmer("english")

np.random.seed(2018)
#nltk.download('wordnet')
#nltk.download('stopwords')
#nltk.download('words')
words = set(nltk.corpus.words.words())

[nltk_data] Downloading package words to C:\Users\PONNOU
[nltk_data]     Wilfried\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


Extra stopwords

In [5]:
extra_words = ['flight','fly','plane','thanks', 'thank','get','please']
full_stopwords = STOPWORDS.union(set(extra_words))

In [6]:
stop_words = stopwords.words('english')
stop_words.extend(extra_words)

#### Preprocessing

Remove unecessary items int the text

In [7]:
def question_preprocess(text):
    text = " ".join(filter(lambda x:x[0]!='@', text.split()))
    text = " ".join(filter(lambda x:x[0]!='#', text.split()))
    text = " ".join(filter(lambda x:x[0:4]!='http', text.split()))
    return text

In [28]:
data['question_clean'] = data['question'].apply(question_preprocess)
data['question_clean']

0                                                                          Erica on the lax team is amazing give her a raise ty
1                                                 Could you have someone on your lax team available to guide me to my gate ASAP
2                                                                              Ben Tennyson and an American Airlines pilot. 🎃 …
3       Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program.
4                                         Thank you, for playing and for having great flight attendants on my flight back home!
                                                                 ...                                                           
1847                                and have nailed in the transatlantic WiFi service. I am able to join my daily scrum onboard
1848                                                                                Average price of tic

In [9]:
data['question_preproc'] = data['question_clean'].apply(gensim.utils.simple_preprocess)
data['question_preproc']

0                                                                             [erica, on, the, lax, team, is, amazing, give, her, raise, ty]
1                                             [could, you, have, someone, on, your, lax, team, available, to, guide, me, to, my, gate, asap]
2                                                                                        [ben, tennyson, and, an, american, airlines, pilot]
3       [right, but, earned, those, also, shouldn, have, to, pay, to, pass, them, to, my, own, spouse, you, need, to, change, your, program]
4                                        [thank, you, for, playing, and, for, having, great, flight, attendants, on, my, flight, back, home]
                                                                        ...                                                                 
1847                               [and, have, nailed, in, the, transatlantic, wifi, service, am, able, to, join, my, daily, scrum, onboard]
1848         

In [10]:
def remove_stopwords(text):
    return [[w for w in simple_preprocess(str(doc)) if w not in stop_words] for doc in text]

In [11]:
data["question_stop"] = remove_stopwords(data['question_preproc'])
data["question_stop"] = data["question_stop"].apply(" ".join)
data["question_stop"]

0                                                erica lax team amazing give raise ty
1                                    could someone lax team available guide gate asap
2                                                ben tennyson american airlines pilot
3                               right earned also pay pass spouse need change program
4                                                  playing great attendants back home
                                            ...                                      
1847                  nailed transatlantic wifi service able join daily scrum onboard
1848                                                     average price ticket one way
1849          really annoyed month since damaged bag claim never heard back done told
1850    terrible service wait ages trying call number almost two months gone response
1851                         charges patrons change flights every time airport closes
Name: question_stop, Length: 1852, dtype: object

Lemmatize

In [12]:
#lemmatize
def lemmat(text):
    result = []
    text
    for token in gensim.utils.simple_preprocess(text):
        if token not in stop_words and len(token) > 2:
            result.append(WordNetLemmatizer().lemmatize(token))
    return result


In [13]:
data ["question_lem"] = data["question_stop"].apply(lemmat)

In [14]:
data[["question","question_lem"]]

Unnamed: 0,question,question_lem
0,@AmericanAir Erica on the lax team is amazing give her a raise ty,"[erica, lax, team, amazing, give, raise]"
1,@AmericanAir Could you have someone on your lax team available to guide me to my gate ASAP,"[could, someone, lax, team, available, guide, gate, asap]"
2,Ben Tennyson and an American Airlines pilot. 🎃 #trunkortreat #halloween #2017 #diycostume #parenting @americanair … https://t.co/f1nNHQ0iLa https://t.co/lDViDkRdB1,"[ben, tennyson, american, airline, pilot]"
3,"@AmericanAir Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program.","[right, earned, also, pay, pas, spouse, need, change, program]"
4,"Thank you, @AmericanAir for playing #ThisIsUs and for having great flight attendants on my flight back home!","[playing, great, attendant, back, home]"
...,...,...
1847,@AmericanAir and @172 have nailed in the transatlantic WiFi service. I am able to join my @172377 daily scrum onboard,"[nailed, transatlantic, wifi, service, able, join, daily, scrum, onboard]"
1848,@AmericanAir Average price of ticket out: $2500 one way.,"[average, price, ticket, one, way]"
1849,@AmericanAir Really annoyed been over a month since my damaged bag claim never heard back! done as told...,"[really, annoyed, month, since, damaged, bag, claim, never, heard, back, done, told]"
1850,@AmericanAir terrible service wait ages trying to call that number almost two months gone no response,"[terrible, service, wait, age, trying, call, number, almost, two, month, gone, response]"


#### Making bigrams and trigrams

In [15]:
bigram = gensim.models.Phrases(data["question_lem"], min_count=3, threshold=10)
bigram_mod = gensim.models.phrases.Phraser(bigram)

#example
for i in range(15,24):
    print(bigram_mod[data["question_lem"][i]])

['god', 'going', 'ten', 'minute']
['give', 'grief', 'thing', 'poorly', 'credit', 'guacamole', 'admiral_club', 'outstanding', 'addition']
['aware', 'fit', 'overhead', 'return_trip', 'luggage', 'going', 'pre', 'security', 'gate', 'chck', 'empty', 'overhead', 'flt']
['despite', 'flying', 'ewr', 'forced', 'mia', 'check', 'guitar', 'amp', 'charged', 'gate', 'security', 'flt', 'full']
['fifteen', 'empty', 'business_class', 'seat', 'one', 'person', 'got', 'upgraded']
['still_waiting', 'compensate', 'crap', 'put', 'honeymoon', 'almost', 'year', 'ago']
['really', 'hour', 'international', 'seat', 'back', 'entertainment', 'screen', 'mon']
['still', 'rather', 'screen', 'though', 'rather', 'cause', 'neck', 'problem', 'looking', 'tablet']
['say', 'attenants', 'bdl', 'ord', 'awesome', 'gate_agent', 'bdl', 'also', 'need', 'shout', 'awesome', 'customer_service', 'treated', 'nonrev', 'well']


In [16]:
def bigrams(text):
    return [bigram_mod[doc] for doc in text]

In [17]:
data["question_lem_bigrams"] = bigrams(data["question_lem"])

In [18]:
data = data[['responce', 'question','question_lem_bigrams']]

#### prev preprocessing

In [19]:
def lemmatize(text):
    return WordNetLemmatizer().lemmatize(text)

def preprocess(text):
    result = []
    text = " ".join(filter(lambda x:x[0]!='@', text.split()))
    text = " ".join(filter(lambda x:x[0]!='#', text.split()))
    text = " ".join(filter(lambda x:x[0:4]!='http', text.split()))
    text = " ".join(w for w in nltk.wordpunct_tokenize(text) \
        if w.lower() in words or not w.isalpha())
    for token in gensim.utils.simple_preprocess(text):
        if token not in stop_words and len(token) > 2:
            result.append(lemmatize(token))
    return result

In [20]:
data['question_prepro'] = data['question'].apply(preprocess)
data['question_prepro']

0                                                  [lax, team, amazing, give, raise]
1                                [could, someone, lax, team, available, guide, gate]
2                                                                       [ben, pilot]
3                             [right, also, pay, pas, spouse, need, change, program]
4                                                                [great, back, home]
                                            ...                                     
1847                              [transatlantic, service, able, join, daily, scrum]
1848                                              [average, price, ticket, one, way]
1849                     [really, month, since, bag, claim, never, back, done, told]
1850    [terrible, service, wait, trying, call, number, almost, two, gone, response]
1851                                                  [change, every, time, airport]
Name: question_prepro, Length: 1852, dtype: object

We need to clean the responce too

In [21]:
def response_preprocess(text):
    text = " ".join(filter(lambda x:x[0]!='@', text.split()))
    text = " ".join(filter(lambda x:x[0]!='#', text.split()))
    return text

In [22]:
data['responce'] = data['responce'].apply(response_preprocess)
data['responce']

0                                                                                                       We'll be sure to pass along your kind words!
1                               Our apologies for the delay in responding to you. Have you made it to LAX? Let us know if you still need assistance.
2                                                                                               Aww, that's definitely a future pilot in the making!
3                                                                                                                  We're sorry for your frustration.
4                                                         We're glad you got to kick back and enjoy a show while flying! Thanks for your kind words.
                                                                            ...                                                                     
1847                           We know staying connected is important, why not take your office to 35k fee

Drop empty questions

In [23]:
pd.set_option('display.max_colwidth', None)

In [24]:
data = data[data['question_prepro'].map(lambda d: len(d)) > 1]
data

Unnamed: 0,responce,question,question_lem_bigrams,question_prepro
0,We'll be sure to pass along your kind words!,@AmericanAir Erica on the lax team is amazing give her a raise ty,"[erica, lax, team, amazing, give, raise]","[lax, team, amazing, give, raise]"
1,Our apologies for the delay in responding to you. Have you made it to LAX? Let us know if you still need assistance.,@AmericanAir Could you have someone on your lax team available to guide me to my gate ASAP,"[could, someone, lax, team, available, guide, gate, asap]","[could, someone, lax, team, available, guide, gate]"
2,"Aww, that's definitely a future pilot in the making!",Ben Tennyson and an American Airlines pilot. 🎃 #trunkortreat #halloween #2017 #diycostume #parenting @americanair … https://t.co/f1nNHQ0iLa https://t.co/lDViDkRdB1,"[ben, tennyson, american_airline, pilot]","[ben, pilot]"
3,We're sorry for your frustration.,"@AmericanAir Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program.","[right, earned, also, pay, pas, spouse, need, change, program]","[right, also, pay, pas, spouse, need, change, program]"
4,We're glad you got to kick back and enjoy a show while flying! Thanks for your kind words.,"Thank you, @AmericanAir for playing #ThisIsUs and for having great flight attendants on my flight back home!","[playing, great, attendant, back, home]","[great, back, home]"
...,...,...,...,...
1847,"We know staying connected is important, why not take your office to 35k feet? We're so glad you're enjoying the WiFi!",@AmericanAir and @172 have nailed in the transatlantic WiFi service. I am able to join my @172377 daily scrum onboard,"[nailed, transatlantic, wifi, service, able, join, daily, scrum, onboard]","[transatlantic, service, able, join, daily, scrum]"
1848,We've capped our fares for nonstop flights at $99 for Puerto Rico through the 8th of Oct. Book travel here: https://t.co/iJWiiSmxCO,@AmericanAir Average price of ticket out: $2500 one way.,"[average, price, ticket, one_way]","[average, price, ticket, one, way]"
1849,Please give our Baggage team a call at 800-866-4010 for updates that may be available.,@AmericanAir Really annoyed been over a month since my damaged bag claim never heard back! done as told...,"[really, annoyed, month, since, damaged, bag, claim, never, heard, back, done, told]","[really, month, since, bag, claim, never, back, done, told]"
1850,"Our apologies for the hold. Our Central Baggage team will help at that number. If we can get an update, then please DM your bag file number.",@AmericanAir terrible service wait ages trying to call that number almost two months gone no response,"[terrible, service, wait, age, trying, call, number, almost, two, month, gone, response]","[terrible, service, wait, trying, call, number, almost, two, gone, response]"


#### Building LDA

In [25]:
import gensim.corpora as corpora
from pprint import pprint

In [31]:
id2word = corpora.Dictionary(data["question_prepro"])
#id2word.filter_extremes(no_below=3)

corpus = [id2word.doc2bow(text) for text in data["question_prepro"]]

In [45]:
for i in range(20):
    print(corpus[i])

[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1)]
[(2, 1), (4, 1), (5, 1), (6, 1), (7, 1), (8, 1), (9, 1)]
[(10, 1), (11, 1)]
[(12, 1), (13, 1), (14, 1), (15, 1), (16, 1), (17, 1), (18, 1), (19, 1)]
[(20, 1), (21, 1), (22, 1)]
[(23, 1), (24, 1), (25, 1), (26, 1), (27, 1), (28, 1)]
[(29, 2), (30, 1), (31, 1)]
[(32, 1), (33, 1), (34, 1), (35, 1), (36, 1)]
[(16, 1), (36, 1), (37, 1), (38, 1), (39, 1), (40, 1)]
[(16, 1), (41, 1), (42, 1), (43, 1), (44, 1), (45, 1), (46, 1), (47, 1)]
[(5, 1), (36, 1), (48, 1), (49, 1), (50, 1), (51, 1), (52, 1)]
[(47, 1), (53, 1), (54, 1)]
[(55, 1), (56, 1), (57, 1)]
[(58, 1), (59, 1), (60, 1)]
[(61, 1), (62, 1), (63, 1)]
[(1, 1), (29, 1), (64, 1), (65, 1), (66, 1), (67, 1), (68, 1)]
[(7, 1), (62, 1), (69, 1), (70, 1), (71, 1), (72, 1), (73, 1), (74, 1), (75, 1)]
[(7, 1), (74, 1), (76, 1), (77, 1), (78, 1), (79, 1), (80, 1), (81, 1)]
[(43, 1), (70, 1), (82, 1), (83, 1), (84, 1), (85, 1), (86, 1)]
[(36, 1), (87, 1), (88, 1), (89, 1), (90, 1), (91, 1), (92, 1), (93, 1

In [51]:
lda_model = gensim.models.LdaModel(corpus=corpus,
                                       id2word=id2word,
                                       num_topics=20,
                                       random_state=100,
                                       update_every=1,
                                       chunksize=100,
                                       passes=10,
                                       alpha='auto',
                                       per_word_topics=True)

  and should_run_async(code)


In [52]:
print(corpus[:1])
id2word[3]

[[(0, 1), (1, 1), (2, 1), (3, 1), (4, 1)]]


  and should_run_async(code)


'raise'

In [53]:
pprint(lda_model.print_topics())
doc_lda=lda_model[corpus]

[(0,
  '0.220*"way" + 0.089*"good" + 0.071*"need" + 0.069*"change" + 0.068*"trip" + '
  '0.049*"hey" + 0.047*"working" + 0.041*"program" + 0.032*"right" + '
  '0.027*"also"'),
 (1,
  '0.105*"know" + 0.093*"number" + 0.089*"call" + 0.057*"issue" + 0.042*"rude" '
  '+ 0.041*"due" + 0.038*"fix" + 0.037*"left" + 0.035*"pilot" + '
  '0.027*"credit"'),
 (2,
  '0.194*"today" + 0.071*"crew" + 0.063*"give" + 0.052*"year" + 0.050*"team" + '
  '0.049*"family" + 0.038*"lax" + 0.035*"awful" + 0.032*"whole" + '
  '0.030*"terminal"'),
 (3,
  '0.161*"without" + 0.082*"claim" + 0.061*"look" + 0.057*"free" + '
  '0.055*"luggage" + 0.041*"least" + 0.038*"landed" + 0.029*"via" + '
  '0.027*"send" + 0.026*"line"'),
 (4,
  '0.220*"bag" + 0.117*"like" + 0.093*"say" + 0.068*"could" + 0.051*"next" + '
  '0.041*"keep" + 0.030*"enough" + 0.027*"small" + 0.027*"tell" + 0.013*"dog"'),
 (5,
  '0.255*"gate" + 0.118*"got" + 0.097*"early" + 0.067*"nothing" + 0.057*"said" '
  '+ 0.035*"food" + 0.024*"tonight" + 0.023*"

  and should_run_async(code)


display the topic's top 10 words

In [24]:
topwords = np.array(lda_model.show_topics(num_topics=40, num_words=10))
for i in enumerate(topwords[:,1]):
    print(i)

(0, '0.031*"check" + 0.014*"gate" + 0.011*"today" + 0.011*"give" + 0.011*"fly" + 0.008*"hey" + 0.008*"charlotte" + 0.008*"new" + 0.008*"yes" + 0.008*"try"')
(1, '0.021*"want" + 0.013*"return" + 0.013*"take" + 0.013*"travel" + 0.013*"give" + 0.010*"bag" + 0.010*"day" + 0.010*"change" + 0.010*"work" + 0.010*"know"')
(2, '0.022*"customer_service" + 0.021*"time" + 0.017*"would" + 0.017*"go" + 0.015*"book" + 0.013*"frequent_flyer" + 0.013*"chance" + 0.013*"know" + 0.013*"seat" + 0.013*"pay"')
(3, '0.041*"seat" + 0.016*"pay" + 0.013*"another" + 0.013*"fly" + 0.012*"say" + 0.011*"time" + 0.011*"get" + 0.010*"board" + 0.010*"help" + 0.009*"gate"')
(4, '0.030*"delay" + 0.024*"make" + 0.014*"get" + 0.011*"tell" + 0.011*"gate" + 0.011*"use" + 0.011*"minutes" + 0.011*"help" + 0.010*"today" + 0.010*"amp"')
(5, '0.017*"baggage" + 0.017*"miami" + 0.014*"could" + 0.014*"get" + 0.014*"make" + 0.014*"today" + 0.014*"still" + 0.010*"special" + 0.010*"amp" + 0.010*"ord"')
(6, '0.019*"check" + 0.017*"get" 

In [27]:
#!pip install pyLDAvis.gensim

ERROR: Could not find a version that satisfies the requirement pyLDAvis.gensim (from versions: none)
ERROR: No matching distribution found for pyLDAvis.gensim


In [54]:
import pyLDAvis
import pyLDAvis.gensim_models
import matplotlib.pyplot as plt
%matplotlib inline

  and should_run_async(code)


In [55]:
pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim_models.prepare(lda_model, corpus, id2word,mds='mmds')
vis

  and should_run_async(code)


In [62]:
from gensim.models import CoherenceModel
# Compute Perplexity
print('\nPerplexity: ', lda_model.log_perplexity(corpus))  # a measure of how good the model is. lower the better.

# Compute Coherence Score
coherence_model_lda = CoherenceModel(model=lda_model, texts=data['question_lem_bigrams'], dictionary=id2word, coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)

  and should_run_async(code)



Perplexity:  -16.11437959881851

Coherence Score:  nan


  m_lr_i = np.log(numerator / denominator)
  return cv1.T.dot(cv2)[0, 0] / (_magnitude(cv1) * _magnitude(cv2))


#### Try to extract topic from new text 

In [63]:
from operator import itemgetter

  and should_run_async(code)


In [64]:
def get_highest_topic(text):
  text_cleaned = gensim.utils.simple_preprocess(question_preprocess(text))
  text_lem = bigram_mod[lemmat(" ".join([w for w in text_cleaned if w not in stop_words]))]

  result = lda_model[id2word.doc2bow(text_lem)]
  return max(result, key=itemgetter(1))[0]

get_highest_topic('@AmericanAir terrible service wait ages trying to call customer service almost two months gone no response')

  and should_run_async(code)


TypeError: '>' not supported between instances of 'tuple' and 'int'

In [81]:

def get_highest_topic(text):
  text_cleaned = preprocess(text)
  result = lda_model[id2word.doc2bow(text_cleaned)]
  #print("Max topic topic : ", max(result, key=itemgetter(1))[0], 
  #     "\nWith : ", max(result, key=itemgetter(1))[1])
  flat_list = [x for xs in result for x in xs]
  return max(flat_list[:18],key=itemgetter(1))[0]

get_highest_topic('Im black.')



  and should_run_async(code)


7

In [82]:
data['topic'] = data['question'].apply(get_highest_topic)
data.head(10)

  and should_run_async(code)


Unnamed: 0,responce,question,question_lem_bigrams,question_prepro,question_clean,topic
0,We'll be sure to pass along your kind words!,@AmericanAir Erica on the lax team is amazing give her a raise ty,"[erica, lax, team, amazing, give, raise]","[lax, team, amazing, give, raise]",Erica on the lax team is amazing give her a raise ty,2
1,Our apologies for the delay in responding to you. Have you made it to LAX? Let us know if you still need assistance.,@AmericanAir Could you have someone on your lax team available to guide me to my gate ASAP,"[could, someone, lax, team, available, guide, gate, asap]","[could, someone, lax, team, available, guide, gate]",Could you have someone on your lax team available to guide me to my gate ASAP,19
2,"Aww, that's definitely a future pilot in the making!",Ben Tennyson and an American Airlines pilot. 🎃 #trunkortreat #halloween #2017 #diycostume #parenting @americanair … https://t.co/f1nNHQ0iLa https://t.co/lDViDkRdB1,"[ben, tennyson, american_airline, pilot]","[ben, pilot]",Ben Tennyson and an American Airlines pilot. 🎃 …,1
3,We're sorry for your frustration.,"@AmericanAir Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program.","[right, earned, also, pay, pas, spouse, need, change, program]","[right, also, pay, pas, spouse, need, change, program]","Right, but I earned those. I also shouldn’t have to pay to pass them to my own spouse. You need to change your program.",0
4,We're glad you got to kick back and enjoy a show while flying! Thanks for your kind words.,"Thank you, @AmericanAir for playing #ThisIsUs and for having great flight attendants on my flight back home!","[playing, great, attendant, back, home]","[great, back, home]","Thank you, for playing and for having great flight attendants on my flight back home!",11
5,We never want your experience to be anything less than perfect. We're sorry the Wi-Fi was slower than expected.,@AmericanAir's wifi makes Amtrak's wifi look pro. At least theirs is free. @5804 has decent wifi @ reasonable prices. Why not AA?,"[wifi, make, amtrak, wifi, look, pro, least, free, decent, wifi, reasonable, price]","[look, pro, least, free, decent, reasonable]",wifi makes Amtrak's wifi look pro. At least theirs is free. has decent wifi reasonable prices. Why not AA?,3
6,"Thanks for the shout-out and for stopping by, Marc! We'll pass this on to our SFO Admirals Club managers.","Wonderful club! @americanair (@ American Airlines Admirals Club in San Francisco, CA) https://t.co/GWFwuGVgNA","[wonderful, club, american_airline, admiral_club, san, francisco]","[wonderful, club, club, san]","Wonderful club! (@ American Airlines Admirals Club in San Francisco, CA)",10
7,They'll be more than happy to walk you through it and help with the booking process.,"@AmericanAir already did...changed browsers, did all my techie tricks and still can't make a reservation after 1 hour!!","[already, changed, browser, techie, trick, still, make, reservation, hour]","[already, still, make, reservation, hour]","already did...changed browsers, did all my techie tricks and still can't make a reservation after 1 hour!!",15
8,We're sorry for the long wait. The next available agent will be with you as soon as possible.,@AmericanAir ........still....on....hold.....they made the booking. I am trying to pay.....so #frustrating.,"[still, hold, made, booking, trying, pay]","[still, hold, made, booking, trying, pay]",........still....on....hold.....they made the booking. I am trying to pay.....so,14
9,Fares sell in real time. We're sorry we weren't able to snag the lower fare for you.,@AmericanAir well now i am told the ticket cost is an additional $200/person to pay....because of the wait....omg,"[well, told, ticket, cost, additional, person, pay, wait, omg]","[well, told, ticket, cost, additional, person, pay, wait]",well now i am told the ticket cost is an additional $200/person to pay....because of the wait....omg,8


#### Building Intents

In [85]:
intents = {"Greeting": ["Hello", "How are you doing?", "Greetings!", "How do you do?"],
          "Apology": ["No problem"],
          "Thanks": ["No problem", "You're welcome"],
          "Goodbye": ["It was nice speaking to you", "See you later", "Speak soon!"]
             }

  and should_run_async(code)


In [86]:
def update_intents(df):
  for index, row in df.iterrows():
    if str(row['topic']) not in intents :
      intents[str(row['topic'])] = []
    if row['responce'] != "":
      intents[str(row['topic'])].append(str(row['responce']))

  and should_run_async(code)


In [87]:
update_intents(data)

  and should_run_async(code)


Save Intents in order to use it again

In [88]:
with open('intents.json', 'w') as fp:
    json.dump(intents, fp)

  and should_run_async(code)


#### Doc2Vec

In [89]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

  and should_run_async(code)


In [97]:
tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(data['responce'])]
model = Doc2Vec(tagged_data, window=1, min_count=2, epochs = 200)

  and should_run_async(code)


#### First Results 

In [98]:
def chatbot(text):
  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)
  topic = get_highest_topic('text')
  for l in result:
    if data['topic'].iloc[int(l[0])] == topic:
      return data['responce'].iloc[int(l[0])]

  and should_run_async(code)


In [99]:
#Preexisting questions and responses
list_word = ["@AmericanAir what’s going on with flight 301 DTW &gt; DFW? Delayed over 3 hours. Will it be canceled?", 
             "@AmericanAir awful service, more than 1 hour delay due to logbook maintenance issues. Where is preventive maintenance?",
             "@AmericanAir Yes. Refund my plane ticket😡 with your racist ass workers",
             "@AmericanAir I'm aware. Fits in overhead.This was a return trip. Had same luggage going. This was pre-security. Gate chck, ok, but Empty overheads on flt"]

list_resp = ["Looks like it has a maintenance delay and it's currently scheduled to take off at 10:10p.",
             "We do many checks pre and post departure. Please share your flight number if we can provide an update.",
             "We don't tolerate discrimination of any kind. Please DM your record locator and contact details (phone and email)",
             "We'd like to share this feedback. Please DM your record locator."]

for q,r in zip(list_word, list_resp):
  print("QUESTION :", q)
  print("EXPECTED :", r)
  print("PREDICTED :", chatbot(q),"\n")
  


QUESTION : @AmericanAir what’s going on with flight 301 DTW &gt; DFW? Delayed over 3 hours. Will it be canceled?
EXPECTED : Looks like it has a maintenance delay and it's currently scheduled to take off at 10:10p.
PREDICTED : They sound pretty tasty, Bryce. Thanks again for letting us know, we always like seeing our frequent flyers happy! 

QUESTION : @AmericanAir awful service, more than 1 hour delay due to logbook maintenance issues. Where is preventive maintenance?
EXPECTED : We do many checks pre and post departure. Please share your flight number if we can provide an update.
PREDICTED : Treat yo self, Will. Always great to have you on board. 

QUESTION : @AmericanAir Yes. Refund my plane ticket😡 with your racist ass workers
EXPECTED : We don't tolerate discrimination of any kind. Please DM your record locator and contact details (phone and email)
PREDICTED : We're sorry you're having trouble with us. Please work with our reservations team to get this booked. 

QUESTION : @American

  and should_run_async(code)
  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)
  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)
  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)
  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)


In [102]:
question = input('Enter your question : ')
chatbot(question)

  and should_run_async(code)


Enter your question : How can i get my reservation


  result = model.docvecs.most_similar(positive=[model.infer_vector(preprocess(text))], topn=1850)


'We currently have an estimated departure time of 10:22a and should be in the air towards PHL shortly. Our apologies for the late start.'