## Custom Model

### Initialization

In [None]:
import json
import pandas as pd

### Import JSON 

In [None]:
# Load the raw intents file and flatten it into two columns. Each cell of the
# 'intents' column is indexed with the keys 'dialogue' and 'intent' below, so
# it is expected to be a dict-like record carrying both.
df = pd.read_json('intents.json')
df['text'] = df['intents'].apply(lambda x: x['dialogue'])
df['intent'] = df['intents'].apply(lambda x: x['intent'])
df.drop('intents', axis=1, inplace=True)

# Export only the dialogue text. Handing pandas the path (instead of a handle
# from a bare open()) means the file is opened, flushed and closed by pandas,
# so no descriptor is leaked and the CSV is complete on disk when this returns.
df['text'].to_csv('intents.csv', index=False)

# Last expression of the cell so the notebook actually renders the preview.
df.head()

### Drop Duplicates

In [None]:
# Remove repeated dialogue texts in place, keeping the first row seen for each text.
df.drop_duplicates(subset=['text'], keep='first', inplace=True)

### Value Counts

In [None]:
# Class balance check: number of examples per intent, most frequent first.
df['intent'].value_counts(sort=True, ascending=False)

### Embeddings


In [None]:
from transformers import BertTokenizer, TFBertModel

# Pretrained BERT tokenizer and encoder used to turn each dialogue into a vector.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# NOTE: the name `model` is rebound to the Keras classifier in a later cell, so
# this cell must be re-run before computing embeddings again.
model = TFBertModel.from_pretrained('bert-base-uncased')

# Token ids for each text as a TensorFlow tensor. truncation=True caps the
# sequence at the tokenizer's maximum model length so an unusually long
# dialogue cannot overflow BERT's position embeddings and crash the forward
# pass; texts within the limit are encoded exactly as before.
df['tokenized'] = df['text'].apply(
    lambda x: tokenizer.encode(x, add_special_tokens=True, truncation=True, return_tensors='tf')
)

# One forward pass per row; the pooled output is stored as the sentence
# embedding (presumably shape (1, 768) for bert-base, matching the classifier's
# input_dim=768 — confirm if the checkpoint changes).
df['embeddings'] = [model(token).pooler_output for token in df['tokenized']]

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Fixed intent-name -> class-id mapping, plus its inverse for decoding ids.
correspondence = {'History':0,'Book':1,'Cancel':2,'Location':3,'About':4,'Availability':5}
reversed_correspondence = {v:k for k,v in correspondence.items()}

# Encode the intent names as integer class ids. A dict lookup with a
# pass-through default keeps the cell safe to re-run (already-encoded ids map
# to themselves), and the explicit int64 cast avoids relying on the implicit
# str -> int downcasting of Series.replace, which recent pandas deprecates and
# which would otherwise leave an object-dtype label array. The cast raises if
# a label is missing from `correspondence`, surfacing bad data here rather
# than during training.
df['intent'] = df['intent'].map(lambda label: correspondence.get(label, label)).astype('int64')

X = df['embeddings'].values
y = df['intent'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Each element is a tensor holding one embedding; stacking them along axis 0
# yields a single 2-D float matrix per split.
X_train = np.concatenate([tensor.numpy() for tensor in X_train])
X_test = np.concatenate([tensor.numpy() for tensor in X_test])

In [None]:
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.callbacks import EarlyStopping

# Stop once validation loss has not improved for 3 epochs and roll back to the
# best weights seen.
callback = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# One output unit per intent class defined in `correspondence`.
num_classes = len(correspondence)

# Feed-forward classifier on top of the 768-dimensional sentence embeddings.
model = Sequential()
model.add(Dense(512, input_dim=768, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='sigmoid'))
# Softmax needs one unit per class: with a single unit it always outputs 1.0,
# so the network could never distinguish the intents.
model.add(Dense(num_classes, activation='softmax'))

# The labels are integer class ids (not one-hot vectors), which is exactly the
# input sparse categorical cross-entropy expects; binary cross-entropy is only
# meaningful for two-class / multi-label targets.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Labels are cast to int64 so an object-dtype label array cannot break tensor
# conversion. NOTE(review): the held-out test split doubles as the validation
# set for early stopping, so test metrics are optimistic.
hist = model.fit(
    X_train,
    y_train.astype('int64'),
    epochs=100,
    validation_data=(X_test, y_test.astype('int64')),
    callbacks=[callback],
)

In [None]:
from fuzzywuzzy import fuzz

# Build one row per intent id: every training dialogue for that intent joined
# into a single string, with a placeholder similarity filled in below. The ids
# come from the mapping itself rather than a hard-coded count.
intent_ids = sorted(reversed_correspondence)
dictionary = {
    'intent': intent_ids,
    'combined_text': [' '.join(df.loc[df['intent'] == i, 'text'].values) for i in intent_ids],
    'similarity': [np.nan] * len(intent_ids),
}
df_new = pd.DataFrame(dictionary)

# Decode ids back to intent names by assigning the result. Calling
# replace(..., inplace=True) on a column selection is a chained in-place
# operation that pandas deprecates and that leaves df_new unchanged under
# copy-on-write.
df_new['intent'] = df_new['intent'].map(reversed_correspondence)

# Score the user's text against each intent's combined corpus with fuzzy
# partial matching.
name = input('Enter text: ')
df_new['similarity'] = df_new['combined_text'].apply(lambda x: fuzz.partial_ratio(x, name))
df_new

In [None]:
import spacy

name = input('Enter text: ')
nlp = spacy.load('en_core_web_sm')

# Parse the query once instead of on every loop iteration.
# NOTE(review): spaCy's small ("sm") pipelines reportedly ship without static
# word vectors, so these similarity scores may be unreliable — consider a
# md/lg pipeline; confirm against the spaCy docs.
query_doc = nlp(name)

# Print the similarity between the query and each intent's combined corpus,
# one line per row of df_new.
for intent_label, combined_text in zip(df_new['intent'], df_new['combined_text']):
    print(intent_label, " -> ", query_doc.similarity(nlp(combined_text)))

In [None]:
# Print every training dialogue labelled 'Availability'. The intent column is
# integer-encoded by the label-encoding cell, so comparing against the string
# alone matches nothing once that cell has run; accept both the name and its
# class id so the filter works in either state.
availability_labels = ['Availability', correspondence['Availability']]
for row in df[df['intent'].isin(availability_labels)]['text'].values:
    print(row)