In [5]:
import json
import os

import pandas as  pd
import spacy

import seaborn as sns
import string

from tqdm import tqdm
from textblob import TextBlob

from nltk.corpus import stopwords
import nltk
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
import re


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline


from sklearn.preprocessing import FunctionTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion
from sklearn.feature_extraction import DictVectorizer

import swifter
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'swifter'

In [6]:
# Read the question/response pairs; the path is relative to the kernel's working directory.
data = pd.read_csv('chatbot_dataset.csv')

In [7]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,Mother Input,Bot Response
0,Varicose Veins has been bothering me in my sec...,Since you're in your second trimester and deal...
1,I'm in my second trimester and having headache.,Headache can occur during the second trimester...
2,I have fatigue in the second trimester. It's s...,"In your second trimester, it's common to feel ..."
3,I'm in my second trimester and experiencing fo...,Since you're in your second trimester and deal...
4,I have swelling in the second trimester. It's ...,Having swelling in the second trimestercan be ...


In [8]:
# (rows, columns) of the loaded frame.
data.shape

(2237, 2)

In [9]:
# Column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2237 entries, 0 to 2236
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Mother Input  2237 non-null   object
 1   Bot Response  2237 non-null   object
dtypes: object(2)
memory usage: 35.1+ KB


In [10]:
# Cast the question column to str so clean_text can call .lower() on every value.
data['Mother Input'] = data['Mother Input'].astype(str)

In [11]:
def clean_text(text):
    """Return ``text`` lowercased with all punctuation and symbols removed.

    Only ASCII letters, digits and whitespace survive. Removed characters are
    deleted, not replaced, so the whitespace around them is left untouched and
    tokens joined by punctuation are merged (``"1-2"`` becomes ``"12"``).

    Parameters
    ----------
    text : str
        Raw input string.

    Returns
    -------
    str
        The normalised string.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9\s]")
    return disallowed.sub("", text.lower())

In [12]:
clean_text("my #### $ #  #noorsaeed webiste like is this http://heloword and access it @gmain.com")

'my     noorsaeed webiste like is this httpheloword and access it gmaincom'

In [13]:
# Normalise every question with clean_text; the classifier is fitted on this cleaned text.
data['Mother Input'] = data['Mother Input'].map(clean_text)

In [14]:
# Target: the reply text. Features: every remaining column (the question text).
y = data['Bot Response']
X = data.drop(columns='Bot Response')

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for testing. random_state pins the shuffle so that
# Restart Kernel -> Run All reproduces the same split (and therefore the same
# fitted model and predictions) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
# NOTE(review): `tf` is not referenced by any later cell visible here; the
# pipeline below builds its own CountVectorizer + TfidfTransformer. Confirm
# whether this vectorizer is still needed.
tf = TfidfVectorizer()

In [17]:
# Text classification pipeline: token counts (English stop words removed)
# -> TF-IDF weighting -> multinomial Naive Bayes with light smoothing.
# Each distinct reply string acts as one class label.
text_clf = Pipeline([
    ('vect', CountVectorizer(analyzer="word", stop_words="english")),
    ('tfidf', TfidfTransformer(use_idf=True)),
    ('clf', MultinomialNB(alpha=.01)),
])

In [18]:
# Fit the pipeline on the cleaned training questions, using the reply texts as labels.
text_clf.fit(X_train['Mother Input'].tolist(), y_train.tolist())

In [19]:
# Predict a reply for every held-out question.
test_questions = X_test['Mother Input'].tolist()
y_pred = text_clf.predict(test_questions)

In [20]:
import pickle

# Persist the fitted pipeline. The context manager guarantees the file is
# flushed and closed before it is read back, instead of relying on garbage
# collection of an anonymous file object.
with open("model_nlp.dat", "wb") as model_file:
    pickle.dump(text_clf, model_file)

In [21]:
# Reload the pipeline from disk to confirm the saved artefact is usable.
# Only unpickle files you trust: pickle.load can execute arbitrary code.
with open('model_nlp.dat', mode='rb') as model_file:
    model = pickle.load(model_file)

In [22]:
# One example query, wrapped in a list because it is passed to model.predict, which is indexed per document.
sample_text = ["Nausea has been bothering me in my first trimester. It happens daily and lasts 1-2 hours."]

In [23]:
# The pipeline was fitted on text that had been passed through clean_text, so
# the query must be cleaned the same way; otherwise tokens such as "1-2" or
# "it's" are split differently from how they were seen during training.
model.predict([clean_text(text) for text in sample_text])[0]

"In your third trimester, it's common to feel nausea. Sounds like it's been going on few hours and it's fairly mild. Try vitamin B6, stay hydrated, and eat small frequent meals."

In [24]:
import ipywidgets as widgets

# Free-text input for the question to send to the model.
text_area = widgets.Textarea(
    value="Type your text here...",
    placeholder="Enter a sentence or paragraph...",
    description="Text:",
    layout=widgets.Layout(width="500px", height="100px"),
)

button = widgets.Button(description="Predict")
output = widgets.Output()


def on_button_click(b):
    """Predict a reply for the text in ``text_area`` and print it into ``output``.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button, passed in by ``Button.on_click``; not used.
    """
    # Clean the input the same way the training questions were cleaned, so the
    # vectorizer sees the same kind of tokens it was fitted on.
    cleaned = clean_text(text_area.value)
    with output:
        output.clear_output()
        if not cleaned.strip():
            # Nothing left to classify; avoid returning an arbitrary reply.
            print("Please enter some text first.")
            return
        prediction = model.predict([cleaned])[0]
        print(f"Prediction: {prediction}")


button.on_click(on_button_click)

In [25]:
# Render the input box, the Predict button and the output area.
display(text_area, button, output)

Textarea(value='Type your text here...', description='Text:', layout=Layout(height='100px', width='500px'), pl…

Button(description='Predict', style=ButtonStyle())

Output()