In [10]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


In [3]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity      
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# import spacy
lemmatizer = nltk.stem.WordNetLemmatizer()

# Download required NLTK data
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sarah\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sarah\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sarah\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
data = pd.read_csv('iPhone Dialog.txt', sep = ':', header = None)
data

Unnamed: 0,0,1
0,Customer,"Hi, I'm looking to buy a new phone, and I'm i..."
1,Sales Agent,"Great, we have a wide range of Apple products..."
2,Customer,"Well, I want a phone with a good camera, long..."
3,Sales Agent,Absolutely. We have a lot of great options th...
4,Customer,"No, I haven't. Tell me more about it."
5,Sales Agent,The iPhone 15 Pro Max has a 48-megapixel main...
6,Customer,That sounds great. How much does it cost?
7,Sales Agent,"The iPhone 15 Pro Max starts at ₦2,099,000.0..."
8,Customer,"Okay, I'm interested. But I have a few more q..."
9,Sales Agent,Every iPhone comes with a one year of hardwar...


In [13]:
cust = data.loc[data[0] == 'Customer']
sales = data.loc[data[0] == 'Sales Agent']

sales = sales[1].reset_index(drop = True)
cust = cust[1].reset_index(drop = True)

new_data = pd.DataFrame()
new_data['Question'] = cust
new_data['Answer'] = sales

new_data.head()

Unnamed: 0,Question,Answer
0,"Hi, I'm looking to buy a new phone, and I'm i...","Great, we have a wide range of Apple products..."
1,"Well, I want a phone with a good camera, long...",Absolutely. We have a lot of great options th...
2,"No, I haven't. Tell me more about it.",The iPhone 15 Pro Max has a 48-megapixel main...
3,That sounds great. How much does it cost?,"The iPhone 15 Pro Max starts at ₦2,099,000.0..."
4,"Okay, I'm interested. But I have a few more q...",Every iPhone comes with a one year of hardwar...


In [14]:
# Define a function for text preprocessing (including lemmatization)
def preprocess_text(text):
    # Identifies all sentences in the new_data
    sentences = nltk.sent_tokenize(text)
    
    # Tokenize and lemmatize each word in each sentence
    preprocessed_sentences = []
    for sentence in sentences:
        tokens = [lemmatizer.lemmatize(word.lower()) for word in nltk.word_tokenize(sentence) if word.isalnum()]
        # Turns to basic root - each word in the tokenized word found in the tokenized sentence - if they are all alphanumeric 
        # The code above does the following:
        # Identifies every word in the sentence 
        # Turns it to a lower case 
        # Lemmatizes it if the word is alphanumeric

        preprocessed_sentence = ' '.join(tokens)
        preprocessed_sentences.append(preprocessed_sentence)
    
    return ' '.join(preprocessed_sentences)


new_data['tokenized Questions'] = new_data['Question'].apply(preprocess_text)
new_data

Unnamed: 0,Question,Answer,tokenized Questions
0,"Hi, I'm looking to buy a new phone, and I'm i...","Great, we have a wide range of Apple products...",hi i looking to buy a new phone and i interest...
1,"Well, I want a phone with a good camera, long...",Absolutely. We have a lot of great options th...,well i want a phone with a good camera long ba...
2,"No, I haven't. Tell me more about it.",The iPhone 15 Pro Max has a 48-megapixel main...,no i have tell me more about it
3,That sounds great. How much does it cost?,"The iPhone 15 Pro Max starts at ₦2,099,000.0...",that sound great how much doe it cost
4,"Okay, I'm interested. But I have a few more q...",Every iPhone comes with a one year of hardwar...,okay i interested but i have a few more questi...
5,That's good to know. And what about the opera...,"No, it does not come with the latest Android ...",that good to know and what about the operating...
6,"Okay, that's good. But I'm also interested in...","Absolutely. The iPhone 13 Pro Max, and the iP...",okay that good but i also interested in some o...
7,That sounds like a good option for me. How mu...,"The iPhone 13 Pro starts at N985,000, while t...",that sound like a good option for me how much ...
8,"Okay, I'll think about it. But can you also t...",Of course. There are different iPad under the...,okay i think about it but can you also tell me...
9,"Wow, can I know more about the iPad Pro?","Of course. We have the iPad Pro 11‑inch, and ...",wow can i know more about the ipad pro


In [15]:
xtrain = new_data['tokenized Questions'].to_list()
xtrain

['hi i looking to buy a new phone and i interested in apple product',
 'well i want a phone with a good camera long battery life and plenty of storage',
 'no i have tell me more about it',
 'that sound great how much doe it cost',
 'okay i interested but i have a few more question what kind of warranty come with the phone',
 'that good to know and what about the operating system doe it come with the latest version of android',
 'okay that good but i also interested in some of the other iphones can you tell me more about the iphone 13 pro and iphone 13 pro max',
 'that sound like a good option for me how much doe it cost',
 'okay i think about it but can you also tell me about the ipad i heard a lot about it and i curious',
 'wow can i know more about the ipad pro',
 'that sound really cool but it also sound expensive how much doe it cost',
 'hmm i not sure that a lot of money for a gadget',
 'okay can you tell me more about the iphone x max',
 'that sound like a great option for me how

In [16]:
# Vectorized corpus
tfidf_vectorizer = TfidfVectorizer()
corpus = tfidf_vectorizer.fit_transform(xtrain)

print(corpus[0])

  (0, 73)	0.34005872834766576
  (0, 7)	0.34005872834766576
  (0, 40)	0.2675611601872162
  (0, 41)	0.24422215618535598
  (0, 4)	0.20902989169567068
  (0, 70)	0.24422215618535598
  (0, 61)	0.34005872834766576
  (0, 11)	0.34005872834766576
  (0, 88)	0.2675611601872162
  (0, 53)	0.34005872834766576
  (0, 36)	0.34005872834766576


In [17]:
user = input('Pls ask your question: ')
print(user)

hello


In [18]:
preprocess_text(user)

'hello'

In [19]:
# Vectorize user input
user_transformed = tfidf_vectorizer.transform([user])
print(user_transformed)




In [20]:
# Find similarity
similarity_scores = cosine_similarity(user_transformed, corpus)
similarity_scores

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]])

In [21]:
new_data['Answer'].iloc[similarity_scores.argmax()]

' Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?'

In [22]:
user = input('Pls ask your question: ')
print(user)

i want to buy iphone 13 pro


In [23]:
preprocess_text(user)

'i want to buy iphone 13 pro'

In [24]:
# Vectorize user input
user_transformed = tfidf_vectorizer.transform([user])
print(user_transformed)

  (0, 90)	0.4470227453885812
  (0, 88)	0.3517213775614719
  (0, 72)	0.3912750189422886
  (0, 43)	0.3517213775614719
  (0, 11)	0.4470227453885812
  (0, 0)	0.4470227453885812


In [25]:
# Find similarity
similarity_scores = cosine_similarity(user_transformed, corpus)
similarity_scores

array([[0.24612097, 0.13938089, 0.        , 0.        , 0.        ,
        0.08117532, 0.47106757, 0.        , 0.        , 0.16124313,
        0.        , 0.        , 0.13612453, 0.        , 0.        ,
        0.08340223, 0.09385168, 0.        ]])

In [26]:
new_data['Answer'].iloc[similarity_scores.argmax()]

' Absolutely. The iPhone 13 Pro Max, and the iPhone 13 Pro both have a trio of 12MP cameras on the back. Those cover the roles of main, ultrawide and 3x telephoto duties. They both have the following features-'

In [27]:
def collector():
    user = input('Pls ask your question: ')
    pre_user = preprocess_text(user)
    vect_user = tfidf_vectorizer.transform([pre_user])
    similarity_scores = cosine_similarity(vect_user, corpus)
    most_similar_index = similarity_scores.argmax()
    
    return new_data['Answer'].iloc[most_similar_index]

In [28]:
collector()

' Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?'

In [29]:
def responder(text):
    user_input_processed = preprocess_text(text)
    vectorized_user_input = tfidf_vectorizer.transform([user_input_processed])
    similarity_score = cosine_similarity(vectorized_user_input, corpus)
    argument_maximum = similarity_score.argmax()

    print(new_data['Answer'].iloc[argument_maximum])

bot_greetings = ['Hello user, I am Ayo..... Pls ask your question',
             'Howdy, what can I do for you?',
             'Whazzap, what you need?',
             'Oremi, ki lo need?',
             'Welcome user, what can I do you for?'
             ]

farewell = ['Thanks for your usage..... Bye.',
            'Tainku, come again soon.',
            'Gracias, hope to see you again.',
            'Oshey oremi...... Odabo.',
            'Bye user, thanks for your patronage.']

human_greetings = ['hi', 'hello', 'hey', 'whatsup', 'good day', 'hello there', 'howdy', 'waddup']

human_exists = ['bye', 'good bye', 'ciao', 'close', 'exit', 'thanks', 'thank you','close']

import random
random_greeting = random.choice(bot_greetings)
random_farewell = random.choice(farewell)

while True:
    user_input = input('You: ')

    if user_input.lower() in human_greetings:
        print(random_greeting)
    elif user_input.lower() in human_exists:
        print(random_farewell)
        break
    else:
        responder(user_input)

 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Great, we have a wide range of Apple products to choose from. What particular Apple product are you inerested in?
 Absolutely. The iPhone 13 Pro Max, and the iPhone 13 Pro both have a trio of 12

In [30]:
# def get_response(user_input):
#     user_input_processed = preprocess_text(user_input) # ....................... Preprocess the user's input using the preprocess_text function

#     user_input_vector = tfidf_vectorizer.transform([user_input_processed])# .... Vectorize the preprocessed user input using the TF-IDF vectorizer

#     similarity_scores = cosine_similarity(user_input_vector, corpus) # .. Calculate the score of similarity between the user input vector and the corpus (df) vector

#     most_similar_index = similarity_scores.argmax() # ..... Find the index of the most similar question in the corpus (df) based on cosine similarity

#     return new_data['Answer'].iloc[most_similar_index] # ... Retrieve the corresponding answer from the df DataFrame and return it as the chatbot's response

# # create greeting list 
# greetings = ["Hey There.... I am a creation of Ehiz Danny Agba Coder.... How can I help",
#             "Hi Human.... How can I help",
#             'Twale baba nla, wetin dey happen nah',
#             'How far Alaye, wetin happen'
#             "Good Day .... How can I help", 
#             "Hello There... How can I be useful to you today",
#             "Hi GomyCode Student.... How can I be of use"]

# exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
# farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'Bye... come back soon']

# random_farewell = random.choice(farewell) # ---------------- Randomly select a farewell message from the list
# random_greetings = random.choice(greetings) # -------- Randomly select greeting message from the list

# Test your chatbot
# while True:
#     user_input = input("You: ")
#     if user_input.lower() in exits:
#         print(f"\nChatbot: {random_farewell}!")
#         break
#     if user_input.lower() in ['hi', 'hello', 'hey', 'hi there']:
#         print(f"\nChatbot: {random_greetings}!")
#     else:   
#         response = get_response(user_input)
#         print(f"\nChatbot: {response}")