<a href="https://colab.research.google.com/github/Sankytanky100/NLP-Projects/blob/main/customer_service_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Project Overview
Objective: Build a customer service chatbot that can handle user inquiries about products, shipping, and weather by:

Processing user messages.
Finding the most appropriate response by counting the words each candidate response shares with the message (bag-of-words overlap).
Extracting entities from the user's message.
Personalizing responses by inserting the extracted entities.

```
# This is formatted as code
```



1. Setting Up the Environment

In [6]:
# Install NLTK dependencies
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

# Install spaCy and download the English model
!pip install -U spacy
!python -m spacy download en_core_web_md

# Import necessary libraries
import re
from collections import Counter
import numpy as np
import spacy
from nltk import pos_tag, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Collecting en-core-web-md==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


2. Defining Helper Functions

2.1 Preprocess Text Function

In [1]:
def preprocess_text(text):
    """Normalize a raw message into a list of lemmatized content tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is dropped, the result is tokenized with NLTK, English
    stopwords are filtered out, and each surviving token is lemmatized.

    Args:
        text: The raw string to normalize.

    Returns:
        A list of lemmatized, lowercase tokens in their original order.
    """
    # Punctuation is deleted outright (not replaced by a space), so a
    # hyphenated word such as "t-shirt" collapses into a single token.
    cleaned = re.sub(r'[^\w\s]', '', text.lower())
    words = word_tokenize(cleaned)
    ignored = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize
    return [lemmatize(word) for word in words if word not in ignored]


2.2 Compare Overlap Function

In [2]:
def compare_overlap(user_message, response):
    """Count how many items of ``user_message`` also occur in ``response``.

    Args:
        user_message: Iterable of tokens. Iterating a Counter/dict visits each
            distinct key once; iterating a list visits every element.
        response: Container supporting ``in`` membership tests.

    Returns:
        The number of iterated items from ``user_message`` found in ``response``.
    """
    return sum(1 for token in user_message if token in response)


2.3 Extract Nouns Function

In [3]:
def extract_nouns(tagged_message):
    """Return the words whose part-of-speech tag begins with ``"NN"``.

    Args:
        tagged_message: Sequence of ``(word, tag)`` pairs.

    Returns:
        A list of the words (first item of each pair) whose tag starts with
        ``"NN"``, in their original order.
    """
    return [pair[0] for pair in tagged_message if pair[1].startswith("NN")]


2.4 Compute Similarity Function

In [4]:
def compute_similarity(tokens, category, nlp):
    """Score each token against a category word using ``nlp``.

    Args:
        tokens: Iterable of strings to score.
        category: The string every token is compared with.
        nlp: Callable that turns a string into an object exposing
            ``similarity(other)`` (the notebook passes a loaded spaCy pipeline).

    Returns:
        A list of ``(token, category, similarity)`` tuples, one per token, in
        input order.
    """
    # The category is parsed once, up front, and reused for every token.
    category_doc = nlp(category)
    return [
        (word, category, nlp(word).similarity(category_doc))
        for word in tokens
    ]


3. Loading spaCy Model

In [7]:
# Load spaCy's medium English model.
# The ChatBot class defined below reads this module-level `nlp` in its
# __init__, so this cell has to run before a ChatBot is constructed.
nlp = spacy.load('en_core_web_md')


4. Defining the Chatbot Class

In [8]:
class ChatBot:
    """Rule-based customer-service chatbot.

    A reply template is picked by counting the words it shares with the user's
    message, and its ``{}`` placeholder is filled with the noun from the
    message that is most similar to the words 'product' or 'location'.

    Relies on the module-level helpers ``preprocess_text``, ``compare_overlap``,
    ``pos_tag``, ``extract_nouns`` and ``compute_similarity``, and on the
    module-level ``nlp`` pipeline.
    """

    # A noun is used as the entity only if its best similarity exceeds this.
    ENTITY_SIMILARITY_THRESHOLD = 0.5
    # Inputs (compared after strip + lowercase) that end the conversation.
    EXIT_COMMANDS = ('quit', 'exit', 'bye')

    def __init__(self):
        """Set up the reply templates, category lists and the spaCy pipeline."""
        # Placeholder for unknown entities
        self.blank_spot = "_____"

        # Possible responses with placeholders
        self.responses = [
            "Hey! I just checked my records, your shipment containing {} is en route. Expect it within the next two days!",
            "Good news! The {} you ordered has been shipped and will arrive soon.",
            "Our records show that the {} is currently out of stock. We'll notify you when it's available.",
            "The {} comes in various sizes and colors. Which one are you interested in?",
            "Currently, the weather in {} is sunny with a slight chance of rain.",
            "I'm sorry, but I don't have information about {} at the moment.",
            "Could you please provide more details about {}?"
        ]

        # Categories for entity extraction.
        # NOTE(review): no method of this class reads these two lists;
        # find_entities compares against the literal words 'product' and 'location'.
        self.product_categories = ['dress', 'shirt', 't-shirt', 'jeans', 'shoes', 'bag']
        self.location_categories = ['city', 'state', 'country', 'town', 'village']

        # Initialize spaCy model (the module-level pipeline)
        self.nlp = nlp

    def preprocess(self, text):
        """Tokenize ``text`` by delegating to the module-level ``preprocess_text``."""
        return preprocess_text(text)

    def find_intent_match(self, user_message):
        """Return the reply template sharing the most distinct words with the message.

        Args:
            user_message: Raw text typed by the user.

        Returns:
            One of ``self.responses``. Ties (including the case where nothing
            overlaps) resolve to the earliest template in the list.
        """
        bow_user_message = Counter(self.preprocess(user_message))
        processed_responses = [Counter(self.preprocess(response)) for response in self.responses]

        # Compute similarity scores
        similarity_list = [compare_overlap(bow_user_message, response) for response in processed_responses]

        # Find the index of the best response
        response_index = similarity_list.index(max(similarity_list))
        return self.responses[response_index]

    def find_entities(self, user_message):
        """Pick the noun in the message that best matches 'product' or 'location'.

        Args:
            user_message: Raw text typed by the user.

        Returns:
            The best-scoring noun, or ``None`` when the message has no nouns or
            the best score does not exceed ``ENTITY_SIMILARITY_THRESHOLD``.
        """
        tagged_message = pos_tag(self.preprocess(user_message))
        message_nouns = extract_nouns(tagged_message)

        # If no nouns found, return None
        if not message_nouns:
            return None

        # Score every noun against both category words
        scored_nouns = (
            compute_similarity(message_nouns, 'product', self.nlp)
            + compute_similarity(message_nouns, 'location', self.nlp)
        )

        # max() keeps the first of equally scored candidates, which is the
        # same element a stable descending sort would put first.
        best_noun, _, best_score = max(scored_nouns, key=lambda item: item[2])
        if best_score > self.ENTITY_SIMILARITY_THRESHOLD:
            return best_noun
        return None

    def respond(self, user_message, continue_chat=True):
        """Print the reply to one message.

        Args:
            user_message: Raw text typed by the user.
            continue_chat: When true (the default, matching the previous
                behavior of this method) the interactive loop is entered
                after the reply is printed. ``chat`` passes ``False`` because
                it does the looping itself.
        """
        # Find the best intent match
        best_response = self.find_intent_match(user_message)
        entity = self.find_entities(user_message)

        # Fill the placeholder with the entity, or with the blank spot
        print(best_response.format(entity if entity else self.blank_spot))

        if continue_chat:
            self.chat()

    def chat(self):
        """Run the interactive prompt until the user enters an exit command.

        Implemented as a loop rather than chat() -> respond() -> chat()
        recursion, so a long conversation cannot exhaust the call stack.
        """
        while True:
            user_message = input("Hello! How can I assist you today?\n")
            # strip() so that stray whitespace around "quit" still exits
            if user_message.strip().lower() in self.EXIT_COMMANDS:
                print("Thank you for contacting us. Have a great day!")
                return
            self.respond(user_message, continue_chat=False)


5. Running the Chatbot

In [10]:
# Create ChatBot instance
customer_service_bot = ChatBot()

# Start the chat
customer_service_bot.chat()


Hello! How can I assist you today?
I ordered two t-shirts this past weekend. When will my package be shipped?


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************


In [None]:
# Install spaCy and download the English model
# NOTE(review): the setup cell at the top of the notebook already runs the
# same model download (and `pip install -U spacy`).
!pip install spacy
!python -m spacy download en_core_web_md


In [None]:
# Import necessary libraries
import spacy
from collections import Counter
import numpy as np


In [None]:
def preprocess(text):
    """Lowercase, strip punctuation, tokenize and remove English stopwords.

    The NLTK resources are requested on the first call only; previously every
    call re-ran ``nltk.download``, and this function is invoked once per
    candidate response for every user message.

    Args:
        text: The raw string to normalize.

    Returns:
        A list of lowercase tokens with ASCII punctuation and English
        stopwords removed, in their original order.
    """
    import string
    import nltk
    from nltk.corpus import stopwords

    if not getattr(preprocess, "_resources_ready", False):
        nltk.download('stopwords')
        nltk.download('punkt')
        # Recent NLTK releases load the tokenizer tables from 'punkt_tab';
        # without it word_tokenize() raises LookupError.
        nltk.download('punkt_tab')
        preprocess._resources_ready = True

    stop_words = set(stopwords.words('english'))
    # Lowercase and remove punctuation
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Tokenize
    tokens = nltk.word_tokenize(text)
    # Remove stopwords
    return [word for word in tokens if word not in stop_words]


In [None]:
def compare_overlap(user_message, response):
    """Return the number of items in ``user_message`` that appear in ``response``.

    Args:
        user_message: Iterable of words (a Counter yields each distinct word once).
        response: Container supporting ``in`` membership tests.

    Returns:
        How many iterated words from ``user_message`` are members of ``response``.
    """
    shared_words = [word for word in user_message if word in response]
    return len(shared_words)


In [None]:
def pos_tag(tokens):
    """Tag ``tokens`` with parts of speech via ``nltk.pos_tag``.

    The tagger resources are requested on the first call only instead of on
    every call.

    Args:
        tokens: List of token strings.

    Returns:
        Whatever ``nltk.pos_tag(tokens)`` returns.
    """
    import nltk

    if not getattr(pos_tag, "_tagger_ready", False):
        nltk.download('averaged_perceptron_tagger')
        # NOTE(review): recent NLTK releases presumably look the English tagger
        # up under this name; the request is harmless if the package is unknown.
        nltk.download('averaged_perceptron_tagger_eng')
        pos_tag._tagger_ready = True
    return nltk.pos_tag(tokens)


In [None]:
def compute_similarity(tokens1, tokens2, nlp):
    """Cosine similarity between the mean word vectors of two token lists.

    Only tokens for which ``word in nlp.vocab`` holds contribute to a mean.

    Args:
        tokens1: First iterable of token strings.
        tokens2: Second iterable of token strings.
        nlp: Callable exposing ``vocab`` (supporting ``in``) and returning, for
            a string, an object with a ``vector`` attribute.

    Returns:
        The cosine similarity as a float. ``0.0`` is returned when either side
        has no in-vocabulary token or a zero-length mean vector, instead of the
        NaN (with a runtime warning) those inputs previously produced.
    """
    def mean_vector(tokens):
        # None signals "nothing to average" so np.mean never sees an empty list.
        vectors = [nlp(word).vector for word in tokens if word in nlp.vocab]
        return np.mean(vectors, axis=0) if vectors else None

    vector1 = mean_vector(tokens1)
    vector2 = mean_vector(tokens2)
    if vector1 is None or vector2 is None:
        return 0.0

    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if denominator == 0:
        # An all-zero mean vector has no direction to compare.
        return 0.0
    return float(np.dot(vector1, vector2) / denominator)


In [None]:
# Possible responses
responses = [
    "It looks like rain today. Carry an umbrella!",
    "It's sunny and warm outside.",
    "Expect cloudy skies with a chance of showers.",
    "Snow is expected later this evening.",
    "It's quite windy today. Hold onto your hat!",
    "I don't have data for that location.",
    "I'm not sure about the weather, but I hope it's nice!"
]

# Placeholder for unknown entities
blank_spot = "____"


In [None]:
# Load spaCy's medium English model.
# ChatBot.find_entities below reads this module-level `nlp` directly.
nlp = spacy.load('en_core_web_md')


In [None]:
class ChatBot:
    """Weather chatbot ("Stratus") that answers with canned responses.

    Relies on the module-level helpers ``preprocess``, ``pos_tag`` and
    ``extract_nouns``, and on the module-level ``nlp``, ``responses`` and
    ``blank_spot``.
    """

    # Inputs (compared after strip + lowercase) that end the conversation.
    EXIT_COMMANDS = ('quit', 'exit', 'bye')

    def find_intent_match(self, responses, user_message):
        """Return the response sharing the most distinct words with the message.

        Args:
            responses: Non-empty list of candidate response strings.
            user_message: Raw text typed by the user.

        Returns:
            The best-overlapping element of ``responses``. Ties (including the
            case where nothing overlaps) resolve to the earliest candidate.
        """
        bow_user_message = Counter(preprocess(user_message))
        processed_responses = [Counter(preprocess(response)) for response in responses]

        # Compute similarity scores: distinct message words present in each response
        similarity_list = [
            sum(1 for word in bow_user_message if word in response)
            for response in processed_responses
        ]

        # Find the index of the best response
        response_index = similarity_list.index(max(similarity_list))
        return responses[response_index]

    def find_entities(self, user_message):
        """Return the weather term most similar to any noun in the message.

        Args:
            user_message: Raw text typed by the user.

        Returns:
            The best-matching entry of the built-in weather-term list, or
            ``None`` when the message contains no nouns. (The no-noun case
            used to return ``[]``, which is also falsy, while the other path
            returned a string.)
        """
        tagged_user_message = pos_tag(preprocess(user_message))
        message_nouns = extract_nouns(tagged_user_message)

        # If no nouns found, there is nothing to match
        if not message_nouns:
            return None

        # Parse every noun once instead of once per weather term
        noun_docs = [(noun.lower(), nlp(noun.lower())) for noun in message_nouns]
        weather_terms = ['rain', 'sunny', 'cloudy', 'snow', 'windy', 'weather', 'forecast']

        similarities = []
        for term in weather_terms:
            term_doc = nlp(term)  # parsed once per term, reused for every noun
            for token, token_doc in noun_docs:
                similarities.append((term, token, term_doc.similarity(token_doc)))

        # max() keeps the first of equally scored candidates, which is the
        # same element a stable descending sort would put first.
        best_term, _, _ = max(similarities, key=lambda item: item[2])
        return best_term

    def respond(self, user_message, continue_chat=True):
        """Print the reply to one message.

        Args:
            user_message: Raw text typed by the user.
            continue_chat: When true (the default, matching the previous
                behavior of this method) the interactive loop is entered
                after the reply is printed. ``chat`` passes ``False`` because
                it does the looping itself.
        """
        # Find the best intent match
        best_response = self.find_intent_match(responses, user_message)
        entity = self.find_entities(user_message)

        if entity:
            # Replace blank spot with the entity
            best_response = best_response.replace(blank_spot, entity)

        print(best_response)

        if continue_chat:
            self.chat()

    def chat(self):
        """Run the interactive prompt until the user enters an exit command.

        Implemented as a loop rather than chat() -> respond() -> chat()
        recursion, so a long conversation cannot exhaust the call stack.
        """
        while True:
            user_message = input("Hi, I'm Stratus. Ask me about your local weather!\n")
            # strip() so that stray whitespace around "quit" still exits
            if user_message.strip().lower() in self.EXIT_COMMANDS:
                print("Goodbye! Stay safe and take care!")
                return
            self.respond(user_message, continue_chat=False)


In [None]:
# Create ChatBot instance
stratus = ChatBot()

# Start the chat
stratus.chat()
