In [1]:
pip install azure-ai-textanalytics nltk scikit-learn

Collecting azure-ai-textanalytics
  Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl.metadata (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.8/82.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting azure-core<2.0.0,>=1.24.0 (from azure-ai-textanalytics)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting azure-common~=1.1 (from azure-ai-textanalytics)
  Downloading azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting isodate<1.0.0,>=0.6.1 (from azure-ai-textanalytics)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl (298 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.6/298.6 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading azure_common-1.1.28-py2.py3-none-any.whl (14 kB)
Downloading azure_core-1.32.0-py3-none-any.whl (198 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.9/1

In [8]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import nltk
import random
import string
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download necessary NLTK resources
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

# Azure Language Service Credentials
AZURE_LANGUAGE_KEY = "6ySxQFHQPqOqCnr7ZEUnGxrYGgIf7srHYGYXJ2XBesaaqAgVsGVlJQQJ99BBACYeBjFXJ3w3AAAaACOGBhsu"
AZURE_LANGUAGE_ENDPOINT = "https://week6languageservice.cognitiveservices.azure.com/"

def authenticate_client():
    return TextAnalyticsClient(endpoint=AZURE_LANGUAGE_ENDPOINT, credential=AzureKeyCredential(AZURE_LANGUAGE_KEY))

text_analytics_client = authenticate_client()


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
# Analyze text using Azure's Language Service
def analyze_text_with_azure(text):
    documents = [text]
    response = text_analytics_client.analyze_sentiment(documents=documents)[0]

    sentiment = response.sentiment  # Positive, Neutral, Negative
    key_phrases = text_analytics_client.extract_key_phrases(documents=documents)[0].key_phrases

    return sentiment, key_phrases


In [14]:
# Predefined corpus (Questions and Answers)
corpus = {
    "hello": "Hello! How can I assist you?",
    "hi": "Hello! How can I assist you?",
    "how are you": "I'm fine, thank you! How about you?",
    "what is your name": "I am a chatbot created to help you.",
    "can you explain machine learning": "Sure! Machine learning is a field of AI that allows computers to learn from data.",
    "tell me a joke": "Why did the scarecrow win an award? Because he was outstanding in his field!",
    "goodbye": "Goodbye! Have a great day!",
    "what can you do?": "I can answer general questions, help with AI concepts, and provide information."
}

# Preprocess text
def preprocess_input(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and word not in string.punctuation]
    return " ".join(tokens)

# Prepare corpus keys and responses
questions = list(corpus.keys())
responses = list(corpus.values())

# Vectorize corpus
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(questions)

# Chatbot response function with Azure NLP
def chatbot_response(user_input):
    sentiment, key_phrases = analyze_text_with_azure(user_input)

    print(f"\n*User Sentiment: {sentiment}*") 
    print(f"*Key Phrases: {', '.join(key_phrases)}*\n")  


    # If user sentiment is negative, offer help
    if sentiment == "negative":
        return "I'm sorry if you're having trouble. Can I help with something specific?"

    processed_input = preprocess_input(user_input)
    input_vector = vectorizer.transform([processed_input])

    # Compute similarity with predefined questions
    similarity_scores = cosine_similarity(input_vector, X)
    best_match_index = np.argmax(similarity_scores)

    # If similarity score is low, return a generic response
    if similarity_scores[0, best_match_index] < 0.3:
        return "I'm not sure how to respond to that. Can you rephrase?"

    return responses[best_match_index]


In [26]:
# Running the chatbot
print("Chatbot is ready! Type 'exit' to end.<b>")
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye! Have a great day!")
        break
    print("Chatbot:", chatbot_response(user_input))

Chatbot is ready! Type 'exit' to end.<b>


You:  hi



*User Sentiment: neutral*
*Key Phrases: *

Chatbot: Hello! How can I assist you?


You:  what is your name



*User Sentiment: neutral*
*Key Phrases: name*

Chatbot: I am a chatbot created to help you.


You:  tell me a joke



*User Sentiment: positive*
*Key Phrases: joke*

Chatbot: Why did the scarecrow win an award? Because he was outstanding in his field!


You:  not funny



*User Sentiment: negative*
*Key Phrases: *

Chatbot: I'm sorry if you're having trouble. Can I help with something specific?


You:  what can you do



*User Sentiment: neutral*
*Key Phrases: *

Chatbot: I'm not sure how to respond to that. Can you rephrase?


You:  goodbye



*User Sentiment: neutral*
*Key Phrases: goodbye*

Chatbot: Goodbye! Have a great day!


You:  exit


Chatbot: Goodbye! Have a great day!
