# In [None]:
from newspaper import Article
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random

# Web pages that form the bot's knowledge base
urls = [
    'https://en.wikipedia.org/wiki/Sri_Lanka',
    'https://en.wikipedia.org/wiki/Car',
    'https://en.wikipedia.org/wiki/Artificial_intelligence',
    'https://en.wikipedia.org/wiki/Sri_Lankan_elephant',
    'https://kandyesalaperahera.com/',
    'https://en.wikipedia.org/wiki/Kandy',
]

# Extracted body text of every page, in the same order as `urls`
corpus_list = []

for url in urls:
    article = Article(url)
    article.download()
    article.parse()
    # NOTE(review): only `article.text` is read below; nothing visible in this
    # file uses whatever nlp() produces -- confirm the call is still needed.
    article.nlp()
    corpus_list.append(article.text)

# Flat list of candidate answer sentences drawn from all pages
sentence_list = []
for corpus in corpus_list:
    # Naive sentence segmentation: break on a period followed by a space
    sentence_list += corpus.split('. ')


# Function to return a random greeting response to a user's greeting
def greeting_response(text):
    """Return a random greeting if *text* contains a greeting word.

    The text is lower-cased and split on whitespace. Each word is compared
    against the known user greetings after surrounding punctuation has been
    stripped, so inputs such as ``"Hello!"`` or ``"hi,"`` are recognised.

    Args:
        text: The raw message typed by the user.

    Returns:
        One of the bot's greetings chosen at random, or ``None`` when the
        message contains no greeting word.
    """
    # bot's greeting responses
    bot_greetings = ['howdy', 'hi', 'hey', 'hello', 'hola']
    # user greetings
    user_greetings = {'hi', 'howdy', 'hello', 'greeting', 'watsup'}

    for word in text.lower().split():
        # Whole-word match only ("this" must not match "hi"); punctuation
        # attached to the word is ignored.
        if word.strip('.,!?;:\'"()') in user_greetings:
            return random.choice(bot_greetings)

    return None

# Function to sort indices based on the values in list_var
def index_sort(list_var):
    """Return the indices of *list_var* ordered by descending value.

    The first returned index points at the largest value and the last at the
    smallest. Uses the built-in sort (O(n log n)) instead of a quadratic
    exchange sort; the sort is stable, so indices of equal values keep their
    original relative order.

    Args:
        list_var: Any indexable sequence with a length whose items can be
            compared with each other (e.g. a list or a 1-D array of scores).

    Returns:
        A new list of integer indices into *list_var*; the input is not
        modified.
    """
    return sorted(
        range(len(list_var)),
        key=lambda position: list_var[position],
        reverse=True,
    )

# Function to create the bot's response
def bot_response(user_input):
    """Build a reply from the corpus sentences most similar to *user_input*.

    The lower-cased query is vectorised together with every sentence in the
    module-level ``sentence_list`` using bag-of-words counts, and the
    sentences are ranked by cosine similarity to the query. Up to three
    sentences with a similarity greater than zero are returned.

    The query is added to a temporary copy of the corpus, so the shared
    ``sentence_list`` is never modified -- not even if vectorisation raises,
    and not when a corpus sentence happens to equal the query text.

    Args:
        user_input: The raw message typed by the user.

    Returns:
        The matching sentences, each preceded by a single space and
        concatenated in ranking order, or ``" I apologize, I don't
        understand."`` when no sentence shares any term with the query.
    """
    user_input = user_input.lower()

    # The query goes last so its row in the count matrix is cm[-1].
    documents = sentence_list + [user_input]
    query_index = len(documents) - 1

    cm = CountVectorizer().fit_transform(documents)
    similarity_scores_list = cosine_similarity(cm[-1], cm).flatten()
    sorted_indices = index_sort(similarity_scores_list)

    matches = []
    for index in sorted_indices:
        if index == query_index:
            # Skip the query itself explicitly instead of assuming that it
            # always ranks first (a duplicate sentence can tie with it).
            continue
        if similarity_scores_list[index] <= 0.0:
            # Scores are in descending order: nothing further can match.
            break
        matches.append(documents[index])
        if len(matches) > 2:  # Limit the number of responses
            break

    if not matches:
        return " I apologize, I don't understand."

    return ''.join(' ' + sentence for sentence in matches)

# Introduction handler: replies when the user asks for the bot's name
def introduction_response(text):
    """Return the bot's self-introduction if *text* asks for its name.

    The check is a case-insensitive substring search for
    "what is your name".

    Args:
        text: The raw message typed by the user.

    Returns:
        The introduction sentence, or ``None`` when the phrase is absent.
    """
    asked_for_name = "what is your name" in text.lower()
    if not asked_for_name:
        return None
    return "My name is Shewin. How can I help you? Ask me any question and I will give you the best answer I know."

# Start the chat
print('Doc Bot: I am Doc bot. I will answer your questions.')

# Messages (compared case-insensitively, ignoring surrounding whitespace)
# that end the conversation
exit_list = ['exit', 'see you later', 'bye', 'quit', 'break']

while True:
    try:
        user_input = input()
    except EOFError:
        # Input stream was closed: end the chat instead of crashing.
        break

    if user_input.strip().lower() in exit_list:
        print('Doc Bot: Okay, Bye... Chat with you later!')
        break

    # Try the handlers in priority order, calling each at most once.
    # greeting_response / introduction_response return None when they do not
    # apply; bot_response always returns a non-empty string.
    reply = (
        greeting_response(user_input)
        or introduction_response(user_input)
        or bot_response(user_input)
    )
    print('Doc Bot: ' + reply)
