import random
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class Chatbot:
    """Retrieval-based console chatbot driven by TF-IDF sentence matching.

    A text corpus is split into sentences with spaCy, the sentences are
    vectorized with a TF-IDF vectorizer, and each user query is answered
    with the corpus sentence whose vector has the highest cosine similarity
    to the query vector.
    """

    # Words/phrases that trigger a canned greeting instead of a corpus lookup.
    GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
    GREETING_RESPONSES = ["Hi there!", "Hello!", "Hey!", "Hi!", "Nice to meet you!", "Hey, how are you?"]
    # Characters stripped from both ends of each word before greeting
    # matching, so that "hello!" or "hey," still count as greetings.
    _EDGE_PUNCTUATION = ".,!?;:\"'()"

    def __init__(self):
        """Load the spaCy pipeline and create an unfitted TF-IDF vectorizer."""
        self.nlp = spacy.load('en_core_web_sm')
        self.tf_idf_vectorizer = TfidfVectorizer(stop_words='english')
        # Lower-cased corpus sentences; filled in by chat().
        self.sent_tokens = []
        # TF-IDF matrix with one row per entry of sent_tokens; None until fitted.
        self.tfidf_matrix = None

    def read_corpus(self, corpus_file):
        """Return the full text of *corpus_file*.

        The file is decoded as UTF-8; bytes that cannot be decoded are
        dropped (``errors='ignore'``) instead of raising.
        """
        with open(corpus_file, 'r', encoding='utf8', errors='ignore') as file:
            return file.read()

    def lem_normalize(self, text):
        """Return the lemmas of *text*, skipping punctuation and stop words.

        This helper is not called by the other methods of this class.
        """
        return [token.lemma_ for token in self.nlp(text) if not token.is_punct and not token.is_stop]

    def greeting(self, sentence):
        """Return a random greeting reply if *sentence* contains a greeting.

        Matching is case-insensitive, ignores punctuation attached to the
        ends of words, and works on whole words/phrases only, so "this" does
        not match "hi" while "what's up?" matches the two-word entry.

        Returns:
            One of ``GREETING_RESPONSES``, or ``None`` when no greeting is
            found.
        """
        words = (word.strip(self._EDGE_PUNCTUATION) for word in sentence.lower().split())
        # Pad with spaces so that a substring test only hits whole words and
        # multi-word phrases such as "what's up" can be matched as well.
        padded = " " + " ".join(word for word in words if word) + " "
        if any(f" {phrase} " in padded for phrase in self.GREETING_INPUTS):
            return random.choice(self.GREETING_RESPONSES)
        return None

    def generate_response(self, user_response, user_name="User"):
        """Return the corpus sentence most similar to *user_response*.

        Args:
            user_response: The user's query text.
            user_name: Name used in the fallback apology.

        Returns:
            The best-matching lower-cased corpus sentence, or an apology
            when no corpus is loaded or no sentence shares any TF-IDF term
            with the query (best similarity is 0).
        """
        fallback = f"I am sorry, {user_name}. I don't understand you."
        if not self.sent_tokens:
            return fallback

        # Reuse the cached corpus matrix; rebuild it only when it is missing
        # or no longer has one row per sentence in sent_tokens.
        matrix = self.tfidf_matrix
        if matrix is None or matrix.shape[0] != len(self.sent_tokens):
            matrix = self.tf_idf_vectorizer.transform(self.sent_tokens)
            self.tfidf_matrix = matrix

        # Vectorize only the query. The corpus list is never mutated, so a
        # query that equals a corpus sentence cannot remove that sentence.
        query_vector = self.tf_idf_vectorizer.transform([user_response.lower()])
        scores = cosine_similarity(query_vector, matrix).flatten()
        best = int(scores.argmax())
        if scores[best] == 0:
            return fallback
        return self.sent_tokens[best]

    def chat(self, corpus_file, user_name="User"):
        """Run an interactive console session over the given corpus.

        Reads *corpus_file*, splits it into sentences, fits the TF-IDF
        vectorizer on them, then loops over user input until the user types
        "bye", "thanks" or "thank you", or standard input is closed.

        Args:
            corpus_file: Path of the text file to answer questions from.
            user_name: Name shown in the prompt and used in replies.

        Raises:
            ValueError: If the corpus yields no sentences.
        """
        self.corpus = self.read_corpus(corpus_file)
        self.doc = self.nlp(self.corpus)
        self.sent_tokens = [sent.text.lower() for sent in self.doc.sents]
        if not self.sent_tokens:
            raise ValueError(f"Corpus file {corpus_file!r} contains no sentences.")
        self.tfidf_matrix = self.tf_idf_vectorizer.fit_transform(self.sent_tokens)

        print(f"ROBO: My name is ROBO. I will answer your queries about Chatbots, {user_name}. If you want to exit, type Bye!")
        while True:
            try:
                user_response = input(f"{user_name.upper()}: ")
            except EOFError:
                # Closed stdin (e.g. piped input ran out): leave politely.
                user_response = 'bye'
            # Strip so that surrounding whitespace does not defeat the
            # exact-match commands below.
            user_response = user_response.strip().lower()

            if user_response == 'bye':
                print("ROBO: Bye! Take care..")
                break
            if user_response in ('thanks', 'thank you'):
                print("ROBO: You are welcome..")
                break

            # Call greeting() once and reuse its result.
            greeting_reply = self.greeting(user_response)
            if greeting_reply is not None:
                print(f"ROBO: {greeting_reply}")
            else:
                print("ROBO: " + self.generate_response(user_response, user_name))

if __name__ == "__main__":
    # Build the bot first (this loads the spaCy model), then ask for the
    # user's name and start the console session on the bundled corpus file.
    bot = Chatbot()
    bot.chat('chatbot.txt', input("Enter your name: "))

