In [15]:
import random
import string
import nltk
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Télécharger les ressources NLTK
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Lemmatiseur pour normaliser les mots
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

def LemNormalize(text):
    remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

# Charger le fichier CSV avec pandas
df = pd.read_csv('/content/DataScience QA.csv', encoding='utf-8', on_bad_lines='skip')

# Assumant que le fichier CSV a deux colonnes : 'question' et 'answer'
questions = df['Question'].tolist()
answers = df['Answer'].tolist()

# Fonction pour les salutations
GREETING_INPUTS = ["hello", "hi", "greetings", "sup", "what's up", "hey"]
GREETING_RESPONSES = [
    "Hi! How can I help you with AI or Data Science?",
    "Hello! Do you have any questions about AI or Data Science?",
    "Hey! Feel free to ask me anything related to AI or Data Science."
]

def greeting(sentence):
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)

# Initialisation du TF-IDF Vectorizer
TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
tfidf = TfidfVec.fit_transform(questions)

# Fonction pour la réponse
def response(user_response):
    robo_response = ''
    tfidf_user_response = TfidfVec.transform([user_response])
    vals = cosine_similarity(tfidf_user_response, tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()

    # Si la similarité est trop faible, on demande une clarification
    if flat[-2] == 0:
        robo_response = "I'm sorry, I don't understand. Could you please rephrase?"
    else:
        # Retourner seulement la réponse correspondant à la question
        robo_response = answers[idx]

    return robo_response

# Fonction principale du chatbot
def chatbot():
    print("AI Assistant: My name is AI Assistant. I will answer your queries about AI and Data Science. If you want to exit, type 'bye'.")
    flag = True
    while flag:
        user_response = input().lower()
        if user_response != 'bye':
            if greeting(user_response) is not None:
                print("AI Assistant: " + greeting(user_response))
            else:
                print("AI Assistant: " + response(user_response))
        else:
            flag = False
            print("AI Assistant: Bye! Take care.")

# Lancer le chatbot
chatbot()


AI Assistant: My name is AI Assistant. I will answer your queries about AI and Data Science. If you want to exit, type 'bye'.
NLP?
AI Assistant: Natural language processing is a field of artificial intelligence that focuses on enabling computers to understand, interpret, and generate human language, including tasks such as text classification, sentiment analysis, machine translation, and question answering.
hey
AI Assistant: Hi! How can I help you with AI or Data Science?
bye
AI Assistant: Bye! Take care.


In [18]:
# Save answers after the chat
with open('answers.pkl', 'wb') as f:
    pickle.dump(answers, f)

print("Model components saved successfully!")

Model components saved successfully!
