In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
import json
import re
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json

# Charger les données depuis le fichier intents.json
with open('intents.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Afficher les données pour vérification
print(json.dumps(data, indent=2))


{
  "intents": [
    {
      "tag": "greeting",
      "patterns": [
        "Hi",
        "Hey",
        "Is anyone there?",
        "Hi there",
        "Hello",
        "Hey there",
        "Howdy",
        "Hola",
        "Bonjour",
        "Konnichiwa",
        "Guten tag",
        "Ola"
      ],
      "responses": [
        "Hello there. Tell me how are you feeling today?",
        "Hi there. What brings you here today?",
        "Hi there. How are you feeling today?",
        "Great to see you. How do you feel currently?",
        "Hello there. Glad to see you're back. What's going on in your world right now?"
      ]
    },
    {
      "tag": "morning",
      "patterns": [
        "Good morning"
      ],
      "responses": [
        "Good morning. I hope you had a good night's sleep. How are you feeling today? "
      ]
    },
    {
      "tag": "afternoon",
      "patterns": [
        "Good afternoon"
      ],
      "responses": [
        "Good afternoon. How is your day going?"

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Extraire les patterns et les tags
patterns = []
tags = []
for intent in data['intents']:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

# Effectuer une tokenisation sur les phrases
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(patterns)

# Encoder les tags
encoder = LabelEncoder()
y = encoder.fit_transform(tags)

print("Patterns:", patterns)
print("Tags:", tags)
print("Tokenized patterns:", X.toarray())
print("Encoded tags:", y)


Tags: ['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'morning', 'afternoon', 'evening', 'night', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'no-response', 'neutral-response', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'skill', 'creation', 'creation', 'creation', 'name', 'name', 'name', 'help', 'help', 'help', 'help', 'help', 'help', 'help', 'sad', 'sad', 'sad', 'sad', 'sad', 'sad', 'sad', 'sad', 'stressed', 'stressed', 'stressed', 'stressed', 'stressed', 'worthless', 'worthless', 'worthless', 'worthless', 'worthless', 'depressed', 'depressed', 'depressed', 'depressed', 'happy', 'happy', 'happy', 'happy', 'happy', 'happy', 'happy', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'casual', 'anxious', 'anxious', 'not-ta

In [4]:
from sklearn.metrics.pairwise import cosine_similarity

def respond(query):
    # Transformer la requête en un vecteur de caractéristiques
    query_vector = vectorizer.transform([query])

    # Calculer la similarité entre le vecteur de requête et tous les vecteurs de phrases d'entraînement
    similarities = cosine_similarity(query_vector, X)

    # Trouver l'index de la phrase d'entraînement avec la plus grande similarité
    best_match_index = np.argmax(similarities)

    # Trouver le tag correspondant à cette phrase
    best_match_tag = tags[best_match_index]

    # Trouver une réponse correspondant à ce tag
    for intent in data['intents']:
        if intent['tag'] == best_match_tag:
            response = np.random.choice(intent['responses'])
            break

    return response

# Test de la fonction de réponse
query = "Good morning"
print("Query:", query)
print("Response:", respond(query))


Query: Good morning
Response: Good morning. I hope you had a good night's sleep. How are you feeling today? 


In [5]:
print("Query: Bonjour")
print("Response:", respond("Bonjour"))
print("\n")

print("Query: Merci")
print("Response:", respond("Merci"))
print("\n")

print("Query: Au revoir")
print("Response:", respond("Au revoir"))
print("\n")


Query: Bonjour
Response: Great to see you. How do you feel currently?


Query: Merci
Response: Hello there. Glad to see you're back. What's going on in your world right now?


Query: Au revoir
Response: Bye! Come back again.




In [6]:
while True:
    query = input("Vous: ")
    print("Vous: ", query)
    if query.lower() == "Bye":
        break
    response = respond(query)
    print("Chatbot: ", response)


Vous:  hello
Chatbot:  Hello there. Glad to see you're back. What's going on in your world right now?
Vous:  tell me joke
Chatbot:  mental health is not a joke.
Vous:  bye
Chatbot:  See you later.
Vous:  Bye
Chatbot:  Bye! Come back again.
Vous:  Bye
Chatbot:  I'll see you soon.
Vous:  
Chatbot:  Hello there. Glad to see you're back. What's going on in your world right now?


In [6]:
# Charger le modèle GPT-2 pré-entraîné
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Définir explicitement pad_token_id sur eos_token_id
gpt2_model.config.pad_token_id = gpt2_tokenizer.eos_token_id

# Fonction pour générer une question avec GPT-2
def generate_gpt2_question():
    random_pattern = random.choice(patterns)
    input_text = f"{random_pattern}"  # Ajout de "Generate a question:" avant le pattern

    # Encoder le texte
    input_ids = gpt2_tokenizer.encode(input_text, return_tensors="pt", max_length=512)

    # Définir le masque d'attention
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long)

    # Générer la séquence
    output = gpt2_model.generate(input_ids, attention_mask=attention_mask, max_length=150, num_beams=5,
                                 no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7)

    # Décoder la séquence
    generated_sequence = gpt2_tokenizer.decode(output[0], skip_special_tokens=True)

    # Extraire la question complétée après un point d'interrogation, un point ou entre guillemets
    match = re.search(r"(.*[.!?])|(\".*\")", generated_sequence)
    if match:
        generated_question = match.group()
    else:
        generated_question = generated_sequence

    return generated_question


# Fonction pour simuler une conversation entre les deux chatbots
def chat_between_bots():
    print("MAHMOUD: Hi! ")
    print("BMZ: Hello, I am BMZ, a chatbot model.")
    print("Let's start our conversation.")

    for _ in range(3):  # Nombre de questions dans la discussion (vous pouvez ajuster cela à des fins de démonstration)
        # Générer une question avec GPT-2
        gpt2_question = generate_gpt2_question()
        print(f"MAHMOUD: {gpt2_question}\n")

        # Obtenir la réponse de TF-IDF
        tfidf_response_text = respond(gpt2_question)
        print(f"BMZ: {tfidf_response_text}\n")

# Lancer la conversation entre les deux chatbots
chat_between_bots()


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


MAHMOUD: Hi! 
BMZ: Hello, I am BMZ, a chatbot model.
Let's start our conversation.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


MAHMOUD: Gene editing technologies have been used in the past to improve the quality of the images, but they have not been able to do so in a way that is consistent with the human eye.

BMZ: History is filled with mysteries. Discuss historical enigmas, explore unsolved puzzles from the past, or share your thoughts on historical conundrums.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


MAHMOUD: 1. Don't be afraid to ask for help. If you don't know what you're talking about, it's probably not going to work out for you. You might be able to get help from a friend or family member, but you might not have the time or resources to do it yourself. It's best to talk to someone who knows what they're doing, and ask them if they can help you out. Ask them about your diet, exercise habits, or any other health issues that might affect your health. They might also want to know if there's anything you can do to reduce your risk of heart disease, stroke, cancer, diabetes, etc.

BMZ: I want to help you. I really do. But in order for me to help you, you're gonna have to talk to me.

MAHMOUD: I am so useless that I can't do anything about it.

BMZ: It's only natural to feel this way. Tell me more. What else is on your mind?

