In [60]:
import xml.etree.ElementTree as ET
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import requests

In [61]:
# NLTK setup (only needs to be done once): download the 'punkt' and
# 'stopwords' data packages. The value of the last call is what the cell displays.
nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [62]:
# Load the questions XML file and hand back its root element.
def charger_et_analyser_xml(chemin_fichier):
    """Parse an XML document and return its root element.

    Args:
        chemin_fichier: File name or file object, passed straight to
            ``ET.parse``.

    Returns:
        The root ``Element`` of the parsed tree.
    """
    return ET.parse(chemin_fichier).getroot()

In [63]:
# Preprocess a question's text: tokenize it and drop English stop words.
def pretraiter_question(texte_question):
    """Tokenize a question and filter out English stop words.

    Args:
        texte_question: The question text to tokenize.

    Returns:
        A list of the tokens produced by ``word_tokenize`` whose lowercase
        form is not in NLTK's English stop-word list. Kept tokens retain
        their original casing and order.
    """
    jetons = word_tokenize(texte_question)
    mots_vides = set(stopwords.words('english'))
    jetons_gardes = []
    for jeton in jetons:
        # Comparison is case-insensitive; the token itself is kept unchanged.
        if jeton.lower() in mots_vides:
            continue
        jetons_gardes.append(jeton)
    return jetons_gardes

In [64]:
# Run a SPARQL query against an HTTP endpoint (DBpedia by default).
def executer_requete_sparql(requete_sparql, url_endpoint="https://dbpedia.org/sparql", timeout=30):
    """Execute a SPARQL query over HTTP GET and return its result bindings.

    Args:
        requete_sparql: The SPARQL query text. ``None``, empty or
            whitespace-only queries are not sent; an empty list is returned.
        url_endpoint: URL of the SPARQL endpoint. Defaults to the public
            DBpedia endpoint.
        timeout: Timeout in seconds passed to ``requests.get`` so that an
            unresponsive endpoint cannot block the caller indefinitely.

    Returns:
        The list found under ``results -> bindings`` in the JSON response,
        or an empty list when the query is blank, the request fails, the
        server answers with a 4XX/5XX status, or the body is not valid JSON.
        Failures are reported with ``print`` rather than raised.
    """
    # Nothing to ask the endpoint: avoid a request that can only fail.
    if not requete_sparql or not requete_sparql.strip():
        return []

    try:
        reponse = requests.get(
            url_endpoint,
            params={'query': requete_sparql, 'format': 'json'},
            timeout=timeout,
        )
        reponse.raise_for_status()  # Raises for HTTP 4XX/5XX status codes
        return reponse.json().get('results', {}).get('bindings', [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an invalid JSON body on requests versions whose
        # JSON decoding error is not a RequestException subclass.
        print(f"Erreur de requête SPARQL: {e}")
        return []  # Best effort: callers always get a list

In [65]:
# Extract the question text, SPARQL query and answer URIs from the XML tree.
def extraire_questions(racine):
    """Collect every ``<question>`` found under ``racine``.

    For each question the function reads:
      * the text of the first ``<string lang="en">`` descendant,
      * the text of the first ``<query>`` descendant (stripped),
      * the text of every ``answers/answer/uri`` descendant (stripped).

    Args:
        racine: Root ``Element`` of the questions document.

    Returns:
        A list of dicts with keys ``'texte_question'``, ``'requete'`` and
        ``'reponses'``. A missing or empty English string yields the
        placeholder ``"Question manquante"``; a missing or empty query
        yields ``""``; empty ``<uri>`` elements are skipped.
    """
    liste_questions = []
    for question in racine.findall('.//question'):
        # Element.text is None for an empty element, so test the text itself
        # and not only the presence of the element.
        texte_element = question.find('.//string[@lang="en"]')
        if texte_element is not None and texte_element.text:
            texte_question = texte_element.text
        else:
            texte_question = "Question manquante"

        requete_element = question.find('.//query')
        if requete_element is not None and requete_element.text:
            requete = requete_element.text.strip()
        else:
            requete = ""

        # Strip surrounding whitespace so URIs from pretty-printed XML still
        # compare equal to the URIs returned by the endpoint.
        reponses = [
            rep.text.strip()
            for rep in question.findall('.//answers/answer/uri')
            if rep.text and rep.text.strip()
        ]

        liste_questions.append({'texte_question': texte_question, 'requete': requete, 'reponses': reponses})
    return liste_questions

In [67]:
# Evaluate the queries: precision, recall and F-measure over all questions.
def evaluer_modele(questions, executer_requete_sparql):
    """Compute precision, recall and F-measure pooled over all questions.

    Each question's query is run through ``executer_requete_sparql`` and the
    returned URIs are compared with the question's expected answers. Counts
    are summed over all questions before the ratios are taken.

    Predicted and expected URIs are treated as sets per question, so a URI
    repeated in the results or in the expected answers is counted once.
    Without this, duplicates could push recall above 1.

    Args:
        questions: Iterable of dicts with at least the keys ``'requete'``
            (query text) and ``'reponses'`` (list of expected URIs).
        executer_requete_sparql: Callable taking a query string and returning
            a list of result bindings. Only bindings that contain a ``'uri'``
            entry are used, read through ``binding['uri']['value']``.

    Returns:
        A tuple ``(precision, rappel, mesure_f)``. Each value is ``0`` when
        its denominator is zero.
    """
    reponses_correctes = 0
    total_reponses_predites = 0
    total_reponses_correctes = 0

    for question in questions:
        uris_corrects = set(question['reponses'])
        total_reponses_correctes += len(uris_corrects)

        resultats_predits = executer_requete_sparql(question['requete'])
        # Bindings for variables other than ?uri are ignored.
        uris_predits = {resultat['uri']['value'] for resultat in resultats_predits if 'uri' in resultat}
        total_reponses_predites += len(uris_predits)

        reponses_correctes += len(uris_predits & uris_corrects)

    precision = reponses_correctes / total_reponses_predites if total_reponses_predites else 0
    rappel = reponses_correctes / total_reponses_correctes if total_reponses_correctes else 0
    mesure_f = 2 * (precision * rappel) / (precision + rappel) if (precision + rappel) else 0

    return precision, rappel, mesure_f

In [68]:
# Path to your questions.xml file (adjust to your environment)
chemin_fichier = 'questions.xml'

In [69]:
# Load and parse the XML file. Use the configured `chemin_fichier` instead of
# repeating the path literal, so changing the path in one place takes effect.
racine = charger_et_analyser_xml(chemin_fichier)

In [70]:
# Extract the questions from the parsed XML tree
questions = extraire_questions(racine)

In [71]:
# Display the extracted questions: text, query and expected answers, followed
# by a blank line after each question.
for i, question in enumerate(questions, start=1):
    print(
        f"Question {i}: {question['texte_question']}",
        f"Requête: {question['requete']}",
        f"Réponses: {question['reponses']}",
        "",
        sep="\n",
    )

Question 1: Which river does the Brooklyn Bridge cross?
Requête: PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> SELECT DISTINCT ?uri WHERE { res:Brooklyn_Bridge dbo:crosses ?uri . }
Réponses: ['http://dbpedia.org/resource/East_River']

Question 2: Who is the author of Wikipedia?
Requête: PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX res: <http://dbpedia.org/resource/>
SELECT DISTINCT ?uri
WHERE {
	res:Wikipedia dbp:authors ?uri .
}
Réponses: ['http://dbpedia.org/resource/Jimmy_Wales', 'http://dbpedia.org/resource/Larry_Sanger']

Question 3: In which country does the Nile start?
Requête: PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX res: <http://dbpedia.org/resource/>
SELECT DISTINCT ?uri
WHERE {       
        res:Nile dbp:source2Location ?uri .
}
Réponses: ['http://dbpedia.org/resource/Ethiopia', 'http://dbpedia.org/resource/Lake_Tana']

Question 4: What is the highest place of Karakoram?
Requête: PREFIX dbo: <http://dbpedia.org/ontology/

In [72]:
# Evaluate the model: run every question's query through
# executer_requete_sparql and unpack precision, recall and F-measure.
precision, rappel, mesure_f = evaluer_modele(questions, executer_requete_sparql)

In [73]:
# Display the evaluation results, one metric per line, to four decimals.
print(
    f"Précision: {precision:.4f}",
    f"Rappel: {rappel:.4f}",
    f"Mesure F: {mesure_f:.4f}",
    sep="\n",
)

Précision: 0.8837
Rappel: 0.9268
Mesure F: 0.9048
