In [None]:

# The complete Jupyter interface is defined in this file.
import html
from collections import Counter
from pathlib import Path

import ipywidgets as widgets
import pandas as pd
from IPython.display import display, HTML

import src.analytics as analytics
from src.Corpus import Corpus
from src.Document import Document
from src.SearchEngine import SearchEngine

# Load the corpus.
#
# Prefer a copy of the dataset addressed relative to the working directory so
# the notebook is not tied to one machine; fall back to the original absolute
# location when that relative file is not present.
# NOTE(review): assumes the notebook is started from the project root (the
# directory that contains ``data/``) -- confirm.
_relative_csv = Path("data") / "discours_US.csv"
_legacy_csv = Path(r"C:\Users\user\OneDrive\Desktop\projet python\data\discours_US.csv")
csv_path = _relative_csv if _relative_csv.is_file() else _legacy_csv

# The file is tab-separated despite its .csv extension.
df = pd.read_csv(csv_path, sep="\t")
corpus = Corpus("Discours US")
corpus.load_from_dataframe(df)
engine = SearchEngine(corpus)

print("le  Corpus chargé avec", corpus.taille(), "documents")

# Search widgets.
title = widgets.HTML("<h2> Mini Moteur de Recherche & Analyse (Discours US)</h2>")

query = widgets.Text(
    value="",
    placeholder="ex: freedom",
    description="Mot clé :",
)
limit = widgets.IntSlider(
    value=10,
    min=1,
    max=50,
    step=1,
    description="Résultats :",
)
search_button = widgets.Button(
    description="Rechercher",
    button_style="primary",
)
output_search = widgets.Output()

# Analytics widgets (TF-IDF / BM25).
topk_slider = widgets.IntSlider(
    value=10,
    min=1,
    max=50,
    step=1,
    description="Top K :",
)
tfidf_button = widgets.Button(
    description="Top TF-IDF",
    button_style="info",
)
bm25_button = widgets.Button(
    description="BM25",
    button_style="warning",
)
output_analytics = widgets.Output()

# Sub-corpus comparison widgets.
filterA = widgets.Text(
    value="",
    placeholder="Auteur ou type A",
    description="Filtre A :",
)
filterB = widgets.Text(
    value="",
    placeholder="Auteur ou type B",
    description="Filtre B :",
)
compare_button = widgets.Button(
    description="Comparer",
    button_style="success",
)
output_compare = widgets.Output()

# Temporal evolution widgets.
term_widget = widgets.Text(
    value="",
    placeholder="Mot clé",
    description="Terme :",
)
period_widget = widgets.Dropdown(
    options=["year", "month", "full"],
    value="year",
    description="Période :",
)
evol_button = widgets.Button(
    description="Évolution",
    button_style="primary",
)
output_evolution = widgets.Output()

# Widget callback functions.
# An AI tool was used to fix the errors in these functions.
def on_search_clicked(b):
    """Run a keyword search and render the matching documents.

    Reads the keyword from ``query`` and the maximum number of results from
    ``limit``, calls ``engine.search`` and writes the outcome to
    ``output_search``. An empty keyword or an empty result list produces a
    red message instead of results.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; unused.
    """
    output_search.clear_output()
    q = query.value.strip()
    n = limit.value
    if not q:
        with output_search:
            display(HTML("<b style='color:red'>⚠ Veuillez entrer un mot-clé.</b>"))
        return
    results = engine.search(q, n)
    with output_search:
        if not results:
            display(HTML("<b style='color:red'>Aucun résultat trouvé.</b>"))
        else:
            for _doc_id, doc in results:
                # Document fields are raw dataset text: escape them so that
                # characters such as "<" or "&" are shown literally instead of
                # being interpreted as markup and breaking the layout.
                author = html.escape(str(doc.auteur))
                date = html.escape(str(doc.date))
                text = str(doc.texte)
                preview = html.escape(text[:300])
                # Only announce a continuation when the text was really cut.
                ellipsis = "..." if len(text) > 300 else ""
                full_text = html.escape(text)
                display(HTML(f"""
                    <div style='background:#f7f7f7;padding:10px;margin:6px;border-radius:6px'>
                        <b>{author}</b> — <i>{date}</i><br>
                        <p>{preview}{ellipsis}</p>
                        <details><summary>Voir texte complet</summary><p>{full_text}</p></details>
                    </div>
                """))

def on_tfidf_clicked(b):
    """Show the top TF-IDF terms of the whole corpus.

    The number of terms comes from ``topk_slider``; the computation is
    delegated to ``analytics.top_terms_tfidf_for_corpus``, whose result is
    iterated as ``(term, score)`` pairs. The listing replaces whatever
    ``output_analytics`` was showing.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; unused.
    """
    output_analytics.clear_output()
    k = topk_slider.value
    ranked_terms = analytics.top_terms_tfidf_for_corpus(corpus, topk=k)
    with output_analytics:
        heading = f"<h4>Top {k} termes TF-IDF du corpus :</h4>"
        display(HTML(heading))
        # One "term : score" entry per line, scores with four decimals.
        rows = []
        for term, weight in ranked_terms:
            rows.append(f"{term} : {weight:.4f}")
        display(HTML("<br>".join(rows)))

def on_bm25_clicked(b):
    """Rank documents with BM25 for the current keyword and display them.

    The keyword is taken from the search field ``query`` (there is no
    dedicated BM25 input) and the number of documents from ``topk_slider``.
    The index is built with ``analytics.compute_bm25`` on every click and
    scored with ``analytics.bm25_score_for_query``; results are written to
    ``output_analytics``. An empty keyword produces a red message instead.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; unused.
    """
    output_analytics.clear_output()
    topk = topk_slider.value
    q = query.value.strip()
    if not q:
        with output_analytics:
            display(HTML("<b style='color:red'>⚠ Entrez un mot-clé pour BM25.</b>"))
        return
    bm25_index = analytics.compute_bm25(corpus)
    results = analytics.bm25_score_for_query(corpus, q, bm25_index, topk=topk)
    with output_analytics:
        # The keyword is user input and the document fields are raw dataset
        # text: escape both so markup characters are displayed literally.
        display(HTML(f"<h4>BM25 – Top {topk} documents pour '{html.escape(q)}' :</h4>"))
        for doc_id, score in results:
            doc = corpus.id2doc[doc_id]
            author = html.escape(str(doc.auteur))
            date = html.escape(str(doc.date))
            text = str(doc.texte)
            preview = html.escape(text[:200])
            # Only announce a continuation when the text was really cut.
            ellipsis = "..." if len(text) > 200 else ""
            display(HTML(f"<b>{author}</b> — <i>{date}</i> : score={score:.4f}<br>{preview}{ellipsis}<hr>"))

def on_compare_clicked(b):
    """Compare the vocabulary of two sub-corpora selected by text filters.

    Each filter (``filterA`` / ``filterB``) selects the documents whose
    ``auteur`` or ``type`` contains the filter text, case-insensitively. The
    two selections are wrapped in temporary ``Corpus`` objects and passed to
    ``analytics.common_and_specific_terms``; the common and specific terms
    are written to ``output_compare``.

    Both filters are required: an empty string is a substring of every value
    and would silently select the whole corpus. A filter that matches no
    document is reported instead of comparing against an empty sub-corpus.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; unused.
    """
    output_compare.clear_output()
    fA = filterA.value.strip()
    fB = filterB.value.strip()
    if not fA or not fB:
        with output_compare:
            display(HTML("<b style='color:red'>⚠ Veuillez renseigner les deux filtres.</b>"))
        return

    def matching_docs(filt):
        # Lower-case the filter once instead of once per document.
        needle = filt.lower()
        return [
            doc
            for doc in corpus.id2doc.values()
            if needle in doc.auteur.lower() or needle in doc.type.lower()
        ]

    def build_subcorpus(docs):
        sub = Corpus("sub")
        for doc in docs:
            sub.add_doc(doc)
        return sub

    docsA = matching_docs(fA)
    docsB = matching_docs(fB)
    unmatched = [label for label, docs in (("A", docsA), ("B", docsB)) if not docs]
    if unmatched:
        with output_compare:
            display(HTML(
                "<b style='color:red'>Aucun document ne correspond au filtre "
                f"{' / '.join(unmatched)}.</b>"
            ))
        return

    subA = build_subcorpus(docsA)
    subB = build_subcorpus(docsB)
    common, specificA, specificB = analytics.common_and_specific_terms(subA, subB, topk=20)

    def as_html(terms):
        # Terms come from document text; escape them before embedding in HTML.
        return ', '.join(html.escape(term) for term in terms)

    with output_compare:
        display(HTML(f"<h4>Termes communs :</h4>{as_html(common)}"))
        display(HTML(f"<h4>Termes spécifiques A :</h4>{as_html(specificA)}"))
        display(HTML(f"<h4>Termes spécifiques B :</h4>{as_html(specificB)}"))

def on_evolution_clicked(b):
    """Display how often a term occurs per time period.

    Reads the term from ``term_widget`` and the granularity (``'year'``,
    ``'month'`` or ``'full'``) from ``period_widget``, then calls
    ``analytics.term_time_series`` with a function that maps a document date
    to its period label. Each ``period : count`` pair of the returned mapping
    is written to ``output_evolution``. An empty term produces a red message
    instead.

    Args:
        b: The clicked button, supplied by ``Button.on_click``; unused.
    """
    output_evolution.clear_output()
    term = term_widget.value.strip()
    period = period_widget.value
    if not term:
        with output_evolution:
            display(HTML("<b style='color:red'>Entrez un terme.</b>"))
        return

    def period_func(date_str):
        # Missing dates get one shared label at every granularity; previously
        # only "year" was guarded and "month" failed on a None date.
        # NOTE(review): assumes dates are "-"-separated strings starting with
        # the year (e.g. YYYY-MM-DD) -- confirm against the dataset.
        if not date_str:
            return "unknown"
        if period == "year":
            return date_str.split("-")[0]
        if period == "month":
            parts = date_str.split("-")
            return f"{parts[0]}-{parts[1]}" if len(parts) >= 2 else date_str
        return date_str

    series = analytics.term_time_series(corpus, term, period_func)
    with output_evolution:
        # The term is user input: escape it before embedding it in HTML.
        display(HTML(f"<h4>Évolution du terme '{html.escape(term)}' :</h4>"))
        for per, count in series.items():
            display(HTML(f"{html.escape(str(per))} : {count}"))

# Attach each click handler to its button.
for _button, _handler in (
    (search_button, on_search_clicked),
    (tfidf_button, on_tfidf_clicked),
    (bm25_button, on_bm25_clicked),
    (compare_button, on_compare_clicked),
    (evol_button, on_evolution_clicked),
):
    _button.on_click(_handler)
# Keep the notebook namespace free of the loop temporaries.
del _button, _handler

# Render the interface, one section after the other.
display(title)
display(
    widgets.HTML("<h3>Recherche</h3>"),
    query,
    limit,
    search_button,
    output_search,
)
display(
    widgets.HTML("<h3>Analytics</h3>"),
    topk_slider,
    tfidf_button,
    bm25_button,
    output_analytics,
)
display(
    widgets.HTML("<h3>Comparaison de sous-corpus</h3>"),
    filterA,
    filterB,
    compare_button,
    output_compare,
)
display(
    widgets.HTML("<h3>Évolution temporelle</h3>"),
    term_widget,
    period_widget,
    evol_button,
    output_evolution,
)


le  Corpus chargé avec 30832 documents


HTML(value='<h2> Mini Moteur de Recherche & Analyse (Discours US)</h2>')

HTML(value='<h3>Recherche</h3>')

Text(value='', description='Mot clé :', placeholder='ex: freedom')

IntSlider(value=10, description='Résultats :', max=50, min=1)

Button(button_style='primary', description='Rechercher', style=ButtonStyle())

Output()

HTML(value='<h3>Analytics</h3>')

IntSlider(value=10, description='Top K :', max=50, min=1)

Button(button_style='info', description='Top TF-IDF', style=ButtonStyle())



Output()

HTML(value='<h3>Comparaison de sous-corpus</h3>')

Text(value='', description='Filtre A :', placeholder='Auteur ou type A')

Text(value='', description='Filtre B :', placeholder='Auteur ou type B')

Button(button_style='success', description='Comparer', style=ButtonStyle())

Output()

HTML(value='<h3>Évolution temporelle</h3>')

Text(value='', description='Terme :', placeholder='Mot clé')

Dropdown(description='Période :', options=('year', 'month', 'full'), value='year')

Button(button_style='primary', description='Évolution', style=ButtonStyle())

Output()