In [1]:
import pandas as pd

In [2]:
from model.Document import Document
from model.Corpus import Corpus

In [3]:

import ipywidgets as widgets
from IPython.display import clear_output
from model.SearchEngine import SearchEngine


In [5]:
# Read the tab-separated speeches file into a DataFrame, then preview it
# (the bare name on the last line triggers the notebook's rich display).
df = pd.read_csv(
    "../corpus/discours_US.csv",
    sep="\t",
)
df

Unnamed: 0,speaker,text,date,descr,link
0,CLINTON,": I'm getting ready for a lot of things, a lot...","April 12, 2015",Video Remarks Announcing Candidacy for President,http://www.presidency.ucsb.edu/ws/index.php?pi...
1,CLINTON,"[ ] : I'll be graduating in May, and on gradua...","April 14, 2015",Remarks in a Question and Answer Session at Ki...,http://www.presidency.ucsb.edu/ws/index.php?pi...
2,CLINTON,": Well, thank you all so much for inviting me ...","April 20, 2015","Remarks in Keene, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...
3,CLINTON,Thank you so much. I am absolutely delighted t...,"April 29, 2015",Address to the David N. Dinkins Leadership & P...,http://www.presidency.ucsb.edu/ws/index.php?pi...
4,CLINTON,"Oh, hello. Hi, how are you? Well, it's wonderf...","May 5, 2015",Remarks at a Roundtable with Young Nevada Resi...,http://www.presidency.ucsb.edu/ws/index.php?pi...
...,...,...,...,...,...
159,CLINTON,"Hello, Pittsburgh! Woah! Hello back there! Tha...","November 7, 2016",Remarks at the University of Pittsburgh,http://www.presidency.ucsb.edu/ws/index.php?pi...
160,CLINTON,Thank you all! Whoa! Thank you! Thank you. Wel...,"November 7, 2016",Remarks at North Carolina State University in ...,http://www.presidency.ucsb.edu/ws/index.php?pi...
161,CLINTON,Hello Grand Valley! Thank you! Thank you. It i...,"November 7, 2016",Remarks at Grand Valley State University in Gr...,http://www.presidency.ucsb.edu/ws/index.php?pi...
162,TRUMP,"Thank you. Thank you very much, everyone. [ ] ...","November 9, 2016",Remarks in New York City Accepting Election as...,http://www.presidency.ucsb.edu/ws/index.php?pi...


In [6]:
# Distribution of speeches per speaker: number of rows for each distinct
# value of the "speaker" column.
df.loc[:, "speaker"].value_counts()

speaker
CLINTON    93
TRUMP      71
Name: count, dtype: int64

In [7]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple.
df.shape

(164, 5)

In [8]:
# Build the corpus at sentence granularity: each speech is split on "." and
# every non-empty fragment becomes its own Document carrying the speech's
# speaker, date and source link.
corpus = Corpus("US_discours")
speech_rows = df[["speaker", "text", "date", "link"]].itertuples(index=False, name=None)
for auteur, text, date, url in speech_rows:
    # A missing transcript is read by pandas as NaN (a float), which has no
    # .split(); there is nothing to index for such a row.
    if not isinstance(text, str):
        continue
    for phrase in text.split("."):
        # str.split(".") yields "" after a trailing period and whitespace-only
        # fragments between consecutive periods; skip them so the corpus does
        # not contain empty documents.
        if not phrase.strip():
            continue
        doc = Document("discours", auteur=auteur, date=date, url=url, texte=phrase)
        corpus.add(doc)


In [9]:
# Look up the pattern "thank" in the corpus.
# NOTE(review): the second argument (10) looks like a context width in
# characters around the match -- confirm against model.Corpus.search.
corpus.search("thank",10)

'rst I want thank you, Nic'

In [10]:
# Concordance for the pattern "W": the displayed table has one row per match
# with its left context, the matched pattern and its right context.
# NOTE(review): the second argument (10) is presumably the context width in
# characters -- confirm against model.Corpus.concorde.
corpus.concorde("W",10)

Unnamed: 0,contexte gauche,motif trouve,contexte droit
0,nd a half,W,ow That's
1,nks Great,W,"ell, first"
2,of money,W,e're got t
3,do this?,W,hat motiva
4,o forward,W,hen I got
...,...,...,...
5435,g-hearted,W,e have see
5436,president,W,e owe him
5437,xpression,W,e respect
5438,ir dreams,W,e've spent


In [13]:
# Build a SearchEngine over the corpus and query it for "president",
# requesting top_k=5 results; the result is displayed as the cell output.
# NOTE(review): the name result_df suggests search() returns a pandas
# DataFrame -- confirm against model.SearchEngine.
search_engine = SearchEngine(corpus)
result_df = search_engine.search("president", top_k=5)
result_df

In [16]:
import ipywidgets as widgets
from IPython.display import display


# --- Search UI: title, query box, result-count slider, button, output area ---

# Centered page title.
title = widgets.HTML(
    value="<h1 style='text-align: center;'>Moteur de recherche</h1>",
    placeholder='',
    description='',
)

# Free-text field holding the search keywords.
texte = widgets.Text(
    value='',
    placeholder='Type something',
    description='Mots clés:',
    disabled=False,
)

# Number of results to request: the click handler passes slider.value to the
# search engine as the result count. The lower bound is 1 because asking for
# zero results is meaningless, and the label names what the slider controls.
slider = widgets.IntSlider(
    value=7,
    min=1,
    max=10,
    step=1,
    description='Nb résultats:',
    # Let the label take the width it needs instead of being truncated.
    style={'description_width': 'initial'},
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# Button that triggers the search once a click handler is registered on it.
button = widgets.Button(
    description='Rechercher',
    disabled=False,
    button_style='',
    tooltip='Click me',
    icon='check',
)

# Bordered area that receives whatever the click handler prints or displays.
output = widgets.Output(
    layout=widgets.Layout(
        width='80%',
        border='1px solid black',
        padding='10px'
    )
)

# Stack all widgets vertically, centered on the page.
container = widgets.VBox(
    [title, texte, slider, button, output],
    layout=widgets.Layout(
        align_items='center',
        justify_content='center',
        width='100%'
    )
)

display(container)


VBox(children=(HTML(value="<h1 style='text-align: center;'>Moteur de recherche</h1>", placeholder=''), Text(va…

In [15]:
def clique_bouton(b):
    """Run a search from the widget values and render the result in ``output``.

    Click handler for the search button. Reads the keywords from ``texte`` and
    the requested number of results from ``slider``, queries a ``SearchEngine``
    built on ``corpus``, and writes the results, a "no result" message, or the
    error text into the ``output`` widget. Exceptions raised during the search
    or the rendering are reported in ``output`` rather than propagated.

    Args:
        b: Argument supplied by the button's click event; not used.
    """
    # Local import keeps the handler usable even if the cell that imports
    # ``display`` has not been run in the current kernel session.
    from IPython.display import display

    mots_cles = texte.value.strip()
    nb_documents = slider.value

    with output:
        # Drop whatever the previous click rendered.
        output.clear_output()

        if not mots_cles:
            print("Veuillez entrer des mots-clés pour effectuer une recherche.")
            return

        try:
            results = SearchEngine(corpus).search(mots_cles, nb_documents)

            # A pandas DataFrame/Series cannot be used in a boolean test
            # (``if results:`` raises "truth value ... is ambiguous"), so
            # emptiness is read from ``.empty`` when the result exposes it.
            tabular = hasattr(results, "empty")
            if results is None:
                aucun_resultat = True
            elif tabular:
                aucun_resultat = bool(results.empty)
            else:
                aucun_resultat = not results

            if aucun_resultat:
                print("Aucun résultat trouvé.")
            elif tabular:
                # Iterating a DataFrame yields its column names, not its rows;
                # render the table itself instead.
                display(results)
            else:
                for idx, result in enumerate(results, start=1):
                    print(f"{idx}. {result}")

        except Exception as e:
            # UI callback: show the failure to the user instead of losing it
            # in the kernel log.
            print(f"Erreur lors de la recherche : {e}")


In [13]:
# Register clique_bouton as the click handler of the search button.
button.on_click(clique_bouton)