In [2]:
import pandas as pd
from model.Document import Document
from model.Corpus import Corpus
import ipywidgets as widgets
from IPython.display import clear_output
from model.SearchEngine import SearchEngine


In [3]:
# Load the tab-separated speech file and keep only its first 10 rows
# (presumably a small sample for quick experiments; the full file is larger).
df = pd.read_csv("../corpus/discours_US.csv", sep="\t").head(10)
df

Unnamed: 0,speaker,text,date,descr,link
0,CLINTON,": I'm getting ready for a lot of things, a lot...","April 12, 2015",Video Remarks Announcing Candidacy for President,http://www.presidency.ucsb.edu/ws/index.php?pi...
1,CLINTON,"[ ] : I'll be graduating in May, and on gradua...","April 14, 2015",Remarks in a Question and Answer Session at Ki...,http://www.presidency.ucsb.edu/ws/index.php?pi...
2,CLINTON,": Well, thank you all so much for inviting me ...","April 20, 2015","Remarks in Keene, New Hampshire",http://www.presidency.ucsb.edu/ws/index.php?pi...
3,CLINTON,Thank you so much. I am absolutely delighted t...,"April 29, 2015",Address to the David N. Dinkins Leadership & P...,http://www.presidency.ucsb.edu/ws/index.php?pi...
4,CLINTON,"Oh, hello. Hi, how are you? Well, it's wonderf...","May 5, 2015",Remarks at a Roundtable with Young Nevada Resi...,http://www.presidency.ucsb.edu/ws/index.php?pi...
5,CLINTON,Thank you. Wow. Thank you. I am thrilled to be...,"May 18, 2015","Remarks at a Campaign Event in Mason City, Iowa",http://www.presidency.ucsb.edu/ws/index.php?pi...
6,CLINTON,Well first let me thank all of you in particul...,"May 19, 2015","Remarks in Cedar Falls, Iowa",http://www.presidency.ucsb.edu/ws/index.php?pi...
7,CLINTON,"Wow, what a great way to begin and to all of y...","May 20, 2015","Remarks in Chicago, Illinois",http://www.presidency.ucsb.edu/ws/index.php?pi...
8,CLINTON,"Well, this is so exciting, to be here with all...","May 27, 2015",Remarks to the Democratic Women's Council in C...,http://www.presidency.ucsb.edu/ws/index.php?pi...
9,CLINTON,Wow! Thank you so very much. I cannot tell you...,"June 4, 2015",Remarks at Texas Southern University in Houston,http://www.presidency.ucsb.edu/ws/index.php?pi...


In [9]:
# Distribution of speeches per speaker.
# NOTE(review): the recorded output counts 93 + 71 = 164 speeches, whereas the load cell keeps
# only the first 10 rows of the file. The execution counts are out of order, so this output
# appears to come from a different state of `df`; re-run the notebook top to bottom to confirm.
df["speaker"].value_counts()

speaker
CLINTON    93
TRUMP      71
Name: count, dtype: int64

In [7]:
# (rows, columns) of the loaded frame.
# NOTE(review): the recorded (164, 5) does not match the 10-row slice taken in the load cell;
# this output appears stale (cells were executed out of order).
df.shape

(164, 5)

In [4]:
# Build a sentence-level corpus: every speech is split on "." and each non-blank
# fragment becomes its own Document carrying the metadata of the speech it came from.
corpus = Corpus("US_discours")

# itertuples walks the rows in order whatever the index looks like, unlike
# `df.loc[i]` with a positional counter, which only works on a default 0..n-1 index.
for speech in df[["speaker", "text", "date", "link"]].itertuples(index=False):
    # pandas reads a missing transcript as NaN (a float), which has no .split().
    if not isinstance(speech.text, str):
        continue

    for phrase in speech.text.split("."):
        # "a. b." splits into ["a", " b", ""]: skip empty / whitespace-only
        # fragments so that no blank document is added to the corpus.
        if not phrase.strip():
            continue
        doc = Document(
            "discours",
            auteur=speech.speaker,
            date=speech.date,
            url=speech.link,
            texte=phrase,  # kept unstripped, exactly as it appears in the speech
        )
        corpus.add(doc)


In [5]:
# Look up the keyword "thank" in the corpus.
# NOTE(review): judging by the recorded output, the second argument appears to be the width
# (in characters) of the context kept around the match — confirm in Corpus.search.
corpus.search("thank",10)

'rst I want thank you, Nic'

In [6]:
# Concordance for the pattern "thanks": the recorded output is a table with one row per match
# and the columns "contexte gauche", "motif trouve", "contexte droit".
# NOTE(review): the second argument looks like the context width in characters — confirm in Corpus.concorde.
corpus.concorde("thanks",10)

Unnamed: 0,contexte gauche,motif trouve,contexte droit
0,ools And,thanks,to your g
1,"develop,",thanks,to all th
2,today and,thanks,to Bike T


In [8]:
# Build a search engine over the sentence-level corpus and request the top 2 results
# for the query "students educators".
# NOTE(review): despite its name, `result_df` is displayed below as a list of documents,
# not a DataFrame. The "QUERY ..." lines in the output look like debug prints emitted
# during the search call — confirm in SearchEngine.
search_engine = SearchEngine(corpus)
result_df = search_engine.search("students educators", top_k=2)
result_df

QUERY [[0. 0. 0. ... 0. 0. 0.]]
[  31   27  795 ...   18   17 1600]


[Titre : discours	Auteur : CLINTON	Date : April 14, 2015	URL : http://www.presidency.ucsb.edu/ws/index.php?pid=110044	Texte :  And before we get started — because I want to hear from each of you, I want more information about what you see as challenges as students and educators and the opportunities you hope to take advantage of — I just want to tell you a little bit about why I'm here today	,
 Titre : discours	Auteur : CLINTON	Date : April 14, 2015	URL : http://www.presidency.ucsb.edu/ws/index.php?pid=110044	Texte :  I got to talk with the instructor and four of the students, all of whom are high school students who are doing what you have described as the great opportunity to mix your high school years with college learning and college credits as you move forward	]

In [9]:
from TD3_6 import load_corpus

# Try the search engine on a second corpus loaded from disk.
# NOTE(review): the ".pkl" extension suggests a pickle file — only load files you trust.
# NOTE(review): the recorded run of this cell ended with
#   AttributeError: 'Corpus' object has no attribute '_searchString'
# presumably because the file was saved by an older version of Corpus — TODO confirm and
# regenerate the file if so.
corpus_2 = load_corpus(filename="../corpus/corpus.pkl")
engine_2 = SearchEngine(corpus_2)
result_df = engine_2.search("china cases", top_k=5)

# Rebind the shared `search_engine` only once the new engine has answered a query, so a
# failure above leaves the previously working engine in place for the cells that follow.
search_engine = engine_2
result_df

AttributeError: 'Corpus' object has no attribute '_searchString'

In [21]:
import ipywidgets as widgets
from IPython.display import display

# Search-engine UI: a heading, a keyword field, a result-count slider, a search button
# and an output area, stacked vertically and centred.

# Page heading rendered as HTML.
title = widgets.HTML(
    value="<h1 style='text-align: center;'>Moteur de recherche</h1>",
    placeholder='',
    description='',
)

# Free-text field holding the query keywords.
texte = widgets.Text(
    value='',
    placeholder='Type something',
    description='Mots clés:',
    disabled=False,
)

# Number of documents to return. The minimum is 1: asking for 0 documents can only
# yield an empty result, which would be reported as "no result found".
slider = widgets.IntSlider(
    value=7,
    min=1,
    max=10,
    step=1,
    description='Nb docs:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
)

# Button that launches the search; its click handler is attached in a separate cell.
button = widgets.Button(
    description='Rechercher',
    disabled=False,
    button_style='',
    tooltip='Click me',
    icon='check',
)

# Bordered area in which the search results are printed.
output = widgets.Output(
    layout=widgets.Layout(
        width='80%',
        border='1px solid black',
        padding='10px'
    )
)

# Vertical, centred stack of all the widgets above.
container = widgets.VBox(
    [title, texte, slider, button, output],
    layout=widgets.Layout(
        align_items='center',
        justify_content='center',
        width='100%'
    )
)

display(container)


VBox(children=(HTML(value="<h1 style='text-align: center;'>Moteur de recherche</h1>", placeholder=''), Text(va…

In [19]:
def clique_bouton(b):
    """Run a search from the current widget values and print the hits in `output`.

    Reads the keywords from `texte` and the number of documents from `slider`,
    queries the notebook-level `search_engine`, and prints one numbered line per
    result inside the `output` widget (which is cleared first).

    Args:
        b: The object passed by the click event; it is not used.

    Returns:
        None. Everything is reported through prints captured by `output`; errors
        raised during the search are caught and printed rather than propagated.
    """
    mots_cles = texte.value.strip()
    nb_documents = slider.value

    with output:
        output.clear_output()

        if not mots_cles:
            print("Veuillez entrer des mots-clés pour effectuer une recherche.")
            return

        # A request for fewer than one document cannot return anything; say so
        # instead of running the search and reporting "no result".
        if nb_documents < 1:
            print("Veuillez demander au moins un document.")
            return

        try:
            found = search_engine.search(mots_cles, nb_documents)
            # Normalise to a list: a bare truth test raises on array-like results
            # and a None result would otherwise need a separate check.
            results = [] if found is None else list(found)

            if not results:
                print("Aucun résultat trouvé.")
                return

            for idx, result in enumerate(results, start=1):
                print(f"{idx}. {result}")

        except Exception as e:
            # Deliberately broad: a UI callback should show the problem to the
            # user in the output area instead of failing silently.
            print(f"Erreur lors de la recherche : {e}")


In [20]:
# Attach the search handler to the button.
# NOTE(review): this registers the handler on whatever object `button` currently refers to, so
# it must run after the cell that creates the widgets; if that cell is re-run (creating a new
# `button`), this cell has to be re-run too. The recorded execution counts (widgets: 21,
# handler: 19, this cell: 20) suggest the displayed button was created after this call.
button.on_click(clique_bouton)