In [1]:
# Importation des modules
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
import re
import csv
from tqdm import tqdm

from models.Corpus import Corpus
from models.Document import Document
from models.SearchEngine import SearchEngine

In [2]:
# Chargement du fichier CSV
df = pd.read_csv('./data/discours_US.csv', sep='\t', quoting=csv.QUOTE_NONE, engine='python', escapechar='\\')

print(f"Fichier chargé : {len(df)} lignes.")
print("Colonnes :", df.columns.tolist())
display(df.head(3))

# Distribution des auteurs
print("\nDistribution des auteurs :")
print(df[df.columns[0]].value_counts())

Fichier chargé : 164 lignes.
Colonnes : ['"speaker"', '"text"', '"date"', '"descr"', '"link"']


Unnamed: 0,"""speaker""","""text""","""date""","""descr""","""link"""
0,"""CLINTON""",""": I'm getting ready for a lot of things, a lo...","""April 12, 2015""","""Video Remarks Announcing Candidacy for Presid...","""http://www.presidency.ucsb.edu/ws/index.php?p..."
1,"""CLINTON""","""[ ] : I'll be graduating in May, and on gradu...","""April 14, 2015""","""Remarks in a Question and Answer Session at K...","""http://www.presidency.ucsb.edu/ws/index.php?p..."
2,"""CLINTON""",""": Well, thank you all so much for inviting me...","""April 20, 2015""","""Remarks in Keene, New Hampshire""","""http://www.presidency.ucsb.edu/ws/index.php?p..."



Distribution des auteurs :
"speaker"
"CLINTON"    93
"TRUMP"      71
Name: count, dtype: int64


In [3]:
# Création du Corpus
mon_corpus = Corpus(nom="Discours US")
count_phrases = 0

for index, row in df.iterrows():
    auteur = row[df.columns[0]]
    texte = row[df.columns[1]]
    date = row[df.columns[2]]
    titre = row[df.columns[3]]
    url = row[df.columns[4]]
    
    # Découpage en phrases
    phrases = re.split(r'[.!?]\s+', texte)
    
    for i, phrase in enumerate(phrases):
        if len(phrase.strip()) > 20: 
            doc = Document(
                titre=f"{titre} (phrase {i+1})",
                auteur=auteur,
                date=date,
                url=url,
                texte=phrase.strip()
            )
            mon_corpus.add_document(doc)
            count_phrases += 1

print(f"Corpus créé avec {count_phrases} phrases")

# Test avec search
print("\n--- Test search('freedom') ---")
resultats = mon_corpus.search("freedom")
if resultats:
    print(f"Trouvé {len(resultats)} résultats")
    print(f"Premier résultat : {resultats[0][:100]}...")
else:
    print("Aucun résultat")

# Test avec concorde
print("\n--- Test concorde('peace', 50) ---")
mon_corpus.concorde("peace", 50)

Corpus créé avec 29351 phrases

--- Test search('freedom') ---
Trouvé 49 résultats
Premier résultat : ized and prayed to expand the circle of freedom and opportunity They never gave up and ...

--- Test concorde('peace', 50) ---


Unnamed: 0,contexte gauche,motif trouvé,contexte droit
0,by heeding the pleas of Freddie Gray's family for,peace,"and unity, echoing the families of Michael Brown,"
1,"t Clinton honored the bargain, we had the longest",peace,"time expansion in history, a balanced budget, and"
2,"confidence, not anxiety That you should have the",peace,of mind that your health care will be there when
3,"smarts, and values to maintain our leadership for",peace,", security, and prosperity No other country on..."
4,—I like the sound of that—America saw the longest,peace,time expansion in our history Nearly 23 millio...
...,...,...,...
104,rican child to be able to walk down the street in,peace,Safety is a civil right The problem is not the pr
105,this nation has a right to grow up in safety and,peace,And my plan includes a pledge to restore manufact
106,literally life and death decisions about war and,peace,How do you handle a crisis And do you know the di
107,d the world with strength and intelligence toward,peace,"and prosperity Number three, we've got to bring o"


In [4]:
# Initialisation du moteur de recherche
engine = SearchEngine(mon_corpus)

# Tests avec plusieurs requêtes
print("\n=== Test 1 : 'war peace' ===")
res1 = engine.search("war peace", n_results=3)
display(res1)

print("\n=== Test 2 : 'democracy' ===")
res2 = engine.search("democracy", n_results=5)
display(res2)

print("\n=== Test 3 : 'economy' ===")
res3 = engine.search("economy", n_results=3)
display(res3)

-> Vocabulaire créé : 12145 mots.

=== Test 1 : 'war peace' ===


                                                                  

Unnamed: 0,Document,Score,Auteur,Date,URL,Type
0,"""Debate between Trump and Clinton"" (phrase 579)",0.4883,"""TRUMP""","""September 26, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
1,"""Debate between Trump and Clinton"" (phrase 605)",0.4229,"""TRUMP""","""September 26, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
2,"""Remarks at a Rally at the Pensacola Bay Cente...",0.4205,"""TRUMP""","""September 9, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu



=== Test 2 : 'democracy' ===


                                                                  

Unnamed: 0,Document,Score,Auteur,Date,URL,Type
0,"""Debate between Trump and Clinton"" (phrase 442)",0.6954,"""CLINTON""","""September 26, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
1,"""Remarks at Texas Southern University in Houst...",0.6656,"""CLINTON""","""June 4, 2015""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
2,"""Remarks at Macomb Community College South Cam...",0.5876,"""TRUMP""","""October 31, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
3,"""Remarks at Grand Valley State University in G...",0.5721,"""CLINTON""","""November 7, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
4,"""Debate between Trump and Clinton"" (phrase 326)",0.5641,"""CLINTON""","""October 19, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu



=== Test 3 : 'economy' ===


                                                                  

Unnamed: 0,Document,Score,Auteur,Date,URL,Type
0,"""Address Accepting the Presidential Nomination...",0.7206,"""TRUMP""","""July 21, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
1,"""Interview with Chuck Todd of NBC News ""Meet t...",0.6558,"""CLINTON""","""February 7, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu
2,"""Remarks at a Campaign Rally in Marshalltown, ...",0.606,"""CLINTON""","""January 26, 2016""","""http://www.presidency.ucsb.edu/ws/index.php?p...",Inconnu


In [5]:
# Interface Graphique

style = {'description_width': 'initial'}

lbl_titre = widgets.HTML("<h2>Moteur de Recherche US Speeches</h2>")

# Champ texte pour les mots clés
txt_requete = widgets.Text(
    description='Mots clés :',
    placeholder='ex: war freedom',
    style=style
)

# Slider pour le nombre de résultats
slider_nb = widgets.IntSlider(
    value=10,
    min=1,
    max=50,
    step=1,
    description="Nombre d'articles :",
    style=style
)

# Bouton de recherche
btn_search = widgets.Button(
    description='Rechercher',
    button_style='info',
    icon='search'
)

# Zone d'affichage des résultats
out_resultat = widgets.Output()

# Fonction clique_bouton
def clique_bouton(b):
    mots = txt_requete.value
    n = slider_nb.value
    
    with out_resultat:
        clear_output() # On efface le résultat précédent
        
        if not mots.strip():
            print("Veuillez entrer des mots-clés.")
            return
            
        print(f"Recherche en cours pour : '{mots}' ({n} résultats)...")
        
        df_res = engine.search(mots, n_results=n)
        
        if df_res.empty:
            print("Aucun résultat trouvé.")
        else:
            display(df_res)

btn_search.on_click(clique_bouton)

# Mise en page
ui = widgets.VBox([
    lbl_titre,
    txt_requete,
    widgets.HBox([slider_nb, btn_search]),
    out_resultat
])

# Affichage final
display(ui)

VBox(children=(HTML(value='<h2>Moteur de Recherche US Speeches</h2>'), Text(value='', description='Mots clés :…