# Projet

### Membres de groupe:

 Mehmet YALCIN ;
 Lyheng LAO ;
 Guang YANG

In [1]:
from lxml import etree
import os
import xml.etree.ElementTree as ET
import time

# Nettoyage de corpus

In [2]:
import re
from lxml import etree
import os

class XMLCleaner:
    def __init__(self, input_file, output_dir, page_split,mot_clé):
        self.input_file = input_file
        self.output_dir = output_dir
        self.page_split = page_split
        self.mot_clé = mot_clé.lower()
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    def escape_xml_chars(self, text):
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;')

    def clean_text(self, text):
        text = text.lower()
        text = re.sub(r'\{\{.*?\}\}', '', text, flags=re.DOTALL)  # remove double braces content
        text = re.sub(r'<ref.*?>.*?</ref>', '', text, flags=re.DOTALL)  # remove references
        text = re.sub(r'\[\[File:.*?\]\]', '', text, flags=re.DOTALL)  # remove file links
        text = re.sub(r"== voir aussi ==.*?(?=(\n==|$))", '', text, flags=re.DOTALL)
        text = re.sub(r"== notes et références ==.*?(?=(\n==|$))", '', text, flags=re.DOTALL)
        text = re.sub(r"== bibliographie ==.*?(?=(\n==|$))", '', text, flags=re.DOTALL)
        text = re.sub(r"== articles connexes ==.*?(?=(\n==|$))", '', text, flags=re.DOTALL)
        text = re.sub(r"== liens externes ==.*?(?=(\n==|$))", '', text, flags=re.DOTALL)
        text = re.sub(r'[^\[\]\w\s]+(?![^\[]*\]\])', '', text)
        text = re.sub(r'(?<!\[)\[([^[\]]*?)\](?!\])', '', text)
        return self.escape_xml_chars(text)

    def process_all_splits(self):
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        context = etree.iterparse(self.input_file, events=('end',), tag='{http://www.mediawiki.org/xml/export-0.10/}page')  # 'page' à ajuster selon la structure XML
        files_count = 0
        elements_count = 0

        # Créer un fichier de sortie initial
        output_file = os.path.join(self.output_dir, f'split_{files_count}.xml')
        output_handle = open(output_file, 'wb')
        output_handle.write(b'<?xml version="1.0" encoding="UTF-8"?>\n<root>\n')
        ns = {'mw': 'http://www.mediawiki.org/xml/export-0.10/'}
        print(f'création de {output_file}')
        id = 0
        cleaned_text = "" 

        for _, elem in context:
            if elements_count == self.page_split:
                # Fermer le fichier actuel et en ouvrir un nouveau
                output_handle.write(b'</root>\n')
                output_handle.close()
                files_count += 1
                elements_count = 0

                output_file = os.path.join(self.output_dir, f'split_{files_count}.xml')
                output_handle = open(output_file, 'wb')
                print(f'création de {output_file}')
                output_handle.write(b'<?xml version="1.0" encoding="UTF-8"?>\n<root>\n')

            # Écrire l'élément dans le fichier de sortie
            ti = elem.find('.//mw:title', namespaces=ns)
            title = self.escape_xml_chars(ti.text)
            revision = elem.find('.//mw:revision', namespaces=ns)
            if revision is not None:
                text = revision.find('.//mw:text', namespaces=ns)
                if text is not None and text.text and self.mot_clé in text.text.lower():
                    words = re.findall(r'\b\w+\b', text.text.lower())  # Utilisation de lower() pour la correspondance
                    if len(words) > 600:
                        cleaned_text = self.clean_text(text.text)
            
            if cleaned_text:
                output_handle.write(f'<page>\n<title>{title}</title>\n<id>{str(id)}</id>\n<text>{cleaned_text}</text>\n</page>\n'.encode('utf-8'))
                id += 1
                output_handle.write(b'\n')
                elements_count += 1

            # Il est important de libérer la mémoire des éléments traités
            elem.clear()
            while elem.getprevious() is not None:
                del elem.getparent()[0]
            if files_count * self.page_split == 200000:
                break

        # Fermer le dernier fichier
        output_handle.write(b'</root>\n')
        output_handle.close() 

    def run(self):
        self.process_all_splits()

In [3]:
# %%time
# cleaner = XMLCleaner('frwiki.xml', 'nettoyage', 100,'science')
# cleaner.run()

# PageWork(traitement des pages)

In [4]:
from collections import Counter,defaultdict
import spacy
import math

In [5]:
class PageWork:
    def __init__(self, file):
        self.file = file
        self.nlp = spacy.load('fr_core_news_sm', disable=['parser', 'ner'])  # Optimisation de SpaCy
        self.words_per_page = defaultdict(list)
        self.page_id = defaultdict(list)
        self.mot_vec = None
        self.list_TF = None
        self.list_N = None
        self.words_dict = Counter()
        self.page_page = defaultdict(list)
        self.title_to_id = {}
        self.idf_score = {}

    def page_process(self):
        for filename in os.listdir(self.file):
            if filename.endswith('.xml'):
                input_path = os.path.join(self.file, filename)
                tree = etree.parse(input_path)
                root = tree.getroot()
                start = time.time()
                for page in root.findall('.//page'):
                    title = page.find('title').text
                    page_id = page.find('id').text
                    texts = page.find('text').text if page.find('text') is not None else ""
                    links = re.findall(r'\[\[([^\d\]|]+)(?:\|[^\]]+)?\]\]', texts)
                    texts = re.sub(r'\[\[.*?\]\]', '',texts)
                    self.title_to_id[title.lower()] = page_id
                    #self.title_to_id[title] = page_id
                    doc = self.nlp(texts.lower())
                    counter = Counter(token.lemma_ for token in doc if not token.is_stop and token.is_alpha)
                    for word,nb in counter.items():
                        self.words_per_page[word].append([nb,page_id])
                    for link in links:
                        if link not in self.page_page[page_id]:
                            self.page_page[page_id].append(link.lower())
                end = time.time()
                print(f"done in {round(end - start, 2)}s for {filename}")

    def page_to_id(self):
        for key, value in self.page_page.items():
            self.page_id[key] = []
            for val in value:
                if val in self.title_to_id:
                    self.page_id[key].append(self.title_to_id[val])
            
    def IDF(self):
        nb_doc = len(self.page_id)
        self.idf_score = {mot: math.log((nb_doc / len(self.words_per_page[mot])), 10) for mot in self.words_per_page}

    def TF_list(self):
        if self.list_TF is None:
            self.list_TF = {mot: [(1 + math.log(count), page_id) for count, page_id in pages] for mot, pages in self.words_per_page.items()}

    def TF(self, mots, page):
        doc = self.nlp(mots)
        lemma = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
        L = []
        for tok in lemma:
            if tok in self.list_TF:
                L.extend([tf for tf, pg in self.list_TF[tok] if pg == str(page)])
            else:
                L.append(0)
        return L

    def N_d(self):
        if self.list_N is None:
            self.TF_list()
            self.list_N = {page_id: math.sqrt(sum(L[0]**2 for terms in self.list_TF.values() for L in terms if L[1] == page_id)) 
                           for page_id in self.page_id}

    def page_calcul(self):
        if self.mot_vec is None:
            self.N_d()
            self.mot_vec = {}
            for mot, list_tf in self.list_TF.items():
                for tf_value, page_id in list_tf:
                    N_value = self.list_N.get(page_id, 0)
                    if N_value > 0:
                        self.mot_vec.setdefault(mot, []).append((tf_value / N_value, page_id))

## NLTK

In [6]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\G.1997.Yang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\G.1997.Yang\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
class PageWork:
    def __init__(self, file):
        self.file = file
        self.stemmer = SnowballStemmer("french")
        self.french_stopwords = set(stopwords.words('french'))
        self.words_per_page = defaultdict(list)
        self.page_id = defaultdict(list)
        self.mot_vec = None
        self.list_TF = None
        self.list_N = None
        self.words_dict = Counter()
        self.page_page = defaultdict(list)
        self.title_to_id = {}
        self.idf_score = {}
        self.id_to_title = {}  # 新增

    def page_process(self):
        for filename in os.listdir(self.file):
            if filename.endswith('.xml'):
                input_path = os.path.join(self.file, filename)
                tree = etree.parse(input_path)
                root = tree.getroot()
                start = time.time()
                for page in root.findall('.//page'):
                    title = page.find('title').text
                    page_id = page.find('id').text
                    texts = page.find('text').text if page.find('text') is not None else ""
                    links = re.findall(r'\[\[([^\d\]|]+)(?:\|[^\]]+)?\]\]', texts)
                    texts = re.sub(r'\[\[.*?\]\]', '',texts)
                    self.title_to_id[title.lower()] = page_id
                    self.id_to_title[page_id] = title  # 新增
                    doc = word_tokenize(texts.lower(), language='french')
                    stems = [self.stemmer.stem(token) for token in doc if token.lower() not in self.french_stopwords and token.isalpha()]
                    counter = Counter(stems)
                    for word,nb in counter.items():
                        self.words_per_page[word].append([nb,page_id])
                    for link in links:
                        if link not in self.page_page[page_id]:
                            self.page_page[page_id].append(link.lower())
                end = time.time()
                print(f"done in {round(end - start, 2)}s for {filename}")
                
    def page_to_id(self):
        for key, value in self.page_page.items():
            self.page_id[key] = []
            for val in value:
                if val in self.title_to_id:
                    self.page_id[key].append(self.title_to_id[val])
            
    def IDF(self):
        nb_doc = len(self.page_id)
        self.idf_score = {mot: math.log((nb_doc / len(self.words_per_page[mot])), 10) if len(self.words_per_page[mot]) > 0 else 0 for mot in self.words_per_page}

    def TF_list(self):
        if self.list_TF is None:
            self.list_TF = {mot: [(1 + math.log(count), page_id) for count, page_id in pages] for mot, pages in self.words_per_page.items()}

    def TF(self, mots, page):
        doc = word_tokenize(mots.lower(), language='french')
        lemma = [self.stemmer.stem(token) for token in doc if token not in self.french_stopwords and token.isalpha()]
        L = []
        for tok in lemma:
            if tok in self.list_TF:
                L.extend([tf for tf, pg in self.list_TF[tok] if pg == str(page)])
            else:
                L.append(0)
        return L

    def N_d(self):
        if self.list_N is None:
            self.TF_list()
            self.list_N = {page_id: math.sqrt(sum(L[0]**2 for terms in self.list_TF.values() for L in terms if L[1] == page_id)) 
                           for page_id in self.page_id}

    def page_calcul(self):
        if self.mot_vec is None:
            self.N_d()
            self.mot_vec = {}
            for mot, list_tf in self.list_TF.items():
                for tf_value, page_id in list_tf:
                    N_value = self.list_N.get(page_id, 0)
                    if N_value > 0:
                        self.mot_vec.setdefault(mot, []).append((tf_value / N_value, page_id))

# PageRank

In [8]:
import math

In [9]:
import numpy as np

class PageRank(PageWork):
    def __init__(self,file):
        super().__init__(file)
        self.C = None
        self.L = np.array([0])
        self.I = np.array([])
        
    def LI(self):
        if self.L.size == 1 and self.I.size == 0:
            for _, p in self.page_id.items():
                taille = len(p)
                self.L = np.append(self.L, self.L[-1] + taille)
                self.I = np.append(self.I, sorted([int(i) for i in p])).astype(int)
            
    def produit(self,u):
        n = len(u)
        v = np.zeros((n))
        for i in range(0,n):
            for j in range(self.L[i],self.L[i+1]):
                v[i] += u[self.I[j]] / (self.L[i+1]-self.L[i])
        return v
                
    def produit_T(self,u):
        n = len(u)
        v = np.zeros((n))
        s = 0
        for i in range(n):
            for j in range(self.L[i],self.L[i+1]):
                v[self.I[j]] += u[i] / (self.L[i+1] - self.L[i])
            if self.L[i] == self.L[i+1]:
                s += u[i]/n
        for i in range(n):
            v[i] += s
        return v
            
    def pagerank(self,ep,k):
        n = len(self.L)-1
        self.phi = np.array([])
        self.phi = np.append(self.phi,[1/n] * n)
        J = np.ones(n) 
        for i in range(1,k):
            self.phi = (1 - ep) * self.produit_T(self.phi) + (ep / n) * J 

# Requête

In [10]:
import heapq

In [11]:
class recherche(PageRank):
    def __init__(self,file):
        super().__init__(file)
        self.score = {}
        
    def set_param(self,alpha,beta,gamma):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        
    def traitement(self,requests):
        doc = word_tokenize(requests.lower(), language='french')
        self.req = [self.stemmer.stem(token) for token in doc if token not in self.french_stopwords and token.isalpha()]

    def commun(self):
        # Initialize an empty Counter to aggregate ID frequencies across all tokens
        aggregated_id_freq = Counter()

        # Aggregate ID frequencies across all tokens present in self.req and in self.mot_vec
        for tok in self.req:
            if tok in self.mot_vec:
                aggregated_id_freq.update(id for _, id in self.mot_vec[tok])

        # Determine common IDs based on a threshold value related to the length of self.req
        val = int(len(self.req) * 2 / 3)
        ids_communs = {id for id, freq in aggregated_id_freq.items() if freq > val}

        # Filter self.mot_vec for each token to retain only the items with IDs in ids_communs
        self.commun_dict = {}
        for tok in self.req:
            if tok in self.mot_vec:
                self.commun_dict[tok] = [item for item in self.mot_vec[tok] if item[1] in ids_communs]

    def frequence(self,page_id):
        frq = 0
        for tok in self.req:
            if tok in self.commun_dict:
                for L in self.commun_dict[tok]:
                    if L[1] == page_id:
                        frq += L[0] * self.idf_score[tok]
        return frq
    
    def S(self):
        long = max(self.commun_dict, key=lambda k: len(self.commun_dict[k]))
        L = self.commun_dict[long]
        self.score = {}
        for _ ,id in L:
                self.score[id] = self.alpha * self.frequence(id) + self.beta * self.phi[int(id)]
        for title,index in self.title_to_id.items(): # la mise en évidence pour les requêtes qui sont concernés dans le title
            seuil = sum(1 for mot in self.req if mot in title)
            if seuil == len(self.req):
                if index in self.score:
                    self.score[index] += 2
                else:
                # Si index n'est pas présent, initialiser avec la valeur de la première boucle (les page donc pas des mots)
                    self.score[index] = 2
        self.score = {k: v for k, v in sorted(self.score.items(), key=lambda item: item[1], reverse=True)}
        
    def new_mot_page(self):
        for mot, pages in self.commun_dict.items():
            self.mots_pages[mot] = sorted(pages, key=lambda x: self.phi[int(x[1])], reverse=True)
            
    def v_max(self):
        self.v_md = {}
        for mot,vec in self.mots_pages.items():
            vmax = 0
            self.v_md[mot] = []
            for _, page in reversed(vec):
                vmax = max(vmax,self.score(page))
                self.v_md[mot].append(vmax,page)
                
            self.v_md[mot] = list(reversed(self.v_md[mot]))
            
    def WAND(self,k):
        meilleur_pages = {}
        pointeur = {mot:0 for mot in self.mots_pages}
        
        for id,score in self.score.items():
            for i in range(k):
                meilleur_pages[id] = score
                
        

# exécution

In [12]:
%%time
cleaner = XMLCleaner('frwiki.xml', 'nettoyage', 10,'science')
cleaner.run()

création de nettoyage\split_0.xml
création de nettoyage\split_1.xml
création de nettoyage\split_2.xml
CPU times: total: 250 ms
Wall time: 232 ms


In [13]:
k= recherche('nettoyage')

In [14]:
k.page_process()

done in 0.64s for split_0.xml
done in 1.11s for split_1.xml
done in 0.07s for split_2.xml


In [15]:
k.words_per_page

defaultdict(list,
            {'antoin': [[4, '0'], [1, '14']],
             'meillet': [[10, '0']],
             'né': [[1, '0'], [1, '8'], [8, '14'], [21, '15'], [61, '20']],
             'mort': [[1, '0'],
              [4, '8'],
              [8, '11'],
              [8, '12'],
              [1, '13'],
              [3, '14'],
              [4, '15'],
              [8, '20']],
             'franc': [[5, '0'],
              [3, '3'],
              [3, '4'],
              [3, '5'],
              [3, '6'],
              [3, '7'],
              [33, '8'],
              [1, '9'],
              [1, '10'],
              [2, '11'],
              [2, '12'],
              [2, '13'],
              [35, '14'],
              [8, '15'],
              [9, '16'],
              [9, '17'],
              [9, '18'],
              [9, '19'],
              [19, '20']],
             'principal': [[2, '0'],
              [1, '1'],
              [1, '2'],
              [3, '3'],
              [3, '4'],
   

json save

In [None]:
import json

# sauvegarde le dict pour l'utilisation ultérieur (et aussi qui permet de faire la mise à jour si on veut ajouter nouvelle page)

with open('words_per_page.json','w') as f:
    json.dump(k.words_per_page,f)

In [None]:
# récupération de fichier

with open('words_per_page.json','r') as f:
    wordes_per_page = json.load(f)

#ainsi de suit on saugevard les données qui nous seront util pour les partie après

---------------------

In [25]:
k.mot_vec

{'antoin': [(0.11908882479800581, '0'), (0.010417961065175188, '14')],
 'meillet': [(0.16481662276379727, '0')],
 'né': [(0.04990533722005582, '0'),
  (0.013858874715405667, '8'),
  (0.032081502083703584, '14'),
  (0.10197593799350523, '15'),
  (0.1163219585390159, '20')],
 'mort': [(0.04990533722005582, '0'),
  (0.03307135458483958, '8'),
  (0.04494008713067608, '11'),
  (0.04494008713067608, '12'),
  (0.040361589412576684, '13'),
  (0.021863261114242562, '14'),
  (0.06016646206091683, '15'),
  (0.07008716725442907, '20')],
 'franc': [(0.13022487897482227, '0'),
  (0.05028947142793286, '3'),
  (0.05028947142793286, '4'),
  (0.05028947142793286, '5'),
  (0.05028947142793286, '6'),
  (0.05028947142793286, '7'),
  (0.06231653495123819, '8'),
  (0.03683531709884924, '9'),
  (0.03683531709884924, '10'),
  (0.024709084679657625, '11'),
  (0.024709084679657625, '12'),
  (0.06833811131682235, '13'),
  (0.047457438742917975, '14'),
  (0.07764302078782073, '15'),
  (0.06678590689638889, '16'),


In [29]:
k.pagerank(1 / 10 , 1000)

np save

In [None]:
np.save('pagerank.npy',k.phi) # sauvegarde les formats numpy

In [None]:
phi = np.load('pagerank.npy') # charge en mémoire

-----------------

# Pre-site

In [39]:
pip install ipywidgets




In [40]:
import ipywidgets as widgets
from IPython.display import display, clear_output

def search(query):
    k.traitement(query)
    k.commun()
    k.S()
    return k.score  

def get_title_by_id(self, page_id):
    # 假设有一个从ID映射到标题的字典，名为id_to_title
    return self.id_to_title.get(page_id, "Unknown Title")

# def on_search_clicked(b):
#     with output:
#         clear_output()
#         query = search_input.value
#         results = search(query)  
#         if results:
#             for id, score in results.items():
#                 print(f"ID: {id}, Score: {score}")
#         else:
#             print("No matching results were found.")

# def on_search_clicked(b):
#     with output:
#         clear_output()
#         query = search_input.value
#         results = search(query)  
#         if results:
#             for id, score in results.items():
#                 title = k.id_to_title[id]  # 假设k是PageWork或其子类的实例
#                 print(f"ID: {id}, https://fr.wikipedia.org/{title}, Score: {score}")
#                 print(f"ID: {id}, Title: {title}, Score: {score}")
#         else:
#             print("No matching results were found.")

# def on_search_clicked(b):
#     with output:
#         clear_output()
#         query = search_input.value
#         results = search(query)  
#         if results:
#             for id, score in results.items():
#                 title = k.id_to_title[id]  # 假设k是PageWork或其子类的实例
#                 title_for_url = title.replace(" ", "_")  # 将标题中的空格替换为下划线
#                 print(f"ID: {id}, https://fr.wikipedia.org/wiki/{title_for_url}, Score: {score}")
#         else:
#             print("No matching results were found.")

def on_search_clicked(b):
    with output:
        clear_output()
        query = search_input.value
        results = search(query)  
        if results:
            for id, score in results.items():
                title = k.id_to_title[id]  # 假设k是PageWork或其子类的实例
                title_for_url = title.replace(" ", "_")  # 将标题中的空格替换为下划线
                print(f"ID: {id}, <https://fr.wikipedia.org/wiki/{title_for_url}>, Score: {score}")
        else:
            print("No matching results were found.")

search_input = widgets.Text(placeholder="Enter your search term...")
search_button = widgets.Button(description="search")
output = widgets.Output()

search_button.on_click(on_search_clicked)

display(search_input, search_button, output)


Text(value='', placeholder='Enter your search term...')

Button(description='search', style=ButtonStyle())

Output()

# site

In [41]:
pip install flask

Note: you may need to restart the kernel to use updated packages.


In [42]:
from flask import Flask, request, render_template
import numpy as np
from urllib.parse import quote

In [43]:
app = Flask(__name__)


In [44]:
from flask import Flask, request, render_template_string
import threading

app = Flask(__name__)


@app.route('/')
def index():
    return render_template_string('''
        <!DOCTYPE html>
        <html>
        <body>
            <h1>search page</h1>
            <form action="/search" method="post">
                <input type="text" name="query" placeholder="Enter keywords">
                <input type="submit" value="search">
            </form>
        </body>
        </html>
    ''')

# @app.route('/search', methods=['POST'])
# def search():
#     query = request.form['query']
#     k.traitement(query)
#     k.commun()
#     k.S()
#     results = k.score  
    
#     results_html = '<br>'.join([f'ID: {id}, Score: {score}' for id, score in results.items()])
#     return render_template_string('''
#         <!DOCTYPE html>
#         <html>
#         <body>
#             <h1>result</h1>
#             <p>{{ results_html|safe }}</p>
#             <a href="/">return</a>
#         </body>
#         </html>
#     ''', results_html=results_html)

# @app.route('/search', methods=['POST'])
# def search():
#     query = request.form['query']
#     k.traitement(query)
#     k.commun()
#     k.S()
#     results = k.score  
    
#     # 生成结果HTML，其中标题的空格被替换为下划线，并创建Wikipedia链接
#     results_html = '<br>'.join([
#         f'ID: {id}, <a href="https://fr.wikipedia.org/wiki/{k.id_to_title[id].replace(" ", "_")}">{k.id_to_title[id]}</a>, Score: {score}'
#         for id, score in results.items()
#     ])
    
#     return render_template_string('''
#         <!DOCTYPE html>
#         <html>
#         <body>
#             <h1>result</h1>
#             <p>{{ results_html|safe }}</p>
#             <a href="/">return</a>
#         </body>
#         </html>
#     ''', results_html=results_html)

@app.route('/search', methods=['POST'])
def search():
    query = request.form['query']
    k.traitement(query)
    k.commun()
    k.S()
    results = k.score  
    
    # 生成结果HTML，其中标题的空格被替换为下划线，并创建Wikipedia链接
    results_html = '<br>'.join([
        f'ID: {id}, <a href="https://fr.wikipedia.org/wiki/{quote(k.id_to_title[id].replace(" ", "_"))}">{k.id_to_title[id]}</a>, Score: {score}'
        for id, score in results.items()
    ])
    
    return render_template_string('''
        <!DOCTYPE html>
        <html>
        <body>
            <h1>result</h1>
            <p>{{ results_html|safe }}</p>
            <a href="/">return</a>
        </body>
        </html>
    ''', results_html=results_html)

In [45]:
# Flask 
def run_app():
    app.run(use_reloader=False, debug=True, port=5000)

In [46]:
threading.Thread(target=run_app).start()

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [15/Mar/2024 10:44:29] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [15/Mar/2024 10:44:34] "POST /search HTTP/1.1" 200 -
