In [18]:
import pandas as pd
import json
from pprint import pprint
from datetime import datetime
from elasticsearch import Elasticsearch

In [19]:
# Module-level Elasticsearch client used by the search helpers in this notebook.
# It is built with no arguments, so the library's default connection settings
# apply (presumably a local node -- TODO confirm for the target environment).
es = Elasticsearch()

def getPQReport(query):
    """Fetch the single report whose ``queries.query.keyword`` matches ``query``.

    Runs a ``match`` search against the ``log_reports_queries`` index using
    the module-level ``es`` client.

    Args:
        query: Value to match against the ``queries.query.keyword`` field.

    Returns:
        The first (and only) entry of ``hits.hits`` in the search response.

    Raises:
        Exception: If the reported total number of hits is not exactly 1.
    """
    match_clause = {"queries.query.keyword": query}
    response = es.search(
        index="log_reports_queries",
        body={"query": {"match": match_clause}},
    )
    hits = response['hits']
    hit_count = hits['total']['value']
    # Exactly one hit is the only accepted outcome; anything else is an error.
    if hit_count == 1:
        return hits['hits'][0]
    raise Exception("Error, wrong hit numbers : " + str(hit_count))

def findPQReport(title, variantes):
    """Return the report of the first variant for which a report can be fetched.

    Each variant is passed to ``getPQReport`` in order; the first successful
    lookup wins. Progress and failure messages are printed to stdout.

    Args:
        title: Human-readable title of the query, used in messages only.
        variantes: Iterable of query variants to try, in order.

    Returns:
        The report returned by ``getPQReport`` for the first variant that
        succeeds.

    Raises:
        RuntimeError: If no variant yields a report. (Previously a bare
            ``raise`` with no active exception produced an unrelated
            "No active exception to reraise" RuntimeError.)
    """
    print('Search report for query ', title)
    for v in variantes:
        try:
            report = getPQReport(v)
        except Exception:
            # Best-effort lookup: any failure on this variant (including a
            # wrong hit count) just means we move on to the next one.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            continue
        print("- Report found\n")
        return report
    print("- \u26A0 Error : cannot find report for query ", title, "\n")
    raise RuntimeError("cannot find report for query " + str(title))


In [20]:
def toRow (query, report):
    """Flatten a report hit (and its optional source query) into one row dict.

    Args:
        query: The prequalified query document, or a falsy value when the
            report has no associated query. When given, its ``title``,
            ``cdtnId`` and ``document.variants`` entries are read.
        report: A search hit; only its ``_source`` entry is read.

    Returns:
        A dict whose keys are the spreadsheet column names, in column order.
        Scores are formatted as strings with three decimals.
    """
    src = report["_source"]
    row = {}

    # Title comes from the query when available, else from the report itself.
    if query:
        row['title'] = query['title']
    else:
        row['title'] = src['queryName']

    row['nombre de requêtes'] = src['queriesCount']

    row['score - selections'] = format(src['selectionsRatio'], '.3f')
    row['sélections'] = src['selectionsCount']

    row['score - ordre'] = format(src['ndcg'], '.3f')

    row['nombre de résultats'] = len(src['results'])
    row['résultats'] = src['results']

    # Query-specific columns fall back to report data / placeholders.
    if query:
        row['variantes'] = query['document']['variants']
        row['cdtn id'] = query['cdtnId']
        row['lien admin preprod'] = 'https://preprod-cdtn-admin.dev2.fabrique.social.gouv.fr/contenus/edit/' + query['cdtnId']
    else:
        row['variantes'] = src["queryVariants"]
        row['cdtn id'] = "n/a"
        row['lien admin preprod'] = None
    return row
    


In [21]:
# Load the prequalified queries export. The encoding is pinned to UTF-8 so the
# accented titles decode the same way regardless of the platform's locale
# default (open() would otherwise use the locale's preferred encoding).
with open("./prequalified.json", encoding="utf-8") as json_file:
    prequalified = json.load(json_file)

In [22]:
# Collect [query, report] pairs for every prequalified query that has a report.
reports = []

for p in prequalified['data']['documents']:
    try:
        report = findPQReport(p['title'], p['document']['variants'])
    except Exception:
        # findPQReport has already printed a warning for this query; skip it.
        # Only Exception is caught so that interrupting the kernel
        # (KeyboardInterrupt) still stops the loop.
        continue
    reports.append([p, report])

Search report for query  13eme-mois
- Report found

Search report for query  A quelle date l’employeur doit-il me remettre mes documents de fin de contrat ?
- Report found

Search report for query  A quelle date l’employeur doit-il régler mon salaire ?
- Report found

Search report for query  A qui s’adresse le congé pour création d’entreprise ? 
- Report found

Search report for query  abandon-de-poste
- Report found

Search report for query  absence-contrat-travail
- Report found

Search report for query  absence pendant préavis
- Report found

Search report for query  accident-travail
- Report found

Search report for query  affichage obligatoire
- Report found

Search report for query  ai-je le droit à l’allocation chômage en cas de rupture conventionnelle ?
- Report found

Search report for query  alcools autorisés en entreprise
- Report found

Search report for query  allocation-chomage
- Report found

Search report for query  aménagement du temps de travail
- Report found

Searc

- Report found

Search report for query  fin-cdd
- Report found

Search report for query  formulaire accident du travail
- Report found

Search report for query  fractionnement
- Report found

Search report for query  frais-deplacement domicile travail
- Report found

Search report for query  garantie financière - caution mutuelle portage salarial
- ⚠ Error : cannot find report for query  garantie financière - caution mutuelle portage salarial 

Search report for query  gratification stagiaire
- Report found

Search report for query  grève
- Report found

Search report for query  grille de salaire
- Report found

Search report for query  Grossesse et travail
- Report found

Search report for query  harcelement
- Report found

Search report for query  harcèlement moral
- Report found

Search report for query  harcèlement sexuel
- Report found

Search report for query  heures complémentaires
- Report found

Search report for query  heures formation
- Report found

Search report for query

- Report found

Search report for query  prime-precarite
- Report found

Search report for query  procédure licenciement pour inaptitude
- Report found

Search report for query  promesse-embauche
- Report found

Search report for query  protocole d’accord pré-électoral
- Report found

Search report for query  prudhommes
- Report found

Search report for query  puis-je demander une rupture conventionnelle pendant un arrêt maladie
- Report found

Search report for query  Quand annoncer sa grossesse à son employeur ?
- ⚠ Error : cannot find report for query  Quand annoncer sa grossesse à son employeur ? 

Search report for query  Quel est le montant de l’indemnité de fin de mission ?
- ⚠ Error : cannot find report for query  Quel est le montant de l’indemnité de fin de mission ? 

Search report for query  Quel est le rôle de la médecine du travail ? 
- Report found

Search report for query  Quelle est la différence entre invalidité et inaptitude ?
- Report found

Search report for query  

In [23]:
# One spreadsheet row per (prequalified query, report) pair, then export.
pq_rows = [toRow(pq_query, pq_report) for pq_query, pq_report in reports]
pd.DataFrame(pq_rows).to_excel("./query-reports-pq2.xlsx")

In [24]:
def getAllSearchReports (minQueries= 10, size= 1000) :
    """Fetch reports of type ``search`` with more than ``minQueries`` queries.

    Queries the ``log_reports_queries`` index through the module-level ``es``
    client.

    Args:
        minQueries: Exclusive lower bound on ``queriesCount`` (``gt`` range).
        size: Value sent as the ``size`` field of the search body.

    Returns:
        The ``hits.hits`` list of the search response.
    """
    body = {
        "size": size,
        "query": {
            "bool": {
                "must": [
                    {"match": {"type": "search"}},
                    {"range": {"queriesCount": {"gt": minQueries}}},
                ]
            }
        },
    }
    res = es.search(index="log_reports_queries", body=body)
    # The total hit count is not needed here; only the hits themselves are
    # returned, so the response's ``total`` entry is not inspected.
    return res['hits']['hits']

In [25]:
# Export every search report as a row. No prequalified query is associated
# with these reports, so toRow is called with None as the query.
# Note: the helper defined in this notebook is getAllSearchReports; the former
# call to getAllSearch() referenced a name that is not defined here.
searches = [toRow(None, report) for report in getAllSearchReports()]
pd.DataFrame(searches).to_excel("./query-reports-search2.xlsx")