# Taux de complétion des outils

## À documenter

In [None]:
import pandas as pd
import math
from tqdm.notebook import tqdm
from typing import List, Dict


# Notebook display limits: show wide frames and long listings without truncation.
for option_name, option_value in (
    ('display.max_columns', 100),
    ('display.max_colwidth', 80),
    ('display.max_rows', 1000),
):
    pd.set_option(option_name, option_value)

In [None]:
from query_elastic_search import load_file_if_exists_or_execute_query

In [None]:
# Query handed to the `query_elastic_search` helpers: every log whose url starts
# with the tools prefix AND whose `logfile` value falls inside the date window.
_TOOLS_URL_CLAUSE = {
    "prefix": {"url": "https://code.travail.gouv.fr/outils"},
}

# Window currently analysed. A previous run of this notebook used
# "gte": "2022-03-01" / "lt": "2022-06-01" instead.
_LOGFILE_RANGE_CLAUSE = {
    "range": {
        "logfile": {"gte": "2022-12-01", "lt": "2022-12-20"},
    },
}

SEARCH_QUERY = {
    "query": {
        "bool": {
            "must": [_TOOLS_URL_CLAUSE, _LOGFILE_RANGE_CLAUSE],
        },
    },
}

In [None]:
# Fetch the logs selected by SEARCH_QUERY through the project helper.
# NOTE(review): the first argument is an empty string; judging by the helper's
# name it is presumably the path of a cached file — confirm what "" means there.
logs_new = load_file_if_exists_or_execute_query("", SEARCH_QUERY)

In [None]:
# Columns needed for the tool analysis; the copy keeps later column additions
# on `logs_outils` from touching `logs_new`.
OUTIL_LOG_COLUMNS = [
    'url',
    'idVisit',
    'type',
    'outil',
    'outilAction',
    'outilEvent',
    'lastActionDateTime',
    'logfile',
]
logs_outils = logs_new.loc[:, OUTIL_LOG_COLUMNS].copy()
logs_outils

In [None]:
# Strip the query string ('?...') and the fragment ('#...') from each url.
# Uses the vectorised `.str` accessor, like the June and July cells of this
# notebook: a missing url yields NaN instead of raising AttributeError as the
# former row-wise `apply(lambda x: x.split(...))` did.
logs_outils["url_cleaned"] = logs_outils['url'].str.split('?').str[0].str.split('#').str[0]

In [None]:
# Keep only the rows recording that a step of a tool was displayed.
is_view_step = logs_outils['outilAction'] == 'view_step'
logs_outils_view_step = logs_outils[is_view_step]
# Alternative selection (disabled): every row that has any tool action.
# logs_outils_view_step = logs_outils[logs_outils['outilAction'].notna()]

In [None]:
# Number of 'view_step' rows per cleaned url.
logs_outils_view_step.url_cleaned.value_counts()

In [None]:
# Distinct cleaned urls that have at least one 'view_step' row.
logs_outils_view_step.url_cleaned.unique()

In [None]:
# Distinct values of the 'outil' column among the 'view_step' rows.
logs_outils_view_step.outil.unique()

In [None]:
# NOTE: despite its name, `indemnite` holds the 'view_step' rows of the
# procedure-licenciement page, as selected by the url below.
PROCEDURE_LICENCIEMENT_URL = 'https://code.travail.gouv.fr/outils/procedure-licenciement'
is_procedure_licenciement = logs_outils_view_step['url_cleaned'] == PROCEDURE_LICENCIEMENT_URL
indemnite = logs_outils_view_step[is_procedure_licenciement]
indemnite


In [None]:
# All rows (not only 'view_step') logged on the procedure-licenciement page.
logs_outils[logs_outils['url_cleaned']=='https://code.travail.gouv.fr/outils/procedure-licenciement']

In [None]:
# Distinct visits (idVisit) per step event on the selected page.
indemnite_by_event = indemnite.groupby(by=['outilEvent'])
indemnite_by_event.agg({'idVisit': 'nunique'})

In [None]:
# Distinct visits (idVisit) per cleaned url and step event, all tools together.
view_steps_by_url_and_event = logs_outils_view_step.groupby(by=['url_cleaned', 'outilEvent'])
view_steps_by_url_and_event.agg({'idVisit': 'nunique'})

In [None]:
# Same for June 2022, read from the dumped log files.
# Imported here because this cell comes before the later cell that imports
# these two names; without it a top-to-bottom run fails with NameError.
from join_multiple_file import pd_read_pattern, PATH_CDTN_MONOLOG

logs_june = pd_read_pattern(PATH_CDTN_MONOLOG + "/data-all-logs-june/2022-06-*")
# `.copy()` makes the filtered frame independent of `logs_june`, so the url
# rewrite below does not trigger pandas' SettingWithCopyWarning.
logs_june_tools = logs_june[
    logs_june['url'].str.startswith('https://code.travail.gouv.fr/outils', na=False)
].copy()
# Drop the fragment ('#...') and the query string ('?...') from each url.
logs_june_tools['url'] = logs_june_tools['url'].str.split('#').str[0].str.split('?').str[0]

### Comparaison du nombre de recherches de convention collective entre tous les logs et les logs sur les URL https://code.travail.gouv.fr/outils

In [None]:
from query_elastic_search import execute_query

In [None]:
# NOTE(review): execute_query is called without arguments, so which logs (and
# which period) it returns is decided inside the helper — confirm that it is
# January, as the variable name suggests.
logs_new_january = execute_query()

Nombre de recherches dans tous les logs

In [None]:
# Row count per 'type', keeping only the types whose label matches the regex 'cc'.
logs_new_january.type.value_counts().filter(regex = 'cc')

In [None]:
# Same count, restricted to rows whose url starts with the tools prefix
# (rows without a url are excluded by na=False).
is_tool_url = logs_new_january['url'].str.startswith('https://code.travail.gouv.fr/outils', na=False)
logs_new_january[is_tool_url].type.value_counts().filter(regex = 'cc')

# Calcul du taux de complétion des outils en janvier 2022

In [None]:
from join_multiple_file import pd_read_pattern, PATH_CDTN_MONOLOG

In [None]:
# For each tool (value of the 'outil' column): the step event that marks the
# start of the tool and the one that marks its completion.
# "Trouver sa convention collective" is deliberately left out: it was drafted
# with firstStep "start" and no lastStep (None).
DICT_OF_OUTILS_WITH_STARTING_AND_ENDING_STEP_EVENT_NAME = {
    outil_name: {'firstStep': first_event, 'lastStep': last_event}
    for outil_name, first_event, last_event in (
        ("Heures pour recherche d’emploi", "start", "results"),
        ("Indemnité de licenciement", "start", "indemnite_legale"),
        ("Indemnité de précarité", "start", "indemnite"),
    )
}

In [None]:
# Load the January 2022 log files, then keep only the rows whose url starts
# with the tools prefix (rows without a url are dropped by na=False).
JANUARY_LOGS_PATTERN = PATH_CDTN_MONOLOG + "/data-all-logs-january/2022-01-*"
df = pd_read_pattern(JANUARY_LOGS_PATTERN)
df = df[
    df['url'].str.startswith('https://code.travail.gouv.fr/outils', na=False)
]

In [None]:
# (rows, columns) of the January tool logs.
df.shape

In [None]:
# Preview of the first five rows.
df.head(5)

In [None]:
# Step-view events only; the shape is displayed as a quick size check.
is_view_step_row = df['outilAction'] == 'view_step'
df_view_step = df[is_view_step_row]
df_view_step.shape

In [None]:
# Distinct visits (idVisit) for every (tool, step event) pair; the result is
# indexed by ('outil', 'outilEvent') with a single 'idVisit' column.
view_steps_by_tool_and_event = df_view_step.groupby(['outil', 'outilEvent'])
visits_by_step_of_tools = view_steps_by_tool_and_event.agg({'idVisit': 'nunique'})
visits_by_step_of_tools

In [None]:
# Example lookup: distinct visits that viewed the 'start' step of one tool.
visits_by_step_of_tools.loc['Heures pour recherche d’emploi'].loc['start', 'idVisit']

In [None]:
# Completion rate of each configured tool: distinct visits that viewed the
# last step divided by distinct visits that viewed the first step.
for outil, steps in DICT_OF_OUTILS_WITH_STARTING_AND_ENDING_STEP_EVENT_NAME.items():
    # Rows of the current tool, indexed by step event.
    visits_of_outil = visits_by_step_of_tools.loc[outil]
    visits_first_step = visits_of_outil.loc[steps['firstStep'], 'idVisit']
    visits_last_step = visits_of_outil.loc[steps['lastStep'], 'idVisit']
    print(f'{outil} a un taux de {visits_last_step / visits_first_step}')

### Taux de complétion outil convention collective

In [None]:
# Row count per 'type' in the January tool logs.
df.type.value_counts()

In [None]:
# Rows logged on the convention-collective tool urls, copied so that the
# result is independent of `df`.
CC_TOOL_URL_PREFIX = 'https://code.travail.gouv.fr/outils/convention-collective'
is_cc_tool_url = df['url'].str.startswith(CC_TOOL_URL_PREFIX, na=False)
df_cc_url = df[is_cc_tool_url].copy()
df_cc_url

In [None]:
# Distinct visits (idVisit) per 'type' on the convention-collective tool urls.
cc_rows_by_type = df_cc_url.groupby(by=['type'])
nb_visits_by_cc_type = cc_rows_by_type.agg({'idVisit': 'nunique'})
nb_visits_by_cc_type

## Analyse "pb kpi taux de complétion heures pour recherche d'emploi"

Constat : le taux de complétion de l'outil « Heures pour recherche d'emploi » diminue entre mars et mai 2022

On va afficher son évolution au cours du mois d'avril 

In [None]:
def get_day_and_month(df: pd.DataFrame, column: str) -> pd.DataFrame:
    """Return a copy of *df* with 'month' and 'day' columns taken from *column*.

    Args:
        df: Source frame; it is not modified.
        column: Name of the column holding the dates, in any form accepted by
            ``pd.DatetimeIndex``.

    Returns:
        A copy of *df* with two added (or overwritten) columns: 'month' and
        'day', the month and day-of-month numbers of each value in *column*.
    """
    df_copy = df.copy()
    # Parse the column once and reuse the result for both components.
    dates = pd.DatetimeIndex(df_copy[column])
    df_copy['month'] = dates.month
    df_copy['day'] = dates.day
    return df_copy

In [None]:
# Copy of the 'view_step' logs with 'month' and 'day' columns derived from
# 'lastActionDateTime'.
logs_outils_view_step_with_date = get_day_and_month(logs_outils_view_step, 'lastActionDateTime')

In [None]:
# 'start' and 'results' step views of the heures-recherche-emploi tool.
# The mask is built from the same frame it filters: the previous version took
# it from `logs_outils_view_step`, which only worked because both frames
# happen to share the same index.
logs_heures_pour_recherche_emploi = logs_outils_view_step_with_date[
    (logs_outils_view_step_with_date['url_cleaned']=='https://code.travail.gouv.fr/outils/heures-recherche-emploi') &
    (logs_outils_view_step_with_date['outilEvent'].isin(['results', 'start']))
]
logs_heures_pour_recherche_emploi


In [None]:
# Check which 'outil' labels appear on that url, with their row counts.
logs_heures_pour_recherche_emploi.outil.value_counts()

### Évolution du nombre de visites sur les étapes 'start' et 'results' de l'outil Heures pour recherche d'emploi

#### Par mois

In [None]:
# Distinct visits (idVisit) per month and step event.
heures_by_month_and_event = logs_heures_pour_recherche_emploi.groupby(by=['month', 'outilEvent'])
heures_by_month_and_event.agg({'idVisit': 'nunique'})

#### Par jour

In [None]:
# Distinct visits (idVisit) per 'logfile' value and step event.
# NOTE(review): presumably there is one logfile per day — confirm.
heures_by_logfile_and_event = logs_heures_pour_recherche_emploi.groupby(by=['logfile', 'outilEvent'])
heures_by_logfile_and_event.agg({'idVisit': 'nunique'})

## Analyse du bug de l'outil indemnité de licenciement en juillet, dont le taux de complétion atteint 0,09 %

In [None]:
# Load every log file matching the July 2022 pattern.
logs_july = pd_read_pattern(PATH_CDTN_MONOLOG + "/data-all-logs-july/2022-07-*")

In [None]:
# Keep the rows whose url starts with the tools prefix (rows without a url are
# excluded by na=False); the copy allows the url column to be rewritten safely.
logs_outils_july = logs_july[logs_july['url'].str.startswith('https://code.travail.gouv.fr/outils', na=False)].copy()

In [None]:
# Drop the fragment ('#...') and the query string ('?...') from each url.
logs_outils_july['url'] = logs_outils_july['url'].str.split('#').str[0].str.split('?').str[0]

In [None]:
# Distinct visits (idVisit) per tool and step event in July, sorted by tool
# name and then by visit count, both descending.
july_view_steps = logs_outils_july[logs_outils_july['outilAction']=='view_step']
july_visits_by_step = july_view_steps.groupby(['outil', 'outilEvent']).agg({
    'idVisit': 'nunique',
})
july_visits_by_step.sort_values(by=['outil','idVisit'],ascending=False)