# Import des bibliothèques

In [5]:
import os 
import json
import requests
import boto3 # module pour se connecter au serveur S3
from botocore.client import Config # module pour se connecter au serveur S3
import logging # module pour se connecter au serveur S3
from botocore.exceptions import ClientError # module pour se connecter au serveur S3
import pandas as pd # module pour créer des dataframes
import numpy as np # module pour gérer les arrays
from jsonpath_ng import jsonpath, parse # module python pour faire des requête dans un JSON
from openpyxl import workbook #module pour travailler avec excels et des dataframes
from openpyxl import load_workbook  #module pour travailler avec excels et des dataframes
from openpyxl.utils.dataframe import dataframe_to_rows #module pour travailler avec excels et des dataframes
from openpyxl import Workbook #module pour travailler avec excels et des dataframes
import openpyxl as xl # module pour gérer les classeurs excels
import io  # module pour gérer flux de données (écrire un JSON)
import sys
import img2pdf, time # module pour transformer tiff en pdf
from PIL import Image
import traceback
from nltk import edit_distance
from tenacity import *
import ast

# Création des classes et fonctions

In [6]:
from typing import Optional, Any, List, Union, TypeVar, Callable, Type, cast
from datetime import datetime
import dateutil.parser
import json

T = TypeVar("T")


def from_str(x: Any) -> str:
    """Check that ``x`` is a string and hand it back untouched.

    Raises:
        AssertionError: if ``x`` is not a ``str``.
    """
    is_string = isinstance(x, str)
    assert is_string
    return x


def from_int(x: Any) -> int:
    """Check that ``x`` is an integer (booleans are refused) and return it.

    Raises:
        AssertionError: if ``x`` is a ``bool`` or not an ``int``.
    """
    is_plain_int = not isinstance(x, bool) and isinstance(x, int)
    assert is_plain_int
    return x


def from_none(x: Any) -> Any:
    """Check that ``x`` is ``None`` and return it.

    Raises:
        AssertionError: if ``x`` is anything other than ``None``.
    """
    is_missing = x is None
    assert is_missing
    return x


def from_union(fs, x):
    """Return the result of the first converter in ``fs`` that accepts ``x``.

    Each callable in ``fs`` is tried in order with ``x`` as its only
    argument; a converter "rejects" the value by raising an exception, in
    which case the next one is tried.

    Args:
        fs: iterable of one-argument callables.
        x: the value to convert.

    Returns:
        Whatever the first non-raising converter returns.

    Raises:
        AssertionError: if every converter rejected ``x`` (or ``fs`` is empty).
    """
    for f in fs:
        try:
            return f(x)
        except Exception:
            # Only ordinary errors mean "try the next converter";
            # KeyboardInterrupt / SystemExit must keep propagating.
            continue
    # Raised explicitly (not via ``assert False``) so the failure survives
    # ``python -O``, which strips assert statements.
    raise AssertionError(
        "no converter accepted a value of type " + type(x).__name__
    )


def from_float(x: Any) -> float:
    """Check that ``x`` is a float or an int (never a bool) and return it as a float.

    Raises:
        AssertionError: if ``x`` is a ``bool`` or not numeric.
    """
    is_number = isinstance(x, (float, int))
    assert is_number and not isinstance(x, bool)
    as_float = float(x)
    return as_float


def from_list(f: Callable[[Any], T], x: Any) -> List[T]:
    """Apply ``f`` to every element of the list ``x`` and collect the results.

    Raises:
        AssertionError: if ``x`` is not a ``list``.
    """
    assert isinstance(x, list)
    converted = []
    for element in x:
        converted.append(f(element))
    return converted


def to_float(x: Any) -> float:
    """Validate a numeric attribute before serialisation and return it as a float.

    Accepts the same inputs as ``from_float`` (floats and plain ints, never
    booleans), so an object built by hand with an integer value can still be
    serialised instead of failing the type check.

    Raises:
        AssertionError: if ``x`` is a ``bool`` or not an ``int``/``float``.
    """
    assert isinstance(x, (float, int)) and not isinstance(x, bool)
    return float(x)


def to_class(c: Type[T], x: Any) -> dict:
    """Check that ``x`` is an instance of ``c`` and return ``x.to_dict()``.

    Raises:
        AssertionError: if ``x`` is not an instance of ``c``.
    """
    assert isinstance(x, c)
    serialisable = cast(Any, x)
    return serialisable.to_dict()


def from_bool(x: Any) -> bool:
    """Check that ``x`` is a boolean and return it.

    Raises:
        AssertionError: if ``x`` is not a ``bool``.
    """
    is_flag = isinstance(x, bool)
    assert is_flag
    return x


def from_datetime(x: Any) -> datetime:
    """Parse ``x`` with ``dateutil.parser.parse`` and return the result."""
    parsed = dateutil.parser.parse(x)
    return parsed


class AggCat:
    """Entry holding a ``cat`` string, an optional integer ``reference_price`` and a ``score`` string."""

    cat: str
    reference_price: Optional[int]
    score: str

    def __init__(self, cat: str, reference_price: Optional[int], score: str) -> None:
        self.cat, self.reference_price, self.score = cat, reference_price, score

    @staticmethod
    def from_dict(obj: Any) -> 'AggCat':
        """Build an ``AggCat`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return AggCat(
            cat=from_str(obj.get("cat")),
            reference_price=from_union([from_int, from_none], obj.get("reference_price")),
            score=from_str(obj.get("score")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "cat": from_str(self.cat),
            "reference_price": from_union([from_int, from_none], self.reference_price),
            "score": from_str(self.score),
        }


class Category:
    """A ``cat`` label and its ``score``, both stored as strings."""

    cat: str
    score: str

    def __init__(self, cat: str, score: str) -> None:
        self.cat, self.score = cat, score

    @staticmethod
    def from_dict(obj: Any) -> 'Category':
        """Build a ``Category`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return Category(
            cat=from_str(obj.get("cat")),
            score=from_str(obj.get("score")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "cat": from_str(self.cat),
            "score": from_str(self.score),
        }


class Quantite:
    """Three string fields: ``content``, ``coordinates`` and ``score``."""

    content: str
    coordinates: str
    score: str

    def __init__(self, content: str, coordinates: str, score: str) -> None:
        self.content, self.coordinates, self.score = content, coordinates, score

    @staticmethod
    def from_dict(obj: Any) -> 'Quantite':
        """Build a ``Quantite`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return Quantite(
            content=from_str(obj.get("content")),
            coordinates=from_str(obj.get("coordinates")),
            score=from_str(obj.get("score")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "content": from_str(self.content),
            "coordinates": from_str(self.coordinates),
            "score": from_str(self.score),
        }


class Record:
    """One record: four ``H_*`` float values, category lists with their counts, and eight ``Quantite`` fields."""

    h_x0: float
    h_x1: float
    h_y0: float
    h_y1: float
    agg_cat: List[AggCat]
    categories: List[Category]
    n_agg_cat: int
    n_categories_baremo: int
    quantite: Quantite
    text: Quantite
    text_quantite: Quantite
    text_units: Quantite
    total_prix: Quantite
    unit_prix: Quantite
    unit_prix_text: Quantite
    units: Quantite

    def __init__(self, h_x0: float, h_x1: float, h_y0: float, h_y1: float, agg_cat: List[AggCat], categories: List[Category], n_agg_cat: int, n_categories_baremo: int, quantite: Quantite, text: Quantite, text_quantite: Quantite, text_units: Quantite, total_prix: Quantite, unit_prix: Quantite, unit_prix_text: Quantite, units: Quantite) -> None:
        # Numeric H_* values
        self.h_x0, self.h_x1 = h_x0, h_x1
        self.h_y0, self.h_y1 = h_y0, h_y1
        # Category lists and their counts
        self.agg_cat = agg_cat
        self.categories = categories
        self.n_agg_cat = n_agg_cat
        self.n_categories_baremo = n_categories_baremo
        # Quantite-typed fields
        self.quantite = quantite
        self.text = text
        self.text_quantite = text_quantite
        self.text_units = text_units
        self.total_prix = total_prix
        self.unit_prix = unit_prix
        self.unit_prix_text = unit_prix_text
        self.units = units

    @staticmethod
    def from_dict(obj: Any) -> 'Record':
        """Build a ``Record`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        fetch = obj.get
        return Record(
            h_x0=from_float(fetch("H_X0")),
            h_x1=from_float(fetch("H_X1")),
            h_y0=from_float(fetch("H_Y0")),
            h_y1=from_float(fetch("H_Y1")),
            agg_cat=from_list(AggCat.from_dict, fetch("agg_cat")),
            categories=from_list(Category.from_dict, fetch("categories")),
            n_agg_cat=from_int(fetch("n_agg_cat")),
            n_categories_baremo=from_int(fetch("n_categories_baremo")),
            quantite=Quantite.from_dict(fetch("quantite")),
            text=Quantite.from_dict(fetch("text")),
            text_quantite=Quantite.from_dict(fetch("text_quantite")),
            text_units=Quantite.from_dict(fetch("text_units")),
            total_prix=Quantite.from_dict(fetch("total_prix")),
            unit_prix=Quantite.from_dict(fetch("unit_prix")),
            unit_prix_text=Quantite.from_dict(fetch("unit_prix_text")),
            units=Quantite.from_dict(fetch("units")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "H_X0": to_float(self.h_x0),
            "H_X1": to_float(self.h_x1),
            "H_Y0": to_float(self.h_y0),
            "H_Y1": to_float(self.h_y1),
            "agg_cat": from_list(lambda item: to_class(AggCat, item), self.agg_cat),
            "categories": from_list(lambda item: to_class(Category, item), self.categories),
            "n_agg_cat": from_int(self.n_agg_cat),
            "n_categories_baremo": from_int(self.n_categories_baremo),
            "quantite": to_class(Quantite, self.quantite),
            "text": to_class(Quantite, self.text),
            "text_quantite": to_class(Quantite, self.text_quantite),
            "text_units": to_class(Quantite, self.text_units),
            "total_prix": to_class(Quantite, self.total_prix),
            "unit_prix": to_class(Quantite, self.unit_prix),
            "unit_prix_text": to_class(Quantite, self.unit_prix_text),
            "units": to_class(Quantite, self.units),
        }


class ContentClass:
    """A list of ``Record`` objects together with an ``n_records`` integer."""

    n_records: int
    records: List[Record]

    def __init__(self, n_records: int, records: List[Record]) -> None:
        self.n_records, self.records = n_records, records

    @staticmethod
    def from_dict(obj: Any) -> 'ContentClass':
        """Build a ``ContentClass`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return ContentClass(
            n_records=from_int(obj.get("n_records")),
            records=from_list(Record.from_dict, obj.get("records")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "n_records": from_int(self.n_records),
            "records": from_list(lambda item: to_class(Record, item), self.records),
        }


class Position:
    """Four string fields: ``xmax``, ``xmin``, ``ymax`` and ``ymin``."""

    xmax: str
    xmin: str
    ymax: str
    ymin: str

    # Field names in constructor order; the dict keys and attribute names coincide.
    _FIELDS = ("xmax", "xmin", "ymax", "ymin")

    def __init__(self, xmax: str, xmin: str, ymax: str, ymin: str) -> None:
        self.xmax, self.xmin = xmax, xmin
        self.ymax, self.ymin = ymax, ymin

    @staticmethod
    def from_dict(obj: Any) -> 'Position':
        """Build a ``Position`` from a dict, validating that each field is a string."""
        assert isinstance(obj, dict)
        values = [from_str(obj.get(field)) for field in Position._FIELDS]
        return Position(*values)

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {field: from_str(getattr(self, field)) for field in Position._FIELDS}


class Bbox:
    """A box with a class label, a content (either a ``ContentClass`` or a plain string), a ``Position`` and two score strings."""

    bbox_class: str
    content: Union[ContentClass, str]
    position: Position
    score: str
    scoreocr: str

    def __init__(self, bbox_class: str, content: Union[ContentClass, str], position: Position, score: str, scoreocr: str) -> None:
        self.bbox_class, self.content, self.position = bbox_class, content, position
        self.score, self.scoreocr = score, scoreocr

    @staticmethod
    def from_dict(obj: Any) -> 'Bbox':
        """Build a ``Bbox`` from a dict; the label is read from the ``"class"`` key."""
        assert isinstance(obj, dict)
        return Bbox(
            bbox_class=from_str(obj.get("class")),
            # Structured content is tried first, then a plain string.
            content=from_union([ContentClass.from_dict, from_str], obj.get("content")),
            position=Position.from_dict(obj.get("position")),
            score=from_str(obj.get("score")),
            scoreocr=from_str(obj.get("scoreocr")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "class": from_str(self.bbox_class),
            "content": from_union([lambda item: to_class(ContentClass, item), from_str], self.content),
            "position": to_class(Position, self.position),
            "score": from_str(self.score),
            "scoreocr": from_str(self.scoreocr),
        }


class Page:
    """A page: its list of ``Bbox`` objects, page number, probability and type string."""

    bbox: List[Bbox]
    page: int
    probability: float
    type: str

    def __init__(self, bbox: List[Bbox], page: int, probability: float, type: str) -> None:
        self.bbox, self.page = bbox, page
        self.probability, self.type = probability, type

    @staticmethod
    def from_dict(obj: Any) -> 'Page':
        """Build a ``Page`` from a dict; ``"page"`` must be a string and is converted with ``int``."""
        assert isinstance(obj, dict)
        boxes = from_list(Bbox.from_dict, obj.get("bbox"))
        page_number = int(from_str(obj.get("page")))
        probability = from_float(obj.get("probability"))
        page_type = from_str(obj.get("type"))
        return Page(boxes, page_number, probability, page_type)

    def to_dict(self) -> dict:
        """Return the dict form of this object; the page number is written back as a string."""
        return {
            "bbox": from_list(lambda item: to_class(Bbox, item), self.bbox),
            "page": from_str(str(self.page)),
            "probability": to_float(self.probability),
            "type": from_str(self.type),
        }


class PuneHedgehog:
    """Wrapper around a single string stored under the ``"VALUE"`` key."""

    value: str

    def __init__(self, value: str) -> None:
        self.value = value

    @staticmethod
    def from_dict(obj: Any) -> 'PuneHedgehog':
        """Build a ``PuneHedgehog`` from a dict, validating that ``"VALUE"`` is a string."""
        assert isinstance(obj, dict)
        return PuneHedgehog(value=from_str(obj.get("VALUE")))

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating the value."""
        return {"VALUE": from_str(self.value)}


class Rf1:
    """The ``RF1`` entry: two ``PuneHedgehog`` values and a boolean ``VALUE``."""

    rf1_doc: PuneHedgehog
    rf1_ht: PuneHedgehog
    value: bool

    def __init__(self, rf1_doc: PuneHedgehog, rf1_ht: PuneHedgehog, value: bool) -> None:
        self.rf1_doc, self.rf1_ht, self.value = rf1_doc, rf1_ht, value

    @staticmethod
    def from_dict(obj: Any) -> 'Rf1':
        """Build an ``Rf1`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return Rf1(
            rf1_doc=PuneHedgehog.from_dict(obj.get("RF1_DOC")),
            rf1_ht=PuneHedgehog.from_dict(obj.get("RF1_HT")),
            value=from_bool(obj.get("VALUE")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "RF1_DOC": to_class(PuneHedgehog, self.rf1_doc),
            "RF1_HT": to_class(PuneHedgehog, self.rf1_ht),
            "VALUE": from_bool(self.value),
        }


class Rf2:
    """The ``RF2`` entry: five ``PuneHedgehog`` values and a boolean ``VALUE``."""

    rf2_f: PuneHedgehog
    rf2_numreg: PuneHedgehog
    rf2_qt1: PuneHedgehog
    rf2_qte: PuneHedgehog
    rf2_upe: PuneHedgehog
    value: bool

    def __init__(self, rf2_f: PuneHedgehog, rf2_numreg: PuneHedgehog, rf2_qt1: PuneHedgehog, rf2_qte: PuneHedgehog, rf2_upe: PuneHedgehog, value: bool) -> None:
        self.rf2_f, self.rf2_numreg = rf2_f, rf2_numreg
        self.rf2_qt1, self.rf2_qte, self.rf2_upe = rf2_qt1, rf2_qte, rf2_upe
        self.value = value

    @staticmethod
    def from_dict(obj: Any) -> 'Rf2':
        """Build an ``Rf2`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return Rf2(
            rf2_f=PuneHedgehog.from_dict(obj.get("RF2_F")),
            rf2_numreg=PuneHedgehog.from_dict(obj.get("RF2_NUMREG")),
            rf2_qt1=PuneHedgehog.from_dict(obj.get("RF2_QT1")),
            rf2_qte=PuneHedgehog.from_dict(obj.get("RF2_QTE")),
            rf2_upe=PuneHedgehog.from_dict(obj.get("RF2_UPE")),
            value=from_bool(obj.get("VALUE")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "RF2_F": to_class(PuneHedgehog, self.rf2_f),
            "RF2_NUMREG": to_class(PuneHedgehog, self.rf2_numreg),
            "RF2_QT1": to_class(PuneHedgehog, self.rf2_qt1),
            "RF2_QTE": to_class(PuneHedgehog, self.rf2_qte),
            "RF2_UPE": to_class(PuneHedgehog, self.rf2_upe),
            "VALUE": from_bool(self.value),
        }


class Rf3:
    """The ``RF3`` entry: nine ``PuneHedgehog`` values and a boolean ``VALUE``."""

    rf3_aut1: PuneHedgehog
    rf3_aut1_np: PuneHedgehog
    rf3_aut1_total: PuneHedgehog
    rf3_aut2: PuneHedgehog
    rf3_aut2_np: PuneHedgehog
    rf3_aut2_total: PuneHedgehog
    rf3_aut3: PuneHedgehog
    rf3_aut3_np: PuneHedgehog
    rf3_aut3_total: PuneHedgehog
    value: bool

    # Dict keys of the PuneHedgehog entries, in constructor order; each one
    # maps to the attribute with the same name in lower case.
    _KEYS = (
        "RF3_AUT1",
        "RF3_AUT1_NP",
        "RF3_AUT1_TOTAL",
        "RF3_AUT2",
        "RF3_AUT2_NP",
        "RF3_AUT2_TOTAL",
        "RF3_AUT3",
        "RF3_AUT3_NP",
        "RF3_AUT3_TOTAL",
    )

    def __init__(self, rf3_aut1: PuneHedgehog, rf3_aut1_np: PuneHedgehog, rf3_aut1_total: PuneHedgehog, rf3_aut2: PuneHedgehog, rf3_aut2_np: PuneHedgehog, rf3_aut2_total: PuneHedgehog, rf3_aut3: PuneHedgehog, rf3_aut3_np: PuneHedgehog, rf3_aut3_total: PuneHedgehog, value: bool) -> None:
        self.rf3_aut1, self.rf3_aut1_np, self.rf3_aut1_total = rf3_aut1, rf3_aut1_np, rf3_aut1_total
        self.rf3_aut2, self.rf3_aut2_np, self.rf3_aut2_total = rf3_aut2, rf3_aut2_np, rf3_aut2_total
        self.rf3_aut3, self.rf3_aut3_np, self.rf3_aut3_total = rf3_aut3, rf3_aut3_np, rf3_aut3_total
        self.value = value

    @staticmethod
    def from_dict(obj: Any) -> 'Rf3':
        """Build an ``Rf3`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        entries = [PuneHedgehog.from_dict(obj.get(key)) for key in Rf3._KEYS]
        flag = from_bool(obj.get("VALUE"))
        return Rf3(*entries, flag)

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        result: dict = {
            key: to_class(PuneHedgehog, getattr(self, key.lower()))
            for key in Rf3._KEYS
        }
        result["VALUE"] = from_bool(self.value)
        return result


class TartuGecko:
    """A string ``VALUE`` paired with a boolean ``ACTIVE`` flag."""

    active: bool
    value: str

    def __init__(self, active: bool, value: str) -> None:
        self.active, self.value = active, value

    @staticmethod
    def from_dict(obj: Any) -> 'TartuGecko':
        """Build a ``TartuGecko`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return TartuGecko(
            active=from_bool(obj.get("ACTIVE")),
            value=from_str(obj.get("VALUE")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "ACTIVE": from_bool(self.active),
            "VALUE": from_str(self.value),
        }


class Rf4:
    """The ``RF4`` entry: twelve ``TartuGecko`` values and a boolean ``VALUE``."""

    rf4_no_principales: TartuGecko
    rf4_no_principales_est: TartuGecko
    rf4_peint001: TartuGecko
    rf4_peint001_est: TartuGecko
    rf4_peint003: TartuGecko
    rf4_peint003_est: TartuGecko
    rf4_peint004: TartuGecko
    rf4_peint004_est: TartuGecko
    rf4_plat001: TartuGecko
    rf4_plat001_est: TartuGecko
    rf4_plomb001: TartuGecko
    rf4_plomb001_est: TartuGecko
    value: bool

    # Dict keys of the TartuGecko entries, in constructor order; each one
    # maps to the attribute with the same name in lower case.
    _KEYS = (
        "RF4_NO_PRINCIPALES",
        "RF4_NO_PRINCIPALES_EST",
        "RF4_PEINT001",
        "RF4_PEINT001_EST",
        "RF4_PEINT003",
        "RF4_PEINT003_EST",
        "RF4_PEINT004",
        "RF4_PEINT004_EST",
        "RF4_PLAT001",
        "RF4_PLAT001_EST",
        "RF4_PLOMB001",
        "RF4_PLOMB001_EST",
    )

    def __init__(self, rf4_no_principales: TartuGecko, rf4_no_principales_est: TartuGecko, rf4_peint001: TartuGecko, rf4_peint001_est: TartuGecko, rf4_peint003: TartuGecko, rf4_peint003_est: TartuGecko, rf4_peint004: TartuGecko, rf4_peint004_est: TartuGecko, rf4_plat001: TartuGecko, rf4_plat001_est: TartuGecko, rf4_plomb001: TartuGecko, rf4_plomb001_est: TartuGecko, value: bool) -> None:
        self.rf4_no_principales, self.rf4_no_principales_est = rf4_no_principales, rf4_no_principales_est
        self.rf4_peint001, self.rf4_peint001_est = rf4_peint001, rf4_peint001_est
        self.rf4_peint003, self.rf4_peint003_est = rf4_peint003, rf4_peint003_est
        self.rf4_peint004, self.rf4_peint004_est = rf4_peint004, rf4_peint004_est
        self.rf4_plat001, self.rf4_plat001_est = rf4_plat001, rf4_plat001_est
        self.rf4_plomb001, self.rf4_plomb001_est = rf4_plomb001, rf4_plomb001_est
        self.value = value

    @staticmethod
    def from_dict(obj: Any) -> 'Rf4':
        """Build an ``Rf4`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        entries = [TartuGecko.from_dict(obj.get(key)) for key in Rf4._KEYS]
        flag = from_bool(obj.get("VALUE"))
        return Rf4(*entries, flag)

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        result: dict = {
            key: to_class(TartuGecko, getattr(self, key.lower()))
            for key in Rf4._KEYS
        }
        result["VALUE"] = from_bool(self.value)
        return result


class Rf5:
    """The ``RF5`` entry: twenty values (a mix of ``PuneHedgehog`` and ``TartuGecko``) and a boolean ``VALUE``."""

    rf5_fourniture: PuneHedgehog
    rf5_fourniture_est: PuneHedgehog
    rf5_mo000: TartuGecko
    rf5_mo000_est: TartuGecko
    rf5_no_principales: TartuGecko
    rf5_no_principales_est: TartuGecko
    rf5_peint001: TartuGecko
    rf5_peint001_est: TartuGecko
    rf5_peint003: TartuGecko
    rf5_peint003_est: TartuGecko
    rf5_peint004: TartuGecko
    rf5_peint004_est: TartuGecko
    rf5_peint_autre: PuneHedgehog
    rf5_peint_autre_est: PuneHedgehog
    rf5_plat001: TartuGecko
    rf5_plat001_est: TartuGecko
    rf5_plat002: PuneHedgehog
    rf5_plat002_est: PuneHedgehog
    rf5_plomb001: TartuGecko
    rf5_plomb001_est: TartuGecko
    value: bool

    # Dict keys of the nested entries, in constructor order; each one maps to
    # the attribute with the same name in lower case.
    _KEYS = (
        "RF5_FOURNITURE",
        "RF5_FOURNITURE_EST",
        "RF5_MO000",
        "RF5_MO000_EST",
        "RF5_NO_PRINCIPALES",
        "RF5_NO_PRINCIPALES_EST",
        "RF5_PEINT001",
        "RF5_PEINT001_EST",
        "RF5_PEINT003",
        "RF5_PEINT003_EST",
        "RF5_PEINT004",
        "RF5_PEINT004_EST",
        "RF5_PEINT_AUTRE",
        "RF5_PEINT_AUTRE_EST",
        "RF5_PLAT001",
        "RF5_PLAT001_EST",
        "RF5_PLAT002",
        "RF5_PLAT002_EST",
        "RF5_PLOMB001",
        "RF5_PLOMB001_EST",
    )
    # Keys whose entry is a PuneHedgehog; every other key holds a TartuGecko.
    _HEDGEHOG_KEYS = frozenset((
        "RF5_FOURNITURE",
        "RF5_FOURNITURE_EST",
        "RF5_PEINT_AUTRE",
        "RF5_PEINT_AUTRE_EST",
        "RF5_PLAT002",
        "RF5_PLAT002_EST",
    ))

    def __init__(self, rf5_fourniture: PuneHedgehog, rf5_fourniture_est: PuneHedgehog, rf5_mo000: TartuGecko, rf5_mo000_est: TartuGecko, rf5_no_principales: TartuGecko, rf5_no_principales_est: TartuGecko, rf5_peint001: TartuGecko, rf5_peint001_est: TartuGecko, rf5_peint003: TartuGecko, rf5_peint003_est: TartuGecko, rf5_peint004: TartuGecko, rf5_peint004_est: TartuGecko, rf5_peint_autre: PuneHedgehog, rf5_peint_autre_est: PuneHedgehog, rf5_plat001: TartuGecko, rf5_plat001_est: TartuGecko, rf5_plat002: PuneHedgehog, rf5_plat002_est: PuneHedgehog, rf5_plomb001: TartuGecko, rf5_plomb001_est: TartuGecko, value: bool) -> None:
        self.rf5_fourniture, self.rf5_fourniture_est = rf5_fourniture, rf5_fourniture_est
        self.rf5_mo000, self.rf5_mo000_est = rf5_mo000, rf5_mo000_est
        self.rf5_no_principales, self.rf5_no_principales_est = rf5_no_principales, rf5_no_principales_est
        self.rf5_peint001, self.rf5_peint001_est = rf5_peint001, rf5_peint001_est
        self.rf5_peint003, self.rf5_peint003_est = rf5_peint003, rf5_peint003_est
        self.rf5_peint004, self.rf5_peint004_est = rf5_peint004, rf5_peint004_est
        self.rf5_peint_autre, self.rf5_peint_autre_est = rf5_peint_autre, rf5_peint_autre_est
        self.rf5_plat001, self.rf5_plat001_est = rf5_plat001, rf5_plat001_est
        self.rf5_plat002, self.rf5_plat002_est = rf5_plat002, rf5_plat002_est
        self.rf5_plomb001, self.rf5_plomb001_est = rf5_plomb001, rf5_plomb001_est
        self.value = value

    @staticmethod
    def _entry_class(key: str) -> type:
        """Return the wrapper class used for the entry stored under ``key``."""
        return PuneHedgehog if key in Rf5._HEDGEHOG_KEYS else TartuGecko

    @staticmethod
    def from_dict(obj: Any) -> 'Rf5':
        """Build an ``Rf5`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        entries = [Rf5._entry_class(key).from_dict(obj.get(key)) for key in Rf5._KEYS]
        flag = from_bool(obj.get("VALUE"))
        return Rf5(*entries, flag)

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        result: dict = {
            key: to_class(Rf5._entry_class(key), getattr(self, key.lower()))
            for key in Rf5._KEYS
        }
        result["VALUE"] = from_bool(self.value)
        return result


class RedFlags:
    """Container grouping the five entries ``RF1`` to ``RF5``."""

    rf1: Rf1
    rf2: Rf2
    rf3: Rf3
    rf4: Rf4
    rf5: Rf5

    def __init__(self, rf1: Rf1, rf2: Rf2, rf3: Rf3, rf4: Rf4, rf5: Rf5) -> None:
        self.rf1, self.rf2, self.rf3, self.rf4, self.rf5 = rf1, rf2, rf3, rf4, rf5

    @staticmethod
    def from_dict(obj: Any) -> 'RedFlags':
        """Build a ``RedFlags`` from a dict, validating each nested entry."""
        assert isinstance(obj, dict)
        return RedFlags(
            rf1=Rf1.from_dict(obj.get("RF1")),
            rf2=Rf2.from_dict(obj.get("RF2")),
            rf3=Rf3.from_dict(obj.get("RF3")),
            rf4=Rf4.from_dict(obj.get("RF4")),
            rf5=Rf5.from_dict(obj.get("RF5")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each entry."""
        return {
            "RF1": to_class(Rf1, self.rf1),
            "RF2": to_class(Rf2, self.rf2),
            "RF3": to_class(Rf3, self.rf3),
            "RF4": to_class(Rf4, self.rf4),
            "RF5": to_class(Rf5, self.rf5),
        }


class Output:
    """The ``output`` payload: an ``n_pages`` integer, the list of ``Page`` objects and the ``RedFlags``."""

    n_pages: int
    pages: List[Page]
    red_flags: RedFlags

    def __init__(self, n_pages: int, pages: List[Page], red_flags: RedFlags) -> None:
        self.n_pages, self.pages, self.red_flags = n_pages, pages, red_flags

    @staticmethod
    def from_dict(obj: Any) -> 'Output':
        """Build an ``Output`` from a dict, validating the type of each field."""
        assert isinstance(obj, dict)
        return Output(
            n_pages=from_int(obj.get("n_pages")),
            pages=from_list(Page.from_dict, obj.get("pages")),
            red_flags=RedFlags.from_dict(obj.get("red_flags")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object, re-validating each field."""
        return {
            "n_pages": from_int(self.n_pages),
            "pages": from_list(lambda item: to_class(Page, item), self.pages),
            "red_flags": to_class(RedFlags, self.red_flags),
        }


class Welcome:
    """Top-level object: action, datetime, elapsed, error, file id and the nested ``Output``."""

    action: str
    welcome_datetime: datetime
    elapsed: int
    error: int
    fileid: str
    output: Output

    def __init__(self, action: str, welcome_datetime: datetime, elapsed: int, error: int, fileid: str, output: Output) -> None:
        self.action, self.welcome_datetime = action, welcome_datetime
        self.elapsed, self.error = elapsed, error
        self.fileid, self.output = fileid, output

    @staticmethod
    def from_dict(obj: Any) -> 'Welcome':
        """Build a ``Welcome`` from a dict; the ``"datetime"`` key is parsed with ``from_datetime``."""
        assert isinstance(obj, dict)
        return Welcome(
            action=from_str(obj.get("action")),
            welcome_datetime=from_datetime(obj.get("datetime")),
            elapsed=from_int(obj.get("elapsed")),
            error=from_int(obj.get("error")),
            fileid=from_str(obj.get("fileid")),
            output=Output.from_dict(obj.get("output")),
        )

    def to_dict(self) -> dict:
        """Return the dict form of this object; the datetime is written in ISO format."""
        return {
            "action": from_str(self.action),
            "datetime": self.welcome_datetime.isoformat(),
            "elapsed": from_int(self.elapsed),
            "error": from_int(self.error),
            "fileid": from_str(self.fileid),
            "output": to_class(Output, self.output),
        }


def welcome_from_dict(s: Any) -> Welcome:
    """Convert the decoded dict ``s`` into a ``Welcome`` via ``Welcome.from_dict``."""
    welcome = Welcome.from_dict(s)
    return welcome


def welcome_to_dict(x: Welcome) -> Any:
    """Check that ``x`` is a ``Welcome`` and return its dict form via ``to_class``."""
    as_dict = to_class(Welcome, x)
    return as_dict

In [7]:
# NOTE(review): "umbral" is Spanish for "threshold"; what this one applies to
# is not visible in this part of the file.
UMBRAL_1 = 500

# NOTE(review): presumably the edit-distance cut-off used with
# distance_to_word -- confirm against its callers.
MINIMUM_DISTANCE = 3

# Minimum and maximum prices accepted for invoice totals.
MINIMUM_TOTAL = 99.99
MAXIMUM_TOTAL = 100000.01

# Thresholds fixed for the computation of the RFs.
RF1_HT = 1200
RF3_AUT = 200
cols_ref_prix = [
    'PEINT001',
    'PEINT003',
    'PEINT004',
    'PLAT001',
    'PLOMB001',
    'MO000',
    'NO_PRINCIPALES',
]
# NOTE(review): both lists have one value per entry of cols_ref_prix;
# presumably they are aligned by position -- confirm.
RF4_UNITPRICE_VALUES = [21, 28, 21, 36, 400, 50, 100]
RF5_UNITPRICE_VALUES = [21, 28, 21, 36, 400, 50, 100]

In [8]:
def total_ok(total, minimum=None, maximum=None):
    """Parse an invoice total written as text and range-check it.

    Commas are treated like dots. After that, the group following the last
    dot decides how the dots are read:

    * no dot at all ("100")                  -> plain number;
    * last group longer than two characters
      ("1.000", "1.234.567")                 -> every dot is a thousands
                                                separator;
    * last group of at most two characters
      ("100.00", "1.000.00", "100.5")        -> that group is the decimal
                                                part, earlier dots are
                                                thousands separators.

    The decimal part is scaled by its actual length, so "100.5" is read as
    100.5 rather than 10.05.

    Args:
        total: the amount as a string.
        minimum: exclusive lower bound; defaults to ``MINIMUM_TOTAL``.
        maximum: exclusive upper bound; defaults to ``MAXIMUM_TOTAL``.

    Returns:
        The parsed amount as a float when it lies strictly between the two
        bounds, ``np.nan`` otherwise.

    Raises:
        ValueError: if the text left after removing the separators is not a
            number.
    """
    lower = MINIMUM_TOTAL if minimum is None else minimum
    upper = MAXIMUM_TOTAL if maximum is None else maximum

    groups = total.replace(',', '.').split('.')
    digits = ''.join(groups)
    last_group = groups[-1]

    if len(groups) == 1 or len(last_group) > 2:
        # No separator, or separators that only group thousands.
        value = float(digits)
    else:
        # The last group holds the decimals: scale by its real length.
        value = int(digits) / 10 ** len(last_group)

    if lower < value < upper:
        return value

    return np.nan
        
def distance_to_word(x, word):
    """Return the smallest edit distance between ``word`` and any token of ``x``.

    ``x`` is lower-cased and split on single spaces; each token is compared
    to ``word`` with ``edit_distance(..., transpositions=True)``. The value
    99 is returned when there is no token to compare.
    """
    tokens = x.lower().split(' ')
    return min(
        (edit_distance(token, word, transpositions=True) for token in tokens),
        default=99,
    )

# Record sub-fields in the order they are read from each record ...
_RECORD_FIELDS_READ_ORDER = ('text', 'total_prix', 'unit_prix', 'quantite', 'units',
                             'text_quantite', 'text_units', 'unit_prix_text')
# ... and in the order their (content, score) pairs appear in the output row.
_RECORD_FIELDS_ROW_ORDER = ('text', 'units', 'quantite', 'unit_prix', 'total_prix',
                            'text_quantite', 'text_units', 'unit_prix_text')
# Number of cells after 'NUM RECORDS' and 'RECORD' that are left empty for a body without records.
_EMPTY_RECORD_CELLS = 42


def _scored_content(box):
    """Return ``[content, score, scoreocr]`` read from a bounding-box dict."""
    return [box['content'], box['score'], box['scoreocr']]


def _box_corners(box):
    """Return ``[xmin, xmax, ymin, ymax]`` read from ``box['position']``."""
    return [box['position'][edge] for edge in ('xmin', 'xmax', 'ymin', 'ymax')]


def get_pandas_out_of_json(data):
    """Flatten the page/bbox/record structure of ``data`` into a DataFrame.

    For every page of ``data['output']['pages']`` the boxes are scanned
    twice: first to collect the page-level fields (total, total_aux,
    emitter, siret, recipient; each defaults to 0 when no box matches),
    then to emit rows for every box whose class contains ``'body'``. A body
    yields one row per record, or a single row with ``NUM RECORDS`` = 0 and
    empty record cells when it holds no record. Pages without a body box
    produce no row.

    Returns a DataFrame with 86 columns ('ID DOC' ... 'SCORE CATEGORY MA 9').
    """
    rows = []

    for page in data['output']['pages']:
        page_frame = pd.DataFrame.from_dict(page)

        # Page-level cells keep 0 when the page has no matching box.
        total_cells = [0, 0, 0]
        total_doc_cells = [0, 0, 0]
        total_corners = [0, 0, 0, 0]
        emitter_cells = [0] * 7
        siret_cells = [0] * 7
        recipient_cells = [0] * 7

        for box in page_frame['bbox']:
            # 'total' must match exactly; the other classes match as substrings.
            if box['class'] == 'total':
                total_cells = _scored_content(box)
                total_corners = _box_corners(box)

            if 'total_aux' in box['class']:
                total_doc_cells = _scored_content(box)

            if 'emitter' in box['class']:
                emitter_cells = _scored_content(box) + _box_corners(box)

            if 'siret' in box['class']:
                siret_cells = _scored_content(box) + _box_corners(box)

            if 'recipient' in box['class']:
                recipient_cells = _scored_content(box) + _box_corners(box)

        for box in page_frame['bbox']:
            if 'body' not in box['class']:
                continue

            records_frame = pd.DataFrame.from_dict(box['content'])

            # The self-comparison is False only for NaN scores.
            body_found = 1 if (box['score'] == box['score']) else 0
            body_cells = [body_found, box['score'], box['scoreocr']] + _box_corners(box)

            # Cells shared by every row of this body, placed after the four document/page cells.
            shared_cells = (total_cells + total_doc_cells + total_corners + body_cells
                            + siret_cells + recipient_cells + emitter_cells)

            if records_frame.shape[0] == 0:
                rows.append([data['fileid'],
                             page_frame.page[0],
                             page_frame.type[0],
                             page_frame.probability[0]]
                            + shared_cells
                            + [0, 0]
                            + [None] * _EMPTY_RECORD_CELLS)
                continue

            n_records = records_frame['n_records'][0]
            for index, record in enumerate(records_frame['records']):
                fields = {}
                for key in _RECORD_FIELDS_READ_ORDER:
                    entry = dict(record[key])
                    fields[key] = [entry['content'], entry['score']]

                header_span = [record['H_X0'], record['H_Y0'], record['H_X1'], record['H_Y1']]

                n_agg_cat = record['n_agg_cat']
                agg_cat = []
                ref_price = []
                for position in range(int(n_agg_cat)):
                    agg_cat.append(dict(record['agg_cat'][position])['cat'])
                    ref_price.append(dict(record['agg_cat'][position])['reference_price'])

                n_categories_baremo = record['n_categories_baremo']

                # Exactly nine categories are read from every record.
                category_cells = []
                for rank in range(9):
                    category = dict(record['categories'][rank])
                    category_cells.extend([category['cat'], category['score']])

                field_cells = []
                for key in _RECORD_FIELDS_ROW_ORDER:
                    field_cells.extend(fields[key])

                rows.append([data['fileid'],
                             page_frame.page[0],
                             page_frame.type[0],
                             page_frame.probability[0]]
                            + shared_cells
                            + [n_records, index]
                            + field_cells
                            + header_span
                            + [agg_cat, ref_price, n_agg_cat, n_categories_baremo]
                            + category_cells)

    column_names = [
        'ID DOC', 'PAGE', 'TYPE', 'SCORE TYPE',
        'TOTAL PRICE', 'SCORE TOTAL PRICE', 'OCR SCORE TOTAL PRICE',
        'TOTAL PRICE DOC', 'SCORE TOTAL PRICE DOC', 'OCR SCORE TOTAL PRICE DOC',
        'X MIN TOTAL PRICE', 'X MAX TOTAL PRICE', 'Y MIN TOTAL PRICE', 'Y MAX TOTAL PRICE',
        'BODY', 'SCORE BODY', 'OCR SCORE BODY',
        'X MIN BODY', 'X MAX BODY', 'Y MIN BODY', 'Y MAX BODY',
        'SIRET', 'SCORE SIRET', 'SCORE OCR SIRET',
        'X MIN SIRET', 'X MAX SIRET', 'Y MIN SIRET', 'Y MAX SIRET',
        'RECIPIENT', 'RECIPIENT SCORE', 'RECIPIENT OCR SCORE',
        'X MIN RECIPIENT', 'X MAX RECIPIENT', 'Y MIN RECIPIENT', 'Y MAX RECIPIENT',
        'EMITER', 'EMITER SCORE', 'EMITER OCR SCORE',
        'X MIN EMITER', 'X MAX EMITER', 'Y MIN EMITER', 'Y MAX EMITER',
        'NUM RECORDS', 'RECORD',
        'TEXT RECORD', 'SCORE TEXT RECORD',
        'UNITS', 'SCORE UNITS',
        'QUANTITY', 'SCORE QUANTITY',
        'UNIT PRICE', 'SCORE UNIT PRICE',
        'TOTAL VALUE', 'SCORE TOTAL VALUE',
        'TEXT QUANTITY VALUE', 'SCORE TEXT QUANTITY VALUE',
        'TEXT UNITS VALUE', 'SCORE TEXT UNITS VALUE',
        'TEXT UNIT PRIX VALUE', 'SCORE TEXT UNIT PRIX VALUE',
        'H_X0', 'H_Y0', 'H_X1', 'H_Y1',
        'AGGREGATED CATEGORY', 'REFERENCE PRIX',
        'NUMERO BAREMOS AGREGADOS', 'NUMERO BAREMOS',
        'CATEGORY MA 1', 'SCORE CATEGORY MA 1',
        'CATEGORY MA 2', 'SCORE CATEGORY MA 2',
        'CATEGORY MA 3', 'SCORE CATEGORY MA 3',
        'CATEGORY MA 4', 'SCORE CATEGORY MA 4',
        'CATEGORY MA 5', 'SCORE CATEGORY MA 5',
        'CATEGORY MA 6', 'SCORE CATEGORY MA 6',
        'CATEGORY MA 7', 'SCORE CATEGORY MA 7',
        'CATEGORY MA 8', 'SCORE CATEGORY MA 8',
        'CATEGORY MA 9', 'SCORE CATEGORY MA 9',
    ]
    return pd.DataFrame(rows, columns=column_names)

def red_flags(df, a):
    """Compute the red-flag indicators (RF1 to RF5) for one set of invoice rows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per invoice record, with the columns read below
        ('NUM RECORDS', 'UNITS', 'UNIT PRICE', 'QUANTITY', 'TEXT RECORD',
        'TOTAL PRICE', 'TOTAL PRICE DOC', 'TOTAL VALUE', 'AGGREGATED CATEGORY',
        'ID DOC', 'PAGE', 'RECORD', 'TYPE', ...).
        NOTE(review): presumably the frame built by get_pandas_out_of_json - confirm with the caller.
    a : Any
        Unused; kept so that existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        A single-row frame holding the boolean flags 'RF1'..'RF5' and the
        intermediate values used to derive them. NaN values are replaced by 0.0.
    """
    df_total = df.copy()
    # Explicit copy: the filtered frame receives new columns below.
    df = df[df['NUM RECORDS'] != 0].copy()

    # Reference unit price per category, for RF4 and RF5.
    dic4 = dict(zip(cols_ref_prix, RF4_UNITPRICE_VALUES))
    dic5 = dict(zip(cols_ref_prix, RF5_UNITPRICE_VALUES))
    redflag4 = []
    redflag5 = []
    catsRF4 = ['NO_PRINCIPALES', 'PEINT001', 'PEINT003', 'PEINT004', 'PLOMB001', 'PLAT001']
    catsRF5 = ['NO_PRINCIPALES', 'FOURNITURE', 'MO000', 'PEINT001', 'PEINT003', 'PEINT004',
               'PEINT_AUTRE', 'PLOMB001', 'PLAT001', 'PLAT002']

    # Distances to the words forfait, ens, for, f and ft, used to detect lump-sum (forfait) records.
    for column in ('UNITS', 'UNIT PRICE', 'QUANTITY', 'TEXT RECORD', 'TOTAL PRICE', 'TOTAL PRICE DOC'):
        df[column] = df[column].astype(str)

    df['DISTANCE UNITS'] = df['UNITS'].apply(distance_to_word, args=['forfait'])
    df['DISTANCE TEXT'] = df['TEXT RECORD'].apply(distance_to_word, args=['forfait'])

    df['DISTANCE UNITS ENS'] = df['UNITS'].apply(distance_to_word, args=['ens'])
    df['DISTANCE TEXT ENS'] = df['TEXT RECORD'].apply(distance_to_word, args=['ens'])

    df['DISTANCE UNITS FOR'] = df['UNITS'].apply(distance_to_word, args=['for'])

    df['DISTANCE UNITS F'] = df['UNITS'].apply(distance_to_word, args=['f'])
    df['DISTANCE UNITS FT'] = df['UNITS'].apply(distance_to_word, args=['ft'])

    df['DISTANCE UNITS F'] = df[['DISTANCE UNITS F', 'DISTANCE UNITS FT']].min(axis=1)

    # Result row: the 'RFn' flags and every '*_ACTIVATE' column start at False,
    # every other column at 0.0. Deriving the defaults from the names keeps
    # the two in sync.
    rf_columns = ['RF1', 'RF1_DOC', 'RF1_HT',
                  'RF2', 'RF2_SCORE', 'RF2_QT1', 'RF2_QTE', 'RF2_UPE', 'RF2_F', 'RF2_NUMREG',
                  'RF3', 'RF3_AUT1', 'RF3_AUT2', 'RF3_AUT3',
                  'RF3_AUT1_TOTAL', 'RF3_AUT2_TOTAL', 'RF3_AUT3_TOTAL',
                  'RF3_AUT1_NP', 'RF3_AUT2_NP', 'RF3_AUT3_NP']
    for prefix, cats in (('RF4', catsRF4), ('RF5', catsRF5)):
        rf_columns.append(prefix)
        for suffix in ('', '_EST', '_ACTIVATE', '_EST_ACTIVATE'):
            rf_columns.extend(prefix + '_' + cat + suffix for cat in cats)
    flag_columns = ('RF1', 'RF2', 'RF3', 'RF4', 'RF5')
    defaults = [False if (name in flag_columns or name.endswith('_ACTIVATE')) else 0.0
                for name in rf_columns]
    df_2 = pd.DataFrame(data=[defaults], columns=rf_columns)

    # RED FLAG 1 (look for the total and the pre-tax total in the total columns).
    df_total = df_total[['PAGE', 'TOTAL PRICE', 'TOTAL PRICE DOC', 'TYPE']].copy()
    # df_total was copied before the string conversion above, so its totals
    # may still be numbers (e.g. a 0 default); .lower() below needs strings.
    df_total['TOTAL PRICE'] = df_total['TOTAL PRICE'].astype(str)
    df_total['TOTAL PRICE DOC'] = df_total['TOTAL PRICE DOC'].astype(str)
    df_total = df_total.drop_duplicates().sort_values(by='PAGE', ascending=False)

    total = np.nan
    total_ht = np.nan
    for idx, row in df_total.iterrows():
        if (row['TOTAL PRICE'].lower() != 'nan') and (row['TYPE'] != 'none'):
            t = total_ok(row['TOTAL PRICE'])
            if t == t:  # False only when t is NaN
                total = t
                total_ht = t
                break

    if total != total:
        for idx, row in df_total.iterrows():
            if (row['TOTAL PRICE DOC'].lower() != 'nan') and (row['TYPE'] != 'none'):
                t = total_ok(row['TOTAL PRICE DOC'])
                if t == t:
                    total = t
                    break

    if total != total:
        # Last resort: first row carrying any total, whatever the page type.
        for idx, row in df_total.iterrows():
            if row['TOTAL PRICE'].lower() != 'nan':
                total_ht = total_ok(row['TOTAL PRICE'])
                total = total_ht
                break
            if row['TOTAL PRICE DOC'].lower() != 'nan':
                total = total_ok(row['TOTAL PRICE DOC'])
                break

    df_2.at[0, 'RF1_HT'] = total_ht
    df_2.at[0, 'RF1_DOC'] = total

    # RED FLAG 2
    df_2.at[0, 'RF2_NUMREG'] = df.shape[0]
    # Spellings under which the value 1 has been found.
    values = ['1', '1.', '1.0', '1,', '1,0', '1.00', '1,00']
    df['TEXT QUANTITY VALUE'] = df['TEXT QUANTITY VALUE'].astype(str)
    df['TEXT UNIT PRIX VALUE'] = df['TEXT UNIT PRIX VALUE'].astype(str)
    # Drop the records that carry no total value.
    df = df[(~(df['TOTAL VALUE'].isnull())) & (df['TOTAL VALUE'] != '')].copy()
    total_registres = df['TOTAL VALUE'].astype(float).sum()  # sum of the record totals
    if total_registres > 0:
        df['QUANTITY'] = df['QUANTITY'].replace('', 'nan')
        df['UNIT PRICE'] = df['UNIT PRICE'].replace('', 'nan')
        df['TEXT QUANTITY VALUE'] = df['TEXT QUANTITY VALUE'].replace('', 'nan')
        df['TEXT UNIT PRIX VALUE'] = df['TEXT UNIT PRIX VALUE'].replace('', 'nan')

        record_totals = df['TOTAL VALUE'].astype(float)
        quantity_is_one = ((df['QUANTITY'].isin(values)) & (df['TEXT QUANTITY VALUE'].isin(values + ['nan']))
                           | (df['QUANTITY'] == 'nan') & (df['TEXT QUANTITY VALUE'].isin(values)))
        quantity_missing = (df['QUANTITY'] == 'nan') & (df['TEXT QUANTITY VALUE'] == 'nan')
        unit_price_missing = (df['UNIT PRICE'] == 'nan') & (df['TEXT UNIT PRIX VALUE'] == 'nan')
        # Weight, relative to the sum of all record totals, of the records with:
        df_2.at[0, 'RF2_QT1'] = sum(record_totals[quantity_is_one]) / total_registres  # quantity 1
        df_2.at[0, 'RF2_QTE'] = sum(record_totals[quantity_missing]) / total_registres  # empty quantity
        df_2.at[0, 'RF2_UPE'] = sum(record_totals[unit_price_missing]) / total_registres  # empty unit price

        # Decide for every record whether it is a forfait.
        df['F'] = (df['DISTANCE UNITS'] < MINIMUM_DISTANCE)\
               | (df['DISTANCE TEXT'] < MINIMUM_DISTANCE)\
               | (df['DISTANCE UNITS FOR'] < 2)\
               | (df['DISTANCE UNITS F'] < 1)\
               | (df['DISTANCE TEXT ENS'] < 1)\
               | (df['DISTANCE UNITS ENS'] < 1)
        # Weight of the forfait records relative to the total.
        df_2.at[0, 'RF2_F'] = ((record_totals * df['F']).sum()) / total_registres

        # RED FLAG 3
        # NOTE(review): writes an int into a boolean column; the value is overwritten before returning.
        df_2.at[0, 'RF3'] = 0
        df = df.fillna('nan')

        if (df['TOTAL VALUE'].astype(float).sum()) > 0:
            df['TOTAL VALUE'] = df['TOTAL VALUE'].astype(float)
            for suffix in ('1', '2', '3'):
                label = 'AUTRES00' + suffix
                # Sum of the totals of the records whose aggregated category contains this AUTRES label.
                autre_total = df[df['AGGREGATED CATEGORY'].apply(lambda cats: label in cats)]['TOTAL VALUE'].sum()
                df_2.at[0, 'RF3_AUT' + suffix] = autre_total
                # Weight of that amount relative to the sum of the record totals.
                df_2.at[0, 'RF3_AUT' + suffix + '_TOTAL'] = float(autre_total) / total_registres
                # Same amount divided by the number of records.
                df_2.at[0, 'RF3_AUT' + suffix + '_NP'] = float(autre_total) / (df_2['RF2_NUMREG'].values[0])

        # LABEL2 is the space-joined string of all aggregated labels.
        df['LABEL2'] = df['AGGREGATED CATEGORY'].apply(' '.join)
        # Quantity resolved from the quantity field and the quantity found in the text.
        df['QUANTITY2'] = df.apply(returnQte, axis=1)
        df['TOTAL_RF4'] = df['TOTAL VALUE'].copy().astype(float)
        total_peintA = df[(df['LABEL2'].str.contains('PEINT_AUTRE'))]['TOTAL VALUE'].astype(float).sum()
        df['PROP'] = 0.0
        dfpeint = df[(df['LABEL2'].str.contains('PEINT00'))].copy()
        if (len(dfpeint) > 0) & (total_peintA > 0):
            # NOTE(review): dfpeint is a separate frame that is never merged
            # back, so this redistribution does not reach df - confirm intent.
            dfpeint['PROP'] = dfpeint['TOTAL_RF4'] / (dfpeint['TOTAL_RF4'].sum())
            dfpeint['TOTAL_RF4'] = dfpeint['TOTAL_RF4'] + total_peintA * dfpeint['PROP']
            df = df[~(df['LABEL2'].str.contains('PEINT_AUTRE'))]

        # RED FLAGS 4 and 5
        rf4 = df[(~df['LABEL2'].str.contains('AUTRE')) & (~df['LABEL2'].str.contains('FOURNITURE')) & (~df['LABEL2'].str.contains('MO000'))].copy()
        totalrf4 = rf4['TOTAL_RF4'].astype(float).sum()
        if (len(rf4) > 0) & (totalrf4 > 0):
            # Spread the AUTRE amount over the remaining records, proportionally to their totals.
            rf4['PROP'] = rf4['TOTAL_RF4'] / totalrf4
            preu_a_repartir = float(df[(df['LABEL2'].str.contains('AUTRES')) | (df['LABEL2'].str.contains('PEINT_AUTRE'))]['TOTAL VALUE'].sum())
            rf4['TOTAL_RF4'] = rf4['TOTAL_RF4'].astype(float) + preu_a_repartir * rf4['PROP'].astype(float)
            # Records that have a unit price: estimate the quantity.
            unit_price = rf4[(rf4['UNIT PRICE'] != 'nan') & (rf4['UNIT PRICE'] != '')].copy()
            unit_price['QTE EST'] = unit_price['TOTAL VALUE'] / unit_price['UNIT PRICE'].astype(float)
            unit_price['UNIT_PRIX_EST_RF4'] = unit_price['TOTAL_RF4'] / unit_price['QTE EST'].astype(float)
            # Records that have a quantity: estimate the unit price.
            qte = rf4[(rf4['QUANTITY2'] != 'nan') & (rf4['QUANTITY2'] != '')].copy()
            qte['UNIT_PRIX_QTE_RF4'] = qte['TOTAL_RF4'].astype(float) / qte['QUANTITY2'].astype(float)
            qte['UNIT_PRIX_EST_RF5'] = qte['TOTAL VALUE'].astype(float) / qte['QUANTITY2'].astype(float)
            df = pd.merge(df, unit_price[['ID DOC', 'PAGE', 'RECORD', 'UNIT_PRIX_EST_RF4']], on=['ID DOC', 'PAGE', 'RECORD'], how='left')
            df['UNIT_PRIX_EST_RF4'] = df['UNIT_PRIX_EST_RF4'].fillna(0.0)
            df = pd.merge(df, qte[['ID DOC', 'PAGE', 'RECORD', 'UNIT_PRIX_QTE_RF4', 'UNIT_PRIX_EST_RF5']], on=['ID DOC', 'PAGE', 'RECORD'], how='left')
            df['UNIT_PRIX_QTE_RF4'] = df['UNIT_PRIX_QTE_RF4'].fillna(0.0)
            for cat in catsRF5:
                # Check RF4 and RF5 for every category, ignoring quantity-1 and forfait records.
                df_aux = df[(df['LABEL2'].str.contains(cat)) & (~df['QUANTITY2'].isin(values)) & (df['F'] == False)].copy()
                if len(df_aux) > 0:
                    df_aux['UNIT PRICE'] = df_aux['UNIT PRICE'].astype(str)
                    if cat in catsRF4:
                        rf4_mean = df_aux[(df_aux['UNIT_PRIX_QTE_RF4'] > 0)]['UNIT_PRIX_QTE_RF4'].mean()
                        rf4_est_mean = df_aux[(df_aux['UNIT_PRIX_EST_RF4'] > 0)]['UNIT_PRIX_EST_RF4'].mean()
                        df_2.at[0, 'RF4_' + cat] = rf4_mean
                        df_2.at[0, 'RF4_' + cat + '_EST'] = rf4_est_mean
                        df_2.at[0, 'RF4_' + cat + '_ACTIVATE'] = rf4_mean < dic4[cat]
                        df_2.at[0, 'RF4_' + cat + '_EST_ACTIVATE'] = rf4_est_mean < dic4[cat]
                        # NOTE(review): the flag uses '>' while the *_ACTIVATE columns use '<' - confirm intent.
                        redflag4.append((rf4_mean > dic4[cat]) + (rf4_est_mean > dic4[cat]))
                    rf5_mean = df_aux[(df_aux['UNIT PRICE'] != 'nan') & (df_aux['UNIT PRICE'] != '')]['UNIT PRICE'].astype(float).mean()
                    rf5_est_mean = df_aux[(df_aux['UNIT_PRIX_EST_RF5'] > 0)]['UNIT_PRIX_EST_RF5'].mean()
                    df_2.at[0, 'RF5_' + cat] = rf5_mean
                    df_2.at[0, 'RF5_' + cat + '_EST'] = rf5_est_mean
                    # Only categories listed in cols_ref_prix have a reference
                    # price; the others keep their default *_ACTIVATE values
                    # instead of failing on a missing key.
                    if cat in dic5:
                        df_2.at[0, 'RF5_' + cat + '_ACTIVATE'] = rf5_mean < dic5[cat]
                        df_2.at[0, 'RF5_' + cat + '_EST_ACTIVATE'] = rf5_est_mean < dic5[cat]
                        redflag5.append((rf5_mean > dic5[cat]) + (rf5_est_mean > dic5[cat]))

    df_2 = df_2.fillna(0.0)

    # Without any record, the whole weight is counted as "empty quantity".
    df_2.loc[df_2['RF2_NUMREG'] == 0, 'RF2_QTE'] = 1
    # Compute the boolean of every RF.
    df_2.at[0, 'RF1'] = (df_2.at[0, 'RF1_DOC'] > RF1_HT)
    df_2.at[0, 'RF2'] = ((3 * df_2.at[0, 'RF2_QT1'] + 2 * df_2.at[0, 'RF2_QTE'] + df_2.at[0, 'RF2_UPE'] + df_2.at[0, 'RF2_F']) > 2)
    df_2.at[0, 'RF3'] = (df_2.at[0, 'RF3_AUT2'] > RF3_AUT)
    df_2.at[0, 'RF4'] = any(redflag4)
    df_2.at[0, 'RF5'] = any(redflag5)

    return df_2
                                                                                                                                                 

def returnQte(df):
    """Pick the quantity to use for one record.

    The 'QUANTITY' field is preferred. When it only holds a placeholder
    (the string 'nan' or one of the textual spellings of 1), the quantity
    found in the record text ('TEXT QUANTITY VALUE') is used instead,
    provided that one is not 'nan' itself.

    Args:
        df: A row-like object indexable by the column names 'QUANTITY' and
            'TEXT QUANTITY VALUE' (values are compared against strings).

    Returns:
        The chosen quantity, exactly as stored in the row.
    """
    quantity = df['QUANTITY']
    placeholders = ('1', '1.', '1.0', '1,', '1,0', '1.00', '1,00', 'nan')
    # The text value is only consulted when the field quantity is a placeholder.
    if quantity in placeholders and df['TEXT QUANTITY VALUE'] != 'nan':
        return df['TEXT QUANTITY VALUE']
    return quantity

In [9]:
from IPython.display import Javascript, display
from ipywidgets import widgets

def run_all(ev):
    """Ask the notebook front end to execute every cell above this one.

    Args:
        ev: The widget handed over by ``on_click``; it is not used.
    """
    rerun_cells_above = Javascript('IPython.notebook.execute_cells_above()')
    display(rerun_cells_above)


# Render a button under the cell and trigger run_all on each click.
button = widgets.Button(
    description="Générer les classes",
)
button.on_click(run_all)
display(button)

Button(description='Générer les classes', style=ButtonStyle())

# Connexion à l'ICR

## Générer un token

In [21]:
from IPython.display import display
# Button that requests a fresh ICR API token, and an output area that shows
# whatever the click handler prints.
button = widgets.Button(description="Générer un token de connection à l'ICR")
output = widgets.Output()

display(button, output)

def on_button_clicked(b):
    """Request a new ICR API token and store it in the notebook-level ``tok``.

    Args:
        b: The clicked widget, passed in by ``on_click``; it is not used.
    """
    # A later cell reads ``tok`` at notebook level (``token = tok``). Without
    # this declaration the assignment below only created a local variable
    # that disappeared when the handler returned.
    global tok
    with output:
        # Parameters sent to the token endpoint.
        # NOTE(review): the password is hard-coded in source; it should be
        # moved to a secret store or environment variable and rotated.
        param_token={'user':'billma_cofare','password':'cW0cXD_fJMATJWpq-hGWHfnsakySpQ_L','client':'multiasistencia'}
        reponses_token=requests.post('https://multiasistencia.pervasive-tech.com/token/cofare/2.1',json=(param_token))
        # Stop on a 4xx/5xx answer instead of failing later while parsing a
        # body that does not contain a token.
        reponses_token.raise_for_status()
        tok = reponses_token.json()['token'] # token used to call the API
        print("token :" + tok)

button.on_click(on_button_clicked)

Button(description="Générer un token de connection à l'ICR", style=ButtonStyle())

Output()

In [3]:
# Credentials for S3 access.
# Each secret can be supplied through an environment variable; the literals
# remain only as fallbacks so existing runs behave exactly as before.
# NOTE(review): these secrets are committed to source - rotate them and drop
# the literal fallbacks once the environment variables are provisioned.
ACCESS_KEY_ID = os.environ.get('BILLMA_S3_ACCESS_KEY_ID', 'AKIAJUWSKSHW6B4ZSK7Q')
ACCESS_SECRET_KEY = os.environ.get('BILLMA_S3_SECRET_ACCESS_KEY', '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')
BUCKET_NAME = 'billma-s3'

# Parameters for generating an API token (only needed when the token has to
# be renewed, not for every request). The password can be overridden with
# the BILLMA_ICR_PASSWORD environment variable.
param_token={'user':'billma_cofare','password':os.environ.get('BILLMA_ICR_PASSWORD','cW0cXD_fJMATJWpq-hGWHfnsakySpQ_L'),'client':'multiasistencia'} # parameters
#reponses_token=requests.post('https://multiasistencia.pervasive-tech.com/token/cofare/2.1',json=(param_token)) # request
#token=reponses_token.json()['token'] # token used to call the API
# Reuse the token held in the notebook-level name `tok`; this line raises
# NameError when no such name exists yet.
token = tok


# Workbook and worksheet for the line-item ("partidas") predictions.
wb_prediction_partidas = Workbook()
ws_prediction_partidas = wb_prediction_partidas.active

# Workbook and worksheet for the red flags.
wb = Workbook()
ws_red_flags = wb.active

ligne_data=0
z=0   # counter for the id of the file currently being processed
# Root folder whose sub-folders hold the documents to process. It can be
# overridden with the BILLMA_INPUT_DIR environment variable; the original
# path is kept as the default.
pathname=os.path.abspath(os.environ.get('BILLMA_INPUT_DIR', r"C:\Users\gsegonds\Desktop\topdf"))

for path, dirs, files in os.walk(pathname):
    path=path
    for filename in files: # parcours les sous
        if (filename.endswith(".pdf") or filename.endswith(".PDF") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".JPG") or filename.endswith(".JPEG") or filename.endswith(".png") or filename.endswith(".PNG") or filename.endswith(".tif") or filename.endswith(".TIF")): # si mon fichier est un pdf
            if filename.endswith(".tif") or filename.endswith(".TIF"):
                base_filename = os.path.basename(filename)
                output_file = path + '\\' + filename + "_tmp_.pdf"
                try:
                    with open(output_file,"wb") as f:
                        f.write(img2pdf.convert( path + '/' + filename))
                except:
                    1
            if filename.endswith(".png") or filename.endswith(".jpeg") or filename.endswith(".jpg") or filename.endswith(".JPG"):
                PNG_FILE = pathname + "\\" + filename
                output_file = pathname +  "\\" + filename.replace(' ','') + '.pdf'
                rgba = Image.open(PNG_FILE)
                rgb = Image.new('RGB', rgba.size, (255, 255, 255))  # white background
                rgb.paste(rgba, mask=rgba.split()[2])               # paste using alpha channel as mask
                rgb.save(output_file, 'PDF', resoultion=100.0)
            if filename.endswith(".pdf") or filename.endswith(".PDF"):
                output_file = filename
            
            print("Inicio tratamiento: " + filename)
            path_upload_amazon=os.path.join(path, output_file) # chemin d'accès local de mon fichier
            
            #établir connexion avec le S3
            s3= boto3.resource('s3',aws_access_key_id=ACCESS_KEY_ID,aws_secret_access_key=ACCESS_SECRET_KEY,) 
            
            #upload fichiers sur le S3
            data = open(path_upload_amazon, 'rb')
            s3.Bucket(BUCKET_NAME).put_object(Key=os.path.basename(path_upload_amazon), Body=data) # key est le nom visible sur le S3                                  
            
            # génération d'une URL signée (temporaire) pour pouvoir accéder à l'API
            s3_url = boto3.client('s3',aws_access_key_id=ACCESS_KEY_ID,aws_secret_access_key=ACCESS_SECRET_KEY)
            url_signed=s3_url.generate_presigned_url('get_object',Params={'Bucket':BUCKET_NAME,'Key':os.path.basename(path_upload_amazon)},ExpiresIn=3600)
            
            
            # arguments API
            meta={'tipo_bucket':'URL','user':'billma_cofare','password':'cW0cXD_fJMATJWpq-hGWHfnsakySpQ_L','object_storage':'Bucket','carpeta':''}
            json_params={'url':url_signed,'meta':meta,'model': 'cofare','action':'predict','user':'billma_cofare','token':token}
            # requete API
            reponses=requests.post('https://multiasistencia.pervasive-tech.com/models/cofare/2.1/online/',json=json_params)            
            
            print(reponses.status_code)
            
            if(reponses.status_code == 200):
                                
                try:
                    json_string=json.dumps(reponses.json())
                    result = welcome_from_dict(json.loads(json_string)) # creation d'un objet7

                    liste_data=[]
                    ligne_data=0 # ligne de tout mon pdf 


                    # creation de 2 dataframes qui ont des valeurs fixe par pdf (ID, elapsed, nbr de page) préremplis avec des valeur par défauts
                    df_first_layer_fixe= pd.DataFrame(data={"FileID":2,"Elapsed":1},index=[0])
                    df_second_layer_fixe=pd.DataFrame(data={"Nombre_Pages":6},index=[0])


                    frames=[] # liste contenant mes dataframes qui ont des valeurs fixe par page (SIRET, EMITER, BODY, RECIPIENT, TOTAL, TOTAL AUX, PAGES, TYPE, )
                    frame_interne=[] # liste contenant les dataframe de ma page en cour



                    #on boucle sur les pages du pdf:
                    for indice_page_document,page in enumerate(result.output.pages):

                        # creation de 8 dataframes qui ont des valeurs fixe par page (EMITER, RECIPIENT, SIRET, TOTAL, TOTAL AUX, PAGES(en cours), Type, BODY(contient nbr de lignes)) préremplis avec des valeur par défauts
                        df_layer_emiter=pd.DataFrame(data={"emiter": 1,"SCORE EMITER":2,"SCORE EMITER OCR":3,"xmaxEmiter":4,"xminEmiter":5,"ymaxEmiter":6,"yminEmiter":7},index=[0]) # on fait varier la page et la bloc reconocido
                        df_layer_recipient=pd.DataFrame(data={"recipient": 1,"SCORE RECIPIENT":2,"SCORE RECIPIENT OCR":3,"xmaxRecipient":4,"xminRecipient":5,"ymaxRecipient":6,"yminRecipient":7},index=[0]) # on fait varier la page et la bloc reconocido
                        df_layer_siret=pd.DataFrame(data={"SIRET": 1,"SCORE SIRET":2,"SCORE SIRET OCR":3,"xmaxSiret":4,"xminSiret":5,"ymaxSiret":6,"yminSiret":7},index=[0]) # on fait varier la page et la bloc reconocido
                        df_layer_total=pd.DataFrame(data={"TOTAL": 1,"SCORE TOTAL":2,"SCORE TOTAL OCR":3,"xmaxTotal":4,"xminTotal":5,"ymaxTotal":6,"yminTotal":7},index=[0]) # on fait varier la page et la bloc reconocido
                        df_layer_total_aux=pd.DataFrame(data={"TOTAL con impuestos incluidos o no especificado": 1,"SCORE TOTAL AUX":2,"SCORE TOTAL AUX OCR":3,"xmaxTotalAUX":4,"xminTotalAUX":5,"ymaxTotalAUX":6,"yminTotalAUX":7},index=[0]) # on fait varier la page et la bloc reconocido
                        df_layer_3=pd.DataFrame(data={"PAGES":7},index=[0]) # fixe
                        df_layer_4=pd.DataFrame(data={"Type":78,"SCORE Type":5},index=[0]) # fixe
                        df_layer_8_variable_body=pd.DataFrame(data={"RECORD":1,"SCORE BODY":2,"SCORE TOTAL BODY":3,"XMAXBODY":4,"XMINBODY":5,"YMAXBODY":6,"YMINBODY":7},index=[0]) # on fait varier la page et la bloc reconocido


                        # boucle sur bloques reconocidos 
                        for indice_Bloques_reconocidos,page in enumerate(result.output.pages[0].bbox):
                            if(indice_Bloques_reconocidos==0): # acces au body qui contient les lignes de ma page

                                # creation de 5 dataframes qui ont des valeurs fixe par ligne de body (quantite etc. , coordonnees lignes, nom et score des categories MA, aggregated category, indice ligne en cours) préremplis avec des valeur par défauts
                                df_layer_12_1 = pd.DataFrame(data= {'QUANTITE':1,'QUANTITE SCORE':2,'TEXT RECORD':3,'TEXT RECORD SCORE':4,'TEXT QUANTITY':5,'TEXT QUANTITY SCORE':6,'TEXT UNITS':7,'TEXT UNITS SCORE':8,'TOTAL VALUE':9,'TOTAL VALUE SCORE':10,'UNITS PRICE':11,'UNITS PRICE SCORE':12,'UNITS PRICE TEXT':13,'UNITS PRICE TEXT SCORE':14,'UNITS':15,'UNITS SCORE':16},index=[0])
                                df_layer_12_2=pd.DataFrame(data={"H_X0":1,"H_X1":2,"H_Y0":3,"H_Y1":4,"N_CATEGORIES_BAREMO":5},index=[0])
                                df_layer_12_3=pd.DataFrame(data={"CATEGORIE MA 1":1,"SCORE CATEGORIE MA 1":2,"CATEGORIE MA 2":3,"SCORE CATEGORIE MA 2":4,"CATEGORIE MA 3":5,"SCORE CATEGORIE MA 3":6,"CATEGORIE MA 4":7,"SCORE CATEGORIE MA 4":8,"CATEGORIE MA 5":9,"SCORE CATEGORIE MA 5":10,"CATEGORIE MA 6":11,"SCORE CATEGORIE MA 6":12,"CATEGORIE MA 7":13,"SCORE CATEGORIE MA 7":14,"CATEGORIE MA 8":15,"SCORE CATEGORIE MA 8":1,"CATEGORIE MA 9":2,"SCORE CATEGORIE MA 9":3,"CATEGORIE MA 10":4,"SCORE CATEGORIE MA 10":5,"CATEGORIE MA 11":6,"SCORE CATEGORIE MA 11":7,"CATEGORIE MA 12":8,"SCORE CATEGORIE MA 12":9,"CATEGORIE MA 13":10,"SCORE CATEGORIE MA 13":11,"CATEGORIE MA 14":12,"SCORE CATEGORIE MA 14":13,"CATEGORIE MA 15":14,"SCORE CATEGORIE MA 15":15},index=[0])
                                df_layer_12_4=pd.DataFrame(data={"AGGREGATED CATEGORY":1,"REFERENCE PRICE":2},index=[0])
                                df_layer_11_variable_record=pd.DataFrame(data={"Record":1},index=[0])

                                # si la liste contenant les lignes est vide alors toutes les valeurs sont "nan"
                                if not result.output.pages[indice_page_document].bbox[0].content.records:
                                    ligne_body=1

                                    serie_layer_12_1=pd.Series(data=['nan']*len(df_layer_12_1.columns),index=df_layer_12_1.columns)# creation de la ligne avec nan
                                    df_layer_12_1=df_layer_12_1.append(serie_layer_12_1, ignore_index=True) # ajout ligne au dataframe

                                    serie_layer_12_2=pd.Series(data=['nan']*len(df_layer_12_2.columns),index=df_layer_12_2.columns)
                                    df_layer_12_2=df_layer_12_2.append(serie_layer_12_2,ignore_index=True)  

                                    serie_layer_12_3=pd.Series(data=['nan']*len(df_layer_12_3.columns),index=df_layer_12_3.columns)
                                    df_layer_12_3=df_layer_12_3.append(serie_layer_12_3,ignore_index=True)

                                    serie_layer_12_4=pd.Series(data=['nan']*len(df_layer_12_4.columns),index=df_layer_12_4.columns)
                                    df_layer_12_4=df_layer_12_4.append(serie_layer_12_4,ignore_index=True)

                                    df_layer_11_variable_record=df_layer_11_variable_record.append(pd.Series(data=["nan"],index=df_layer_11_variable_record.columns),ignore_index=True)


                                # si cette liste est non vide
                                else:
                                    ligne_body=result.output.pages[indice_page_document].bbox[0].content.n_records #  nombre de ligne total de mon document


                                    # parcourt cette liste (donc parcout des lignes du doc en cours et de la page en cours)
                                    for indice_ligne_body,content in enumerate(result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records): 

                                        liste_data.append(indice_ligne_body)

                                        #creation ligne contenant  les informations de la ligne en cours (quantite etc. , nom et score des categories MA, aggregated category,)
                                        serie_layer_12_1=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].quantite.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].quantite.score,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text.score,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text_quantite.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text_quantite.score,
                                                                    result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text_units.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].text_units.score,
                                                                      result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].total_prix.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].total_prix.score,
                                                                   result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].unit_prix.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].unit_prix.score,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].unit_prix_text.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].unit_prix_text.score,
                                                                    result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].units.content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].units.score,
                                                                        ],index=df_layer_12_1.columns)

                                        #ajout au dataframe
                                        df_layer_12_1=df_layer_12_1.append(serie_layer_12_1,ignore_index=True)

                                        # meme chose avec  coordonnees ligne en cours + n categories bareme
                                        serie_layer_12_2=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].h_x0,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].h_x1,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].h_y0,
                                                                        result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].h_y1,
                                                                       result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].n_categories_baremo],
                                                                   index=df_layer_12_2.columns)
                                        df_layer_12_2=df_layer_12_2.append(serie_layer_12_2,ignore_index=True)



                                        #creation ligne contenant  indice de la ligne en cours
                                        serie_layer_11_variable=pd.Series(data=indice_ligne_body,index=df_layer_11_variable_record.columns)
                                        #ajout dataframe
                                        df_layer_11_variable_record=df_layer_11_variable_record.append(serie_layer_11_variable,ignore_index=True)


                                        # parcours la liste "agg_cat" qui contient le nom et le score de chaque catégories aggrégé 
                                        for indice_ligne_agg_cat,content in enumerate(result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].agg_cat):
                                            serie_layer_12_4=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].agg_cat[indice_ligne_agg_cat].cat,
                                                                             result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].agg_cat[indice_ligne_agg_cat].reference_price],index=df_layer_12_4.columns)
                                            df_layer_12_4=df_layer_12_4.append(serie_layer_12_4,ignore_index=True) 


                                        liste_categories=[] # liste qui va contenir nom et score de chaque catégorie MA
                                        #boucle sur la liste "categories" qui contient le nom et le score de chaque catégorie MA (15) -> 
                                        for indice_ligne_categories_ma,content in enumerate(result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].categories):
                                            liste_categories.append(result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].categories[indice_ligne_categories_ma].cat)
                                            liste_categories.append(result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content.records[indice_ligne_body].categories[indice_ligne_categories_ma].score)

                                        # création série qui contient nom et score de chaque catégorie MA pour la ligne en cours
                                        serie_layer_12_3=pd.Series(data=liste_categories,index=df_layer_12_3.columns)
                                        #ajout dataframe
                                        df_layer_12_3=df_layer_12_3.append(serie_layer_12_3,ignore_index=True)            

                            # création des lignes avec des valeurs fixe par pages (EMITTER, RECIPIENT, SIRET, TOTAL, TOTAL AUX )
                            if(indice_Bloques_reconocidos==1):
                                #creation ligne contenant EMITER de la page en cours
                                serie_layer_emiter=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].score,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].scoreocr,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmin,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymin],index=df_layer_emiter.columns) # on fait varier la page et la bloc reconocido)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                         
                            if(indice_Bloques_reconocidos==2):
                                #creation ligne contenant RECIPIENT de la page en cours
                                serie_layer_recipient=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].score,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].scoreocr,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmin,result.output.pages[0].bbox[indice_Bloques_reconocidos].position.ymax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymin],index=df_layer_recipient.columns)
                            if(indice_Bloques_reconocidos==3):
                                #creation ligne contenant le SIRET de la page en cours
                                serie_layer_siret=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].score,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].scoreocr,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmin,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymin],index=df_layer_siret.columns) # on fait varier la page et la bloc reconocido                
                            if(indice_Bloques_reconocidos==4):
                                #creation ligne contenant le total de la page en cours
                                serie_layer_total=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].score,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].scoreocr,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmin,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymin],index=df_layer_total.columns) # on fait varier la page et la bloc reconocido)
                            if(indice_Bloques_reconocidos==5):
                                #creation ligne contenant le total aux de la page en cours
                                serie_layer_total_aux=pd.Series(data=[result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].content,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].score,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].scoreocr,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.xmin,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymax,result.output.pages[indice_page_document].bbox[indice_Bloques_reconocidos].position.ymin],index=df_layer_total_aux.columns)
                            #else:
                                #print("ok")

                        # AJOUT DES VALEURS FIXES PAR PAGE A LEUR DATAFRAME RESPECTIVE

                        # creation et ajout dataframe de la page en cours
                        serie_layer_3=pd.Series(data=[result.output.pages[indice_page_document].page],index=df_layer_3.columns)
                        df_layer_3=df_layer_3.append(serie_layer_3,ignore_index=True)

                        #ajout dataframe (EMITTER, RECIPIENT, SIRET, TOTAL, TOTAL AUX ) + BODY
                        df_layer_emiter=df_layer_emiter.append(serie_layer_emiter,ignore_index=True)
                        df_layer_recipient=df_layer_recipient.append(serie_layer_recipient,ignore_index=True)
                        df_layer_siret=df_layer_siret.append(serie_layer_siret,ignore_index=True)
                        df_layer_total=df_layer_total.append(serie_layer_total,ignore_index=True)
                        df_layer_total_aux=df_layer_total_aux.append(serie_layer_total_aux,ignore_index=True)

                        #creation et ajout dataframe de BODY
                        serie_layer_8_variable = pd.Series(data=[result.output.pages[indice_page_document].bbox[0].content.n_records, result.output.pages[indice_page_document].bbox[0].score,result.output.pages[indice_page_document].bbox[0].scoreocr,result.output.pages[indice_page_document].bbox[0].position.xmax,result.output.pages[indice_page_document].bbox[0].position.xmin,result.output.pages[indice_page_document].bbox[0].position.ymax,result.output.pages[indice_page_document].bbox[0].position.ymin],index=df_layer_8_variable_body.columns) # on fait varier la page et la bloc reconocidoand
                        df_layer_8_variable_body= df_layer_8_variable_body.append(serie_layer_8_variable,ignore_index=True)

                        # creation et ajout dataframe  du  type et de la probability de la page en cours
                        serie_layer_4_fixe=pd.Series(data=[result.output.pages[indice_page_document].type,result.output.pages[indice_page_document].probability], index=df_layer_4.columns)
                        df_layer_4=df_layer_4.append(serie_layer_4_fixe,ignore_index=True)                   

                        # fusion des différents informations concernant la ligne en cours (quantite etc. , coordonnees lignes, nom et score des categories MA, aggregated category, indice ligne en cours) préremplis avec des valeur par défaut)
                        df_page_variable = pd.concat([df_layer_11_variable_record,df_layer_12_1,df_layer_12_2,df_layer_12_3,df_layer_12_4], axis=1)
                        df_page_variable =df_page_variable.drop([0],axis=0) # supprime valeurs par défauts
                        df_page_variable =df_page_variable.reset_index() # reset index des lignes

                        #ajout du dataframe contenant les informations de la liste en cours dans la liste frame_interne ( qui contient les dataframes de chaque ligne )
                        frame_interne.append(df_page_variable)   

                        ligne_data=ligne_data+ligne_body

                        #concatenation horizontale des dataframes contenant des valeurs fixe par page (EMITER, RECIPIENT, SIRET, TOTAL, TOTAL AUX, PAGES(en cours), Type, BODY(contient nbr de lignes))
                        df_layer_fixe_page=pd.concat([df_layer_3,df_layer_4,df_layer_8_variable_body,df_layer_total,df_layer_total_aux,df_layer_siret,df_layer_recipient,df_layer_emiter],axis=1, join='inner') 
                        df_layer_fixe_page=df_layer_fixe_page.drop([0],axis=0) # suppresion valeurs par défauts
                        df_layer_fixe_page=df_layer_fixe_page.reset_index() # réindexation

                        # création du même dataframe avec un nombre de ligne dupliqués (ligne_body qui est égale un nombre de ligne de ma page en cours)
                        df_layer_fixe_page_duplicate=df_layer_fixe_page.loc[np.repeat(df_layer_fixe_page.index.values, ligne_body)] # dupliquer ligne d'un dataframe de la page en question
                        df_layer_total_duplicate = df_layer_fixe_page.loc[np.repeat(df_layer_fixe_page.index.values, ligne_body)] 
                        df_layer_total_duplicate =df_layer_total_duplicate.reset_index()
                        #ajout dans la liste frames, le data frame avec le bon nombre de ligne du dataframe avec les informations fixe de la page en cours
                        frames.append(df_layer_total_duplicate) 

                    # concatenation verticale des dataframes de chaque page (avec les informations fixe par ligne du body)
                    df_page_variable_concat_duplicate=pd.concat(frame_interne)
                    df_page_variable_concat_duplicate= df_page_variable_concat_duplicate.reset_index()


                    #concatenation verticale des dataframes de chaque page (contenant les informations fixe de la page (EMITER, RECIPIENT, SIRET, TOTAL, TOTAL AUX, PAGES(en cours), Type, BODY(contient nbr de lignes))
                    df_layer_total_duplicate_concat=pd.concat(frames)
                    del df_layer_total_duplicate_concat['level_0']
                    del df_layer_total_duplicate_concat['index']
                    df_layer_total_duplicate_concat=df_layer_total_duplicate_concat.reset_index() # réindexation

                    # creation dataframe avec les valeurs fixe par pdf ( ID, nombre de page et elapsed )

                    #filed id + elapsed
                    serie_first_layer_fixe=pd.Series(data=[result.fileid,result.elapsed],index=df_first_layer_fixe.columns)
                    df_first_layer_fixe=df_first_layer_fixe.append(serie_first_layer_fixe,ignore_index=True)
                    df_first_layer_fixe=df_first_layer_fixe.drop([0],axis=0)
                    df_first_layer_fixe=df_first_layer_fixe.reset_index()

                    #nombre de page
                    df_second_layer_fixe=df_second_layer_fixe.append(pd.Series(data=[result.output.n_pages],index=df_second_layer_fixe.columns),ignore_index=True)
                    df_second_layer_fixe=df_second_layer_fixe.drop([0],axis=0)
                    df_second_layer_fixe=df_second_layer_fixe.reset_index()

                    # concatenation des 2 dataframes
                    df_first_second_layer_fixe=pd.concat([df_first_layer_fixe,df_second_layer_fixe],axis=1)

                    #repete le dataframe, avec le nombre de ligne du pdf
                    df_first_second_layer_fixe = df_first_second_layer_fixe.loc[np.repeat(df_first_second_layer_fixe.index.values, ligne_data)] # dupliquer ligne d'un dataframe
                    df_first_second_layer_fixe=df_first_second_layer_fixe.reset_index()



                    # DATAFRAME FINAL CONTIENT 
                    # dataframe avec valeur fixe par pdf : df_first_second_layer_fixe
                    # dataframe avec valeurs fixes par page :
                    # dataframe avec valeur fixes par lignes :
                    fusion = pd.concat([df_first_second_layer_fixe,df_layer_total_duplicate_concat,df_page_variable_concat_duplicate],axis=1)


                    # suppression de 2 colonnes créer suite à la réindexation
                    del fusion['index'] 
                    del fusion['level_0']
                    
                    fusion1 = fusion[['FileID', 'PAGES', 'Type', 'SCORE Type', 'TOTAL', 'SCORE TOTAL', 'SCORE TOTAL OCR','xminTotal','xmaxTotal','yminTotal','ymaxTotal','FileID','SCORE BODY', 'SCORE TOTAL BODY', 'XMINBODY', 'XMAXBODY', 'YMINBODY', 'YMAXBODY','SIRET', 'SCORE SIRET', 'SCORE SIRET OCR', 'xminSiret', 'xmaxSiret', 'yminSiret', 'ymaxSiret', 'recipient', 'SCORE RECIPIENT', 'SCORE RECIPIENT OCR', 'xminRecipient', 'xmaxRecipient', 'yminRecipient', 'ymaxRecipient', 'emiter', 'SCORE EMITER', 'SCORE EMITER OCR', 'xminEmiter', 'xmaxEmiter', 'yminEmiter', 'ymaxEmiter', 'RECORD', 'Record', 'TEXT RECORD', 'TEXT RECORD SCORE', 'UNITS', 'UNITS SCORE', 'QUANTITE', 'QUANTITE SCORE', 'UNITS PRICE', 'UNITS PRICE SCORE', 'TOTAL VALUE', 'TOTAL VALUE SCORE', 'QUANTITE', 'QUANTITE SCORE', 'TEXT UNITS', 'TEXT UNITS SCORE', 'UNITS PRICE', 'UNITS PRICE SCORE', "H_X0","H_Y0","H_X1","H_Y1", 'N_CATEGORIES_BAREMO', "CATEGORIE MA 1","SCORE CATEGORIE MA 1","CATEGORIE MA 2","SCORE CATEGORIE MA 2","CATEGORIE MA 3","SCORE CATEGORIE MA 3","CATEGORIE MA 4","SCORE CATEGORIE MA 4","CATEGORIE MA 5","SCORE CATEGORIE MA 5","CATEGORIE MA 6","SCORE CATEGORIE MA 6","CATEGORIE MA 7","SCORE CATEGORIE MA 7","CATEGORIE MA 8","SCORE CATEGORIE MA 8","CATEGORIE MA 9","SCORE CATEGORIE MA 9", 'AGGREGATED CATEGORY', 'REFERENCE PRICE']].copy()
                    

                    # TRAITEMENT RED FLAGS
                    
                    d = json.loads(json_string)
                    partidas = get_pandas_out_of_json(d)
                    rf = red_flags(partidas,a)
                    del rf['RF2_SCORE']
    
                    # ecriture dans un excel
                    # s'il s'agit du premier pdf 
                    if(z==0): 

                        #partidas 
                        # on écrit le nom des colonnes du dataframe sans l'index
                        for r in dataframe_to_rows(fusion1, index=False, header=True): 
                            ws_prediction_partidas.append(r)


                        #red flags
                        # on écrit le nom des colonnes du dataframe sans l'index
                        for p in dataframe_to_rows(rf, index=False, header=True): 
                            ws_red_flags.append(p)

                    # sinon      
                    else:

                        #partidas
                        # on itere sur les lignes sans réecrire le nom des colonnes
                        for index, row in fusion1.iterrows(): 
                            ws_prediction_partidas.append(row.tolist())

                        #redflags
                        # on itere sur les lignes sans réecrire le nom des colonnes
                        for index, row in rf.iterrows():  
                            ws_red_flags.append(row.tolist())

                    z=z+1 # incrémente l'index du pdf en cours


                    #on supprime le pdf du S3
                    obj_delete = s3.Object(BUCKET_NAME,os.path.basename(path_upload_amazon))
                    obj_delete.delete()
                    print("Fichero tratado y borrado: "+ filename)
                    # enregiste en local les .xlsx           
                    wb_prediction_partidas.save("Partidas_parcial.xlsx")
                    wb.save("Red_flags_parcial.xlsx")
                except AssertionError:
                    print("Error de formato de JSON")
                    file = open('unitaire','a') 
                    file.write(os.path.basename(path_upload_amazon)+'\n')
                    file.close()
                    #on supprime le pdf du S3
                    obj_delete = s3.Object(BUCKET_NAME,os.path.basename(path_upload_amazon))
                    obj_delete.delete()
                    print("Fichero NO tratado y borrado: "+ filename)
                except:
                    print("Error de acceso al servicio")
                    file = open('unitaire','a') 
                    file.write(os.path.basename(path_upload_amazon)+'\n')
                    file.close()
                    #on supprime le pdf du S3
                    obj_delete = s3.Object(BUCKET_NAME,os.path.basename(path_upload_amazon))
                    obj_delete.delete()
                    print("Fichero NO tratado y borrado: "+ filename)

                
            else: # on ecrit dans un .txt les fichiers qui n'ont pas été traités
                file = open('unitaire','a') 
                file.write(os.path.basename(path_upload_amazon)+'\n')
                file.close()
                #on supprime le pdf du S3
                obj_delete = s3.Object(BUCKET_NAME,os.path.basename(path_upload_amazon))
                obj_delete.delete()
                print("Fichero NO tratado y borrado: "+ filename)
                
# Delete every file whose name contains "_tmp_" anywhere under `pathname`.
for path, dirs, files in os.walk(pathname):
    for filename in files:
        if "_tmp_" in filename:
            # Build the path with os.path.join so the separator matches the
            # running platform instead of a hard-coded Windows backslash.
            os.remove(os.path.join(path, filename))


       
# Save both workbooks under their final relative file names, then print the
# end-of-processing message.
for workbook_to_save, xlsx_name in (
    (wb_prediction_partidas, "Partidas.xlsx"),
    (wb, "Red_flags.xlsx"),
):
    workbook_to_save.save(xlsx_name)
print("Final del tratamiento")

NameError: name 'requests' is not defined