In [None]:
!pip install openai
!pip install colorama
!pip install tqdm

In [45]:
# Import Necessary SDKs
import os
from openai import OpenAI
import json
import pandas as pd
from colorama import Fore, Style
import time
from tqdm import tqdm

In [46]:
# Run configuration.
# The API token is taken from the GITHUB_TOKEN environment variable when it is set,
# so a real secret never has to be saved inside the notebook. The literal below is
# only a placeholder that is used when the variable is missing or empty.
token = os.environ.get("GITHUB_TOKEN") or "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx_xxxxxxxxxxxxxxxxxxxxxx"
# Base URL of the OpenAI-compatible inference endpoint.
endpoint = "https://models.inference.ai.azure.com"
# Model used for the extraction requests.
model_name = "gpt-4o-mini"
# CSV file that receives one row per processed invoice.
csv_path = "grounding_truth.csv"

In [47]:
# Client for the OpenAI-compatible endpoint configured above, authenticated with `token`.
client = OpenAI(
    base_url=endpoint,
    api_key=token,
)

# System prompt (written in French) passed as the "system" message of each request.
# It lists the six invoice fields to extract (supplier, date, number, total incl. tax,
# total excl. tax, VAT amount) and asks for a plain-text, dictionary-shaped answer with
# currency-free numeric amounts and an upper-case supplier name.
System_Prompt = """
Vous êtes un assistant puissant dans l'extraction d'informations a partir des factures francaises. \n
Observe attentivement la facture et extrait les informations suivantes:  \n

- Fournisseur : C'est le nom du fournisseur qui a vendu des produits ou des services à un client. Généralement, C'est dans le Logo du Fournisseur ou a côté. \n
- Date_Facture : C'est la date de la facture. \n
- Num_Facture : C'est le numéro de la facture se forme d'une série de numéros ou numéros avec lettres. \n
- TTC : C'est le montant total que le client va payer, peut etre referer avec : Total TTC, Net à Payer ou quelque chose similaire. \n
- TT  : C'est le montant total de la facture hors taxes, peut etre referer avec : Total HT ou quelque chose similaire. \n
- TVA : C'est le montant de TVA appliqué au Total hors taxes, peut etre referer avec : TVA, Total TVA, Taxe ou quelque chose similaire. \n

- Votre reponse est un String se la forme d'un dictionaire python avec les clés-valeurs suivantes : \n

     "Invoice_Name": le nom et format du l'image du la facture,
     "Fournisseur": "Fournisseur",
     "Date_Facture": "Date_Facture",
     "Num_Facture": "Num_Facture",
     "TTC": "TTC",
     "TT": "TT",
     "TVA": "TVA"

- Ne pas formater votre reponse comme un code python , repondre juste en texte. \n
- Les 3 prix TT,TTC,TVA doivent être des chiffres sans indication de devise. \n
- le nom de Fournisseur doit être en majuscule. \n
"""

[Dataset](https://huggingface.co/datasets/Noureddinesa/Images_Test)


In [48]:
def Save_to_csv(json_data, path=None):
    """Append one extracted invoice record to the results CSV.

    Parameters
    ----------
    json_data : str
        Model reply containing a single JSON object. Text surrounding the
        object (for example a Markdown code fence) is ignored.
    path : str, optional
        Destination CSV file. Defaults to the module-level ``csv_path``.

    Raises
    ------
    ValueError
        If the reply is not valid JSON (``json.JSONDecodeError`` is a subclass
        of ``ValueError``) or does not decode to a JSON object.
    """
    target = csv_path if path is None else path

    # Replies are sometimes wrapped in ```json fences or preceded by a sentence;
    # keep only the outermost {...} span. Without braces the raw text is parsed
    # unchanged so json.loads raises its usual JSONDecodeError.
    start, end = json_data.find("{"), json_data.rfind("}")
    payload = json_data[start:end + 1] if 0 <= start < end else json_data
    data = json.loads(payload)
    if not isinstance(data, dict):
        raise ValueError(f"Expected a JSON object, got {type(data).__name__}")

    df = pd.DataFrame([data])

    if os.path.exists(target) and os.path.getsize(target) > 0:
        # Rows are appended without a header, so align them on the header that is
        # already in the file; otherwise a reply with reordered, missing or extra
        # keys would shift values into the wrong columns.
        header = pd.read_csv(target, nrows=0).columns
        df.reindex(columns=header).to_csv(target, mode='a', header=False, index=False)
    else:
        # New (or empty) file: write the header together with the first row.
        df.to_csv(target, index=False)

    print(Fore.GREEN + Style.BRIGHT + "CSV Updated Successfully" + Style.RESET_ALL)


def Run_ALL(Names, batch_size=7, pause=2, cooldown=60):
    """Extract the invoice fields for every image name and append them to the CSV.

    For each name the image URL on the Hugging Face dataset is sent to the model
    together with ``System_Prompt``; the reply is printed and handed to
    ``Save_to_csv``.

    Parameters
    ----------
    Names : iterable of str
        Image file names as stored in the Hugging Face dataset.
    batch_size : int, optional
        Number of requests after which the longer ``cooldown`` pause is taken.
    pause : float, optional
        Seconds to wait between two consecutive requests.
    cooldown : float, optional
        Seconds to wait after every ``batch_size`` requests, so the rate limit
        (10 requests/min according to the original note) is not exceeded.

    Returns
    -------
    None
        Invoices whose reply could not be parsed are reported on stdout and
        skipped instead of aborting the remaining invoices.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    base_url = "https://huggingface.co/datasets/Noureddinesa/Images_Test/resolve/main/"
    Names = list(Names)
    total = len(Names)
    failed = []

    for index, Name in enumerate(tqdm(Names, desc="Processing Invoices", unit="number"), start=1):
        # Percent-encode the file name so spaces or special characters give a valid URL.
        Hugg_path = base_url + quote(Name)
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": System_Prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": f"Extraire les Informations demandées a partir de la facture {Name} suivante : "},
                        {
                            "type": "image_url",
                            "image_url": {"url": Hugg_path},
                        },
                    ],
                },
            ],
        )

        data = response.choices[0].message.content
        print(data)
        try:
            if data is None:
                raise ValueError("empty model reply")
            Save_to_csv(data)
        except ValueError as error:
            # One malformed reply must not discard the rest of the batch.
            failed.append(Name)
            print(Fore.RED + Style.BRIGHT + f"Skipped {Name}: {error}" + Style.RESET_ALL)

        # No point in waiting once the last invoice has been processed.
        if index < total:
            time.sleep(pause)
            if batch_size and index % batch_size == 0:
                time.sleep(cooldown)

    if failed:
        print(Fore.RED + Style.BRIGHT + f"{len(failed)} invoice(s) could not be saved: {failed}" + Style.RESET_ALL)

In [49]:
# Colab only: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Folder on the mounted Drive that holds the invoice images.
project_path = "/content/drive/MyDrive/Textra_Insights/"
Folder_path = project_path+"Test_Invoices"
Files = os.listdir(Folder_path)
# Invoices are named "<number>.<extension>". Any other entry in the folder (hidden
# files, checkpoint directories, ...) would make int() raise, so it is left out
# before sorting numerically (2.jpg before 49.jpg before 109.jpg).
invoice_files = [f for f in Files if f.split('.')[0].isdecimal()]
sorted_files = sorted(invoice_files, key=lambda x: int(x.split('.')[0]))
sorted_files

In [51]:
# Process every invoice in numeric order; each parsed reply is appended to csv_path.
Run_ALL(sorted_files)

Processing Invoices:   0%|          | 0/30 [00:00<?, ?number/s]

{
    "Invoice_Name": "2.jpg",
    "Fournisseur": "OUTIDIS SARL",
    "Date_Facture": "13/01/2023",
    "Num_Facture": "FV230007",
    "TTC": 586.80,
    "TT": 489.00,
    "TVA": 97.80
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:   3%|▎         | 1/30 [00:05<02:31,  5.24s/number]

{
    "Invoice_Name": "3.jpg",
    "Fournisseur": "FUN PLACE",
    "Date_Facture": "14/04/2023",
    "Num_Facture": "FUNPLACE - 2023/000403",
    "TTC": 3864,
    "TT": 3220,
    "TVA": 644
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:   7%|▋         | 2/30 [00:10<02:23,  5.14s/number]

{
    "Invoice_Name": "49.jpg",
    "Fournisseur": "LYDEC",
    "Date_Facture": "30/03/2023",
    "Num_Facture": "1507042573 AM",
    "TTC": 224.28,
    "TT": 209.59,
    "TVA": 14.69
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  10%|█         | 3/30 [00:17<02:43,  6.04s/number]

{
    "Invoice_Name": "57.jpg",
    "Fournisseur": "ARCANES TECHNOLOGIES",
    "Date_Facture": "04-01-2023",
    "Num_Facture": "223285-23-DJ",
    "TTC": 226.80,
    "TT": 189.00,
    "TVA": 37.80
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  13%|█▎        | 4/30 [00:22<02:23,  5.50s/number]

{
    "Invoice_Name": "58.jpg",
    "Fournisseur": "NETTOPAP",
    "Date_Facture": "10/03/2023",
    "Num_Facture": "230058",
    "TTC": 3026.45,
    "TT": 2522.04,
    "TVA": 504.41
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  17%|█▋        | 5/30 [00:27<02:12,  5.30s/number]

{
    "Invoice_Name": "61.jpg",
    "Fournisseur": "OUTIDIS SARL",
    "Date_Facture": "22/07/2022",
    "Num_Facture": "202212194",
    "TTC": 3588,
    "TT": 2990,
    "TVA": 598
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  20%|██        | 6/30 [00:31<01:59,  4.97s/number]

{
    "Invoice_Name": "66.jpg",
    "Fournisseur": "PAPETERIE IMIZEGH",
    "Date_Facture": "03/01/2023",
    "Num_Facture": "00000002",
    "TTC": 1250,
    "TT": 1041.68,
    "TVA": 208.32
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  23%|██▎       | 7/30 [01:38<09:40, 25.23s/number]

{
    "Invoice_Name": "67.jpg",
    "Fournisseur": "CAPMAN",
    "Date_Facture": "10/05/23",
    "Num_Facture": "FA2304843",
    "TTC": 134.10,
    "TT": 111.75,
    "TVA": 22.35
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  27%|██▋       | 8/30 [01:43<06:55, 18.87s/number]

{
    "Invoice_Name": "72.jpg",
    "Fournisseur": "OUTIDIS",
    "Date_Facture": "01/10/2022",
    "Num_Facture": "0001141024102022",
    "TTC": 500,
    "TT": 416.67,
    "TVA": 83.33
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  30%|███       | 9/30 [01:48<05:04, 14.48s/number]

{
    "Invoice_Name": "76.jpg",
    "Fournisseur": "ONETECHR",
    "Date_Facture": "09/03/23",
    "Num_Facture": "FA2303-0161",
    "TTC": 420,
    "TT": 350,
    "TVA": 70
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  33%|███▎      | 10/30 [01:53<03:50, 11.53s/number]

{
    "Invoice_Name": "109.jpg",
    "Fournisseur": "DUOWEB",
    "Date_Facture": "17/01/2023",
    "Num_Facture": "FP|202301|007",
    "TTC": 2100,
    "TT": 1750,
    "TVA": 350
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  37%|███▋      | 11/30 [01:57<02:57,  9.33s/number]

{
    "Invoice_Name": "114.jpg",
    "Fournisseur": "LATELIER DU VOYAGE",
    "Date_Facture": "31/12/2022",
    "Num_Facture": "CMN/22/INV/01/050592",
    "TTC": 4461,
    "TT": 4431,
    "TVA": 5
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  40%|████      | 12/30 [02:02<02:25,  8.09s/number]

{
    "Invoice_Name": "121.jpg",
    "Fournisseur": "OUTIDIS",
    "Date_Facture": "18/08/2022",
    "Num_Facture": "001022",
    "TTC": 5000,
    "TT": 4166.67,
    "TVA": 833.33
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  43%|████▎     | 13/30 [02:07<02:00,  7.09s/number]

{
    "Invoice_Name": "129.jpg",
    "Fournisseur": "OUTIDIS SARL",
    "Date_Facture": "08. Jul 2022",
    "Num_Facture": "300309732",
    "TTC": 8695.60,
    "TT": 7246.37,
    "TVA": 1449.23
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  47%|████▋     | 14/30 [03:12<06:34, 24.64s/number]

{
    "Invoice_Name": "131.jpg",
    "Fournisseur": "PRINT CENTER",
    "Date_Facture": "13/03/2023",
    "Num_Facture": "002/2023",
    "TTC": 4080,
    "TT": 3400,
    "TVA": 680
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  50%|█████     | 15/30 [03:18<04:43, 18.88s/number]

{
    "Invoice_Name": "151.jpg",
    "Fournisseur": "CMI",
    "Date_Facture": "26/07/22",
    "Num_Facture": "003052404000075",
    "TTC": 120,
    "TT": 100,
    "TVA": 20
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  53%|█████▎    | 16/30 [03:23<03:26, 14.75s/number]

{
    "Invoice_Name": "164.jpg",
    "Fournisseur": "OUTIDIS",
    "Date_Facture": "01/08/2022",
    "Num_Facture": "0000868515082022",
    "TTC": 500,
    "TT": 416.67,
    "TVA": 83.33
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  57%|█████▋    | 17/30 [03:28<02:33, 11.80s/number]

{
    "Invoice_Name": "307.jpg",
    "Fournisseur": "PAPETERIE IMIZEGH",
    "Date_Facture": "11/05/2023",
    "Num_Facture": "000000002007",
    "TTC": 1186.40,
    "TT": 988.66,
    "TVA": 197.74
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  60%|██████    | 18/30 [03:33<01:56,  9.73s/number]

{
    "Invoice_Name": "311.jpg",
    "Fournisseur": "ELECTRO AINANE",
    "Date_Facture": "03/04/2023",
    "Num_Facture": "N12.04.2023",
    "TTC": 1650,
    "TT": 1375,
    "TVA": 275
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  63%|██████▎   | 19/30 [03:38<01:32,  8.41s/number]

{
    "Invoice_Name": "314.jpg",
    "Fournisseur": "ASSURLAND",
    "Date_Facture": "5/1/2023",
    "Num_Facture": "22/0004104",
    "TTC": 2586,
    "TT": 2586,
    "TVA": 0
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  67%|██████▋   | 20/30 [03:43<01:12,  7.22s/number]

{
    "Invoice_Name": "315.jpg",
    "Fournisseur": "RENAULT",
    "Date_Facture": "21/06/22",
    "Num_Facture": "044797",
    "TTC": 152360,
    "TT": 142684,
    "TVA": 9675
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  70%|███████   | 21/30 [04:49<03:43, 24.82s/number]

{
    "Invoice_Name": "316.jpg",
    "Fournisseur": "ELECTROPLANET",
    "Date_Facture": "30-01-2023",
    "Num_Facture": "508/30-012023/00242069",
    "TTC": 12909,
    "TT": 10757,
    "TVA": 2151
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  73%|███████▎  | 22/30 [04:53<02:29, 18.75s/number]

{
    "Invoice_Name": "317.jpg",
    "Fournisseur": "LYDEC",
    "Date_Facture": "22/03/2023",
    "Num_Facture": "C060200093",
    "TTC": 1637,
    "TT": 1350,
    "TVA": 286
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  77%|███████▋  | 23/30 [04:58<01:42, 14.68s/number]

{
    "Invoice_Name": "319.jpg",
    "Fournisseur": "SOCIETE OUTIDIS OUTIDIS",
    "Date_Facture": "07/04/2023",
    "Num_Facture": "0000044161042023",
    "TTC": 157.5,
    "TT": 157.5,
    "TVA": 0
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  80%|████████  | 24/30 [05:04<01:11, 11.90s/number]

{
    "Invoice_Name": "327.jpg",
    "Fournisseur": "BRICOMAR",
    "Date_Facture": "15/01/2022",
    "Num_Facture": "0122003246",
    "TTC": 58.80,
    "TT": 49.00,
    "TVA": 9.80
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  83%|████████▎ | 25/30 [05:09<00:50, 10.04s/number]

{
    "Invoice_Name": "334.jpg",
    "Fournisseur": "IMPRIMOS",
    "Date_Facture": "24-10-2022",
    "Num_Facture": "18020",
    "TTC": 6240,
    "TT": 5200,
    "TVA": 1040
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  87%|████████▋ | 26/30 [05:15<00:34,  8.69s/number]

{
    "Invoice_Name": "340.jpg",
    "Fournisseur": "BIG OFFICE",
    "Date_Facture": "16/07/2022",
    "Num_Facture": "20220268",
    "TTC": 21240,
    "TT": 17700,
    "TVA": 3540
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  90%|█████████ | 27/30 [05:21<00:23,  7.75s/number]

{
    "Invoice_Name": "383.jpg",
    "Fournisseur": "MOUCHINE FTOUHI",
    "Date_Facture": "28/03/2023",
    "Num_Facture": "000 001",
    "TTC": "3100",
    "TT": "3100",
    "TVA": "0"
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  93%|█████████▎| 28/30 [06:25<00:49, 24.78s/number]

{
    "Invoice_Name": "417.jpg",
    "Fournisseur": "KAYAR",
    "Date_Facture": "11/05/2023",
    "Num_Facture": "F2023/00297",
    "TTC": 13777.31,
    "TT": 11481.09,
    "TVA": 2296.22
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices:  97%|█████████▋| 29/30 [06:30<00:18, 18.87s/number]

{
    "Invoice_Name": "419.jpg",
    "Fournisseur": "PRO NUMERIQUE",
    "Date_Facture": "01/02/2023",
    "Num_Facture": "FA2023043",
    "TTC": 1300,
    "TT": 1083.33,
    "TVA": 216.67
}
[32m[1mCSV Updated Successfully[0m


Processing Invoices: 100%|██████████| 30/30 [06:35<00:00, 13.19s/number]
