In [1]:
import pandas as pd
import requests
import json

In [54]:
from utils.embedding import get_embedding

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed
2025-01-28 16:12:34,264 - config.clients_config - INFO - HTTP_PROXY: None, REQUESTS_CA_BUNDLE: None


# Prestations FR

In [2]:
# Download the Fribourg services catalogue. The timeout keeps the cell from
# hanging indefinitely on a stalled connection, and raise_for_status() surfaces
# an HTTP error here instead of as a confusing JSON decoding failure later on.
res = requests.get(
    "https://www.fr.ch/sites/default/files/json/FR_Services_FULL.json",
    timeout=30,
)
res.raise_for_status()

In [3]:
# Decode the HTTP response body as JSON; `data` is what the following cells inspect.
data = res.json()

In [4]:
# Inspection only: list the top-level keys of the decoded payload.
data.keys()

dict_keys(['source', 'services'])

In [5]:
# Inspection only: show the "source" entry of the payload.
data["source"]

{'canton_code': 'FR',
 'languages': ['fr', 'de'],
 'fr': {'name': 'Etat de Fribourg'},
 'de': {'name': 'Staat Freiburg'},
 'created': '2024-09-26T09:10:00Z',
 'updated': '2024-09-28T11:48:32Z'}

In [6]:
# Inspection only: number of entries under "services".
len(data["services"])

2

In [22]:
# Scratch check: one empty bucket per language. The language codes are read
# from the payload itself so this cell does not depend on `languages`, which is
# only assigned in a later cell and is undefined on a fresh top-to-bottom run.
{lang: [] for lang in data["source"]["languages"]}

{'fr': [], 'de': []}

In [57]:
languages = ["fr", "de"]

docs = []

for service in data["services"]:
    for lang, content in service.items():

        text_embedding = await get_embedding(str(content))
        
        doc = {
            "text": content,
            "url": content["link"],
            "language": lang,
            "tags": "prestations_fr",
            "subtopics": None,
            "doctype": "context_doc",
            "organizations": None,
            "summary": None,
            "hyq": None,
            "hyq_declarative": None,
            "text_embedding": text_embedding,
        }
        docs.append(doc)


2025-01-28 16:13:15,140 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:13:15,157 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:13:15,855 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:13:15,862 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:13:16,163 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:13:16,171 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:13:16,777 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:13:16,781 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small


In [58]:
# Write the collected records to CSV without the row index. `index` is
# documented as a bool, so pass False explicitly rather than relying on None
# happening to be falsy.
pd.DataFrame(docs).to_csv("indexing/data/ch_ch_copilot/fribourg/prestations_fr.csv", index=False)

# Prestations VD

In [62]:
# Download the Vaud prestations payload. The timeout prevents the cell from
# hanging on a stalled connection; raise_for_status() stops on an HTTP error
# before the body is decoded as JSON.
res = requests.get(
    "https://prestations.vd.ch/pub/101554/api/prestation-data",
    timeout=30,
)
res.raise_for_status()
data = res.json()

In [63]:
# Inspection only: list the top-level keys of the decoded payload.
data.keys()

dict_keys(['catalog', 'promotedPrestations'])

In [65]:
# Inspection only: list the keys available under "catalog".
data["catalog"].keys()

dict_keys(['prestations', 'situations', 'thematiques'])

In [66]:
# Inspection only: list the keys available under "promotedPrestations".
data["promotedPrestations"].keys()

dict_keys(['entreprises', 'communes', 'particuliers'])

In [76]:
# Lookup table from thématique id to its label, built in catalogue order.
# A dict comprehension replaces the previous list comprehension that was run
# only for its side effects (and produced a throwaway list of None values).
thematiques_mapping = {x["id"]: x["label"] for x in data["catalog"]["thematiques"]}
thematiques_mapping

{1: 'Culture',
 2: 'Population',
 3: 'Santé, soins et handicap',
 4: 'Territoire et construction',
 5: 'Justice',
 6: 'Formation',
 7: 'Mobilité',
 8: 'Environnement',
 9: 'Soutien social et aides financières',
 10: 'Economie',
 11: 'Etat, Droit, Finances',
 13: 'Sécurité',
 16: 'Population',
 17: 'Santé, soins et handicap',
 18: 'Sécurité',
 20: 'Economie',
 21: 'Formation',
 22: 'Environnement',
 23: 'Territoire et construction',
 24: 'Mobilité',
 25: 'Etat, Droit, Finances',
 26: 'Soutien social et aides financières',
 27: 'Culture',
 28: 'Justice',
 29: 'Etat, Droit, Finances',
 31: 'Environnement',
 32: 'Sécurité',
 33: 'Santé, soins et handicap',
 34: 'Economie',
 35: 'Justice',
 36: 'Culture',
 37: 'Population',
 39: 'Territoire et construction',
 40: 'Formation',
 41: 'Mobilité',
 42: 'Soutien social et aides financières',
 43: 'Service ACV',
 44: 'Mobilité',
 78: 'Enfance, jeunesse et famille',
 80: 'Aménagement',
 81: 'Automobile',
 48: 'Cyberadministration',
 49: 'Quelles so

In [70]:
# Inspection only: number of entries under catalog -> prestations.
len(data["catalog"]["prestations"])

588

In [90]:
keys = ["titre", "description", "motsCles", "motsClesAlternatifs", "servicePrestataire", "servicePrestataireAbrege",
       "conditionsPrealables", "modaliteAccesTypes", "etapes", "publicCible", "documents", "cout", "delai", "info", "lienAcces"]

docs = []
for service in data["catalog"]["prestations"]:

    formatted_text = {k:v for k,v in service.items() if k in keys}

    text_embedding = await get_embedding(str(formatted_text))
    
    doc = {
        "text": formatted_text,
        "url": f"https://www.vd.ch/go.to?prestation={service['idMetierPrestation']}",
        "language": "fr",
        "tags": ",".join([thematiques_mapping[_id].lower().strip() for _id in service["thematiqueIds"]]),
        "subtopics": None,
        "summary": None,
        "hyq": None,
        "hyq_declarative": None,
        "doctype": "context_doc",
        "organizations": None,
        "text_embedding": text_embedding,
    }
    docs.append(doc)


2025-01-28 16:44:50,891 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:44:50,900 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:44:51,300 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:44:51,306 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:44:51,912 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:44:51,921 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:44:52,536 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-01-28 16:44:52,541 - utils.embedding - INFO - Embedding successfull with model: text-embedding-3-small
2025-01-28 16:44:53,654 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 2

In [91]:
# Write the collected records to CSV without the row index. `index` is
# documented as a bool, so pass False explicitly rather than relying on None
# happening to be falsy.
pd.DataFrame(docs).to_csv("indexing/data/ch_ch_copilot/vaud/prestations_vd.csv", index=False)

# ch.ch