In [1]:
# This notebook is used to create the CARE_SITE table
# Based on the following documentation:
#   https://ohdsi.github.io/CommonDataModel/cdm53.html#care_site
#   https://documentation-snds.health-data-hub.fr/omop/documentation_etl/care_site.html

import pandas as pd
import sqlite3
import hashlib

# Read the raw establishment extract.
# dtype=str keeps every column as text, so identifier values are not
# re-interpreted as numbers by pandas.
raw_care_site_path = "../data/raw/t_mcoaae.csv"
df_care_site = pd.read_csv(raw_care_site_path, dtype=str)

# Display the loaded frame as the cell output
df_care_site

Unnamed: 0,eta_num,soc_rai
0,750300360,l'Hôpital Privé des Peupliers
1,750023772,Pharmacie Plaisance


In [None]:
# Map the source column names onto the CARE_SITE field names and add an
# unpopulated location_id column (all values None) in one chained step.
df_care_site = (
    df_care_site
    .rename(columns={"eta_num": "care_site_source_value", "soc_rai": "care_site_name"})
    .assign(location_id=None)
)

# Deterministic pseudonymisation helper: source value -> integer ID
def hash_to_int(value) -> int:
    """Derive a stable integer identifier from ``value``.

    The value is converted with ``str()``, UTF-8 encoded, hashed with
    SHA-256, and the digest (read as a big-endian unsigned integer) is
    reduced modulo ``2**31 - 1``.

    Args:
        value: Any object; only its ``str()`` representation is hashed, so
            ``123`` and ``"123"`` yield the same identifier.

    Returns:
        An integer in ``[0, 2**31 - 2]``, which fits a signed 32-bit column.
        Distinct inputs may collide because the digest is reduced to 31 bits.
    """
    digest_bytes = hashlib.sha256(str(value).encode()).digest()
    # Big-endian interpretation of the raw digest equals int(hexdigest, 16)
    digest_as_int = int.from_bytes(digest_bytes, "big")
    modulus = 2**31 - 1
    return digest_as_int % modulus

# Derive care_site_id by hashing each source value, then store it as int32
# (hash_to_int results are always below 2**31 - 1, so they fit).
hashed_care_site_ids = df_care_site["care_site_source_value"].map(hash_to_int)
df_care_site["care_site_id"] = hashed_care_site_ids.astype("int32")

# Display the transformed frame as the cell output
df_care_site


Unnamed: 0,care_site_source_value,care_site_name,location_id,care_site_id
0,750300360,l'Hôpital Privé des Peupliers,,859137566
1,750023772,Pharmacie Plaisance,,796756685


In [3]:
# Connect to SQLite
conn = sqlite3.connect("../data/processed/omop_database.sqlite")

try:
    # Replace table if it exists (if_exists="replace" drops it before
    # inserting); index=False keeps the DataFrame index out of the table.
    df_care_site.to_sql("CARE_SITE", conn, if_exists="replace", index=False)

    conn.commit()
finally:
    # Always release the database handle, even when the write above fails,
    # so a failed run does not leave the connection open in the kernel.
    conn.close()