In [18]:
import pandas as pd
from pathlib import Path
from loguru import logger
import uuid

In [19]:
# silver_path = base_path / 'DATA' / 'SILVER'
# silver_path = base_path / 'DATA' / 'SILVER'
# gold_path = base_path / 'DATA' / 'GOLD'
# gold_path.mkdir(parents=True, exist_ok=True)
gold_path = Path('DATABASES/france_172074/DATA') / 'GOLD'
gold_path.mkdir(exist_ok=True)

In [20]:
df_farm_types = pd.DataFrame({
    'type_id': [1, 2, 3],
    'name': ['Wind', 'Solar', 'Hybrid']
})
df_farm_types.to_csv(gold_path / 'farm_types.csv', index=False)
logger.info(f"farm_types: {len(df_farm_types)} rows")

[32m2025-10-16 16:22:11.056[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mfarm_types: 3 rows[0m


In [21]:
COMPANY_ROLES = sorted([
    'Customer',
    'Portfolio',
    'Asset Manager'
    'Legal Representative',
    'Bank Domiciliation',
    'Project Developer',
    'Co-developer',
    'WTG Service Provider',# = WEC Service Company
    'Substation Service Provider', # = Transfer station / power station service company
    'Grid Operator',
    'OM Main Service Company', # = Main Service Company
    'OM Service Provider'
])

df_company_roles = pd.DataFrame({'role_name': COMPANY_ROLES})
df_company_roles.insert(0, 'id', df_company_roles.index + 1)
df_company_roles.to_csv(gold_path / 'company_roles.csv', index=False)
logger.info(f"company_roles: {len(df_company_roles)} rows")

PERSON_ROLES = sorted([
    'Head of Technical Management',
    'Technical Manager',
    'Substitute Technical Manager',
    'HSE Coordination',
    'Electrical Manager',
    'Controller Responsible',
    'Controller Deputy',
    'Administrative responsible',
    'Administrative Deputy',
    'Control Room Operator',
    'Field Crew Manager',
    'Environmental Department Manager',
    'Key Account Manager',
    'Substitute Key Account Manager',
    'Chartered Accountant', # = Expert Comptabe Chartered Accountant
    'Legal Auditor', # = Commissaire aux Comptes Legal Auditor
    'Asset Manager',
    'Legal Representative'
])

df_person_roles = pd.DataFrame({'role_name': PERSON_ROLES})
df_person_roles.insert(0, 'id', df_person_roles.index + 1)
df_person_roles.to_csv(gold_path / 'person_roles.csv', index=False)

[32m2025-10-16 16:22:11.112[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mcompany_roles: 11 rows[0m


In [22]:
silver_path = Path('DATABASES/france_172074/DATA/SILVER')
df_repartition = pd.read_csv(silver_path / 'repartition_sheet.csv')  # type: ignore
df_repartition.head()

Unnamed: 0,owner,spv,project,code,farm_type,technical_manager,substitute_technical_manager,key_account_manager,substitute_key_account_manager,electrical_manager,controller_responsible,controller_deputy,administrative_responsible,administrative_deputy
0,STATKRAFT,EOLIENNES SUROIT SNC,Lanrivoaré,LAN,Wind,Fahim RAHMANI,Hamed OMID,Azenor VIDAMENT,Arthur LAGACHE,Stephane BERTHE,,,,
1,STATKRAFT,EOLIENNES SUROIT SNC,Méligny-le-Grand,MLG,Wind,Fahim RAHMANI,Hamed OMID,Azenor VIDAMENT,Arthur LAGACHE,Stephane BERTHE,,,,
2,STATKRAFT,EOLIENNES SUROIT SNC,Ménil-la-Horgne,MLH,Wind,Fahim RAHMANI,Hamed OMID,Azenor VIDAMENT,Arthur LAGACHE,Stephane BERTHE,,,,
3,STATKRAFT,EOLIENNES SUROIT SNC,Roudouallec,ROU,Wind,Fahim RAHMANI,Hamed OMID,Azenor VIDAMENT,Arthur LAGACHE,Stephane BERTHE,,,,
4,LANÉA,ENERGIE 02,Dizy/Obi,E02,Wind,Mohammed-Amine MAACHOU,Gwénaël CARRET,Azenor VIDAMENT,Arthur LAGACHE,Stephane BERTHE,Florian BONNET,Ronald SENAEME,Brandon MOLIN,Ronald SENAEME


In [None]:

person_columns = [
    'technical_manager',
    'substitute_technical_manager', 
    'key_account_manager',
    'substitute_key_account_manager',
    'electrical_manager',
    'controller_responsible',
    'controller_deputy',
    'administrative_controller',
    'administrative_deputy'
]

all_persons = []

for col in person_columns:
    if col in df_repartition.columns:
        persons_in_column = df_repartition[col].dropna().unique()
        all_persons.extend(persons_in_column)

all_persons_series = pd.Series(all_persons).str.strip().replace('', pd.NA).dropna()

persons_exploded = all_persons_series.str.split(r' \+ ', regex=True).explode().unique()

df_persons = pd.DataFrame({'full_name': persons_exploded})

df_persons = (
    df_persons[df_persons['full_name'] != '']
    .drop_duplicates()
    .reset_index(drop=True)
    .assign(
        first_name=lambda df: df['full_name'].str.split().str[0],
        last_name=lambda df: df['full_name'].str.split().str[1:].str.join(' ')
    )
    .drop('full_name', axis=1)
)


df_persons.insert(0, 'uuid', [str(uuid.uuid4()) for _ in range(len(df_persons))])

logger.info(f"persons: {len(df_persons)} rows")
df_persons


In [24]:
df_farms = (
    df_repartition[['spv', 'project', 'code', 'farm_type']]
    .drop_duplicates()
    .reset_index(drop=True)
    .merge(df_farm_types, left_on='farm_type', right_on='name', how='left')
    .drop(['name', 'farm_type'], axis=1)
    .rename(columns={'type_id': 'farm_type_id'})
)

df_farms.insert(0, 'uuid', [str(uuid.uuid4()) for _ in range(len(df_farms))])

logger.info(f"farms: {len(df_farms)} rows")
df_farms

[32m2025-10-16 16:22:11.331[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1mfarms: 47 rows[0m


Unnamed: 0,uuid,spv,project,code,farm_type_id
0,147c3eb8-7002-405b-ab76-b3f5c9de704e,EOLIENNES SUROIT SNC,Lanrivoaré,LAN,1
1,86afc6d1-f790-43a7-88aa-b061080b09fa,EOLIENNES SUROIT SNC,Méligny-le-Grand,MLG,1
2,b417a944-3cfe-465c-a946-15da4d88c34f,EOLIENNES SUROIT SNC,Ménil-la-Horgne,MLH,1
3,7212e363-baff-4e5c-a479-f99f1bbe9947,EOLIENNES SUROIT SNC,Roudouallec,ROU,1
4,5a35c9fe-01ee-4fc2-a122-97a8a77705a1,ENERGIE 02,Dizy/Obi,E02,1
5,04ec153a-1736-4122-8c34-6de6fc0a9631,ENERGIE 06,Saisseval,E06,1
6,7ebba9c4-49e1-40f3-b8a8-0a2822564509,ENERGIE 21 POITOU-CHARENTES,MLHCP,EPC,1
7,d32474f1-9e11-46b5-aeb3-fb8079894d0d,ENERGIE DE L'OBI,Parc M (Obi),EOB,1
8,67404ac0-d1fb-4c2c-815f-f8f298fe3990,ENERGIE DIZY,Dizy/Obi,EDI,1
9,1155f41d-ff26-4b2d-a4e8-02777b4f7bd5,ENERGIE DU BLANC MONT,Blanc-Mont,EBM,1


In [25]:
# Create a lookup for person names to UUIDs
person_lookup = df_persons.set_index(['first_name', 'last_name'])['uuid'].to_dict()

# Function to get person UUID from full name
def get_person_uuid(full_name):
    if pd.isna(full_name) or full_name == '':
        return None
    parts = full_name.split()
    if len(parts) < 2:
        return None
    first_name = parts[0]
    last_name = ' '.join(parts[1:])
    return person_lookup.get((first_name, last_name))

# Map column names to role names
column_to_role = {
    'technical_manager': 'Technical Manager',
    'substitute_technical_manager': 'Substitute Technical Manager',
    'key_account_manager': 'Key Account Manager',
    'substitute_key_account_manager': 'Substitute Key Account Manager',
    'electrical_manager': 'Electrical Manager',
    'controller_responsible': 'Controller Responsible',
    'controller_deputy': 'Controller Deputy',
    'administrative_responsible': 'Administrative responsible',
    'administrative_deputy': 'Administrative Deputy'
}

# Create role lookup
role_lookup = df_person_roles.set_index('role_name')['id'].to_dict()

# Create farm code to UUID lookup
farm_lookup = df_farms.set_index('code')['uuid'].to_dict()

# Build farm_referents table
referents_list = []

for col_name, role_name in column_to_role.items():
    if col_name in df_repartition.columns:
        role_id = role_lookup.get(role_name)
        
        for _, row in df_repartition.iterrows():
            farm_uuid = farm_lookup.get(row['code'])
            person_name = row[col_name]
            
            if pd.notna(person_name) and person_name != '':
                person_uuid = get_person_uuid(person_name)
                
                if farm_uuid and person_uuid:
                    referents_list.append({
                        'farm_uuid': farm_uuid,
                        'farm_code': row['code'],
                        'role_id': role_id,
                        'person_uuid': person_uuid,
                        'company_uuid': None
                    })

df_farm_referents = pd.DataFrame(referents_list).drop_duplicates()

logger.info(f"farm_referents: {len(df_farm_referents)} rows")
df_farm_referents.head(20)

[32m2025-10-16 16:22:11.438[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m59[0m - [1mfarm_referents: 360 rows[0m


Unnamed: 0,farm_uuid,farm_code,role_id,person_uuid,company_uuid
0,147c3eb8-7002-405b-ab76-b3f5c9de704e,LAN,18,0d63d75d-873f-4313-b2e7-cd0408481307,
1,86afc6d1-f790-43a7-88aa-b061080b09fa,MLG,18,0d63d75d-873f-4313-b2e7-cd0408481307,
2,b417a944-3cfe-465c-a946-15da4d88c34f,MLH,18,0d63d75d-873f-4313-b2e7-cd0408481307,
3,7212e363-baff-4e5c-a479-f99f1bbe9947,ROU,18,0d63d75d-873f-4313-b2e7-cd0408481307,
4,5a35c9fe-01ee-4fc2-a122-97a8a77705a1,E02,18,5d9f3f29-ab15-4ff0-bdbe-495b14a34798,
5,04ec153a-1736-4122-8c34-6de6fc0a9631,E06,18,5d9f3f29-ab15-4ff0-bdbe-495b14a34798,
6,7ebba9c4-49e1-40f3-b8a8-0a2822564509,EPC,18,ae51683b-94c7-46c6-ba39-d32d26cfd809,
7,d32474f1-9e11-46b5-aeb3-fb8079894d0d,EOB,18,5d9f3f29-ab15-4ff0-bdbe-495b14a34798,
8,67404ac0-d1fb-4c2c-815f-f8f298fe3990,EDI,18,5d9f3f29-ab15-4ff0-bdbe-495b14a34798,
9,1155f41d-ff26-4b2d-a4e8-02777b4f7bd5,EBM,18,ae51683b-94c7-46c6-ba39-d32d26cfd809,
