## Test API et affichage des données

In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [3]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="test_ci")

for v in ['divo', 'bingerville', 'bouaké', 'anyama', 'san-pédro', 'daloa', 'gagnoa', 'tiassalé',
       'abidjan', 'man', 'grand-bassam', 'yamoussoukro', 'korhogo']:
    loc = geolocator.geocode(v, country_codes="ci")
    print(v, "->", loc)
    if loc:
        # affiche tout le raw pour voir la structure réelle
        print("RAW:", loc.raw)            
        addr = loc.raw.get("address", {}) 
        print("ADDRESS:", addr)
        print("\n")

divo -> Divo, Lôh-Djiboua, Gôh-Djiboua, Côte d’Ivoire
RAW: {'place_id': 277785976, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'node', 'osm_id': 443389991, 'lat': '5.8405332', 'lon': '-5.3549049', 'class': 'place', 'type': 'city', 'place_rank': 16, 'importance': 0.37320859947062124, 'addresstype': 'city', 'name': 'Divo', 'display_name': 'Divo, Lôh-Djiboua, Gôh-Djiboua, Côte d’Ivoire', 'boundingbox': ['5.6805332', '6.0005332', '-5.5149049', '-5.1949049']}
ADDRESS: {}


bingerville -> Bingerville, Abidjan, Côte d’Ivoire
RAW: {'place_id': 277664988, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 3377922, 'lat': '5.3577727', 'lon': '-3.8885591', 'class': 'boundary', 'type': 'administrative', 'place_rank': 14, 'importance': 0.41372244811350606, 'addresstype': 'city', 'name': 'Bingerville', 'display_name': 'Bingerville, Abidjan, Côte d’Ivoire', 'boundingbox': ['5.2728319

In [None]:
df = pd.DataFrame({"ville": ['divo', 'bouaké', 'anyama', 'san-pédro', 'daloa', 'gagnoa',
       'abidjan', 'man', 'grand-bassam', 'yamoussoukro', 'korhogo']})

# Initialiser le géocodeur Nominatim (OpenStreetMap)
geolocator = Nominatim(user_agent="mon_app_geocodage_ci")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

def get_region_from_city_ci(city_name):
    if pd.isna(city_name):
        return None

    loc = geocode(city_name, country_codes="ci")
    if not loc:
        return None

    # Récupérer le display_name depuis le dict brut
    disp = loc.raw.get("display_name")
    if not disp:
        return None

    # Exemple: "Daloa, Haut-Sassandra, Sassandra-Marahoué, Côte d’Ivoire"
    parts = [p.strip() for p in disp.split(",")]

    # Enlever le pays (dernier élément)
    if parts and parts[-1].lower().startswith(("côte d’ivoire", "cote d ivoire")):
        parts = parts[:-1]

    # Heuristique simple : on prend l'avant-dernier élément comme région
    # Daloa -> [..., 'Haut-Sassandra', 'Sassandra-Marahoué'] -> 'Sassandra-Marahoué'
    # Grand-Bassam -> [..., 'Sud-Comoé', 'Comoé'] -> 'Comoé'
    if len(parts) >= 3:
        region = parts[-2]
        return region

    return None
# Appliquer sur la colonne ville
df["region"] = df["ville"].apply(get_region_from_city_ci)

print(df)

In [4]:
df_sp = pd.read_csv("./Paquets/Sous_Prefecture.csv")
df_sp

Unnamed: 0,CodDistric,NomDistric,ChefLieu_Distr,CodReg,NomReg,ChefLieu_Reg,CodDep,NomDep,CodSp,NomSp,Superfkm_,Type_1,Type_2,Unnamed: 13
0,4,COMOE,ABENGOUROU,5,INDENIE-DJUABLIN,ABENGOUROU,1,ABENGOUROU,1,ABENGOUROU,785.0,0,Commune,SousPrefecture
1,8,LACS,DIMBOKRO,11,N'ZI,DIMBOKRO,20,DIMBOKRO,1,ABIGUI,523.0,80,,SousPrefecture
2,1,AUTONOME D'ABIDJAN,ABIDJAN,1,ABIDJAN,,2,ABIDJAN,1,ABOBO,97.0,30,Commune,
3,4,COMOE,ABENGOUROU,13,SUD-COMOE,ABOISSO,3,ABOISSO,1,ABOISSO,373.0,20,Commune,SousPrefecture
4,9,LAGUNES,DABOU,30,LA ME,ADZOPE,52,ALEPE,1,ABOISSO-COMOE,658.0,60,,SousPrefecture
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,10,MONTAGNES,MAN,6,TONKPI,MAN,18,DANANE,7,ZONNEU,301.0,60,,SousPrefecture
516,10,MONTAGNES,MAN,27,GUEMON,DUEKOUE,7,BANGOLO,9,ZOU,533.0,40,,SousPrefecture
517,10,MONTAGNES,MAN,6,TONKPI,MAN,66,ZOUAN-HOUNIEN,6,ZOUAN-HOUNIEN,463.0,30,Commune,SousPrefecture
518,7,SASSANDRA-MARAHOUE,DALOA,2,HAUT-SASSANDRA,DALOA,95,ZOUKOUGBEU,4,ZOUKOUGBEU,186.0,10,Commune,SousPrefecture


In [None]:
import requests

def get_region_osm(region_name):
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": f"{region_name}, Côte d'Ivoire",
        "format": "jsonv2",
        "addressdetails": 1,
        "limit": 1,
        "polygon_geojson": 0,  # pas obligé d'avoir le polygone ici
    }
    r = requests.get(url, params=params, headers={"User-Agent": "ci-region-script"})
    r.raise_for_status()
    data = r.json()
    if not data:
        return None
    return data[0]   # contient osm_id, osm_type, boundingbox, etc.

region = get_region_osm("Gbêkê")
print(region["osm_type"], region["osm_id"], region.get("boundingbox"))


In [None]:
import requests

osm_id = 3603161          # ce que tu as affiché
area_id = 3600000000 + osm_id  # => 3603601234

overpass_url = "https://overpass-api.de/api/interpreter"
query = f"""
[out:json][timeout:60];
area({area_id})->.reg;
(
  node["place"~"city|town|village"](area.reg);
);
out body;
"""

r = requests.post(overpass_url, data={"data": query})
r.raise_for_status()
data = r.json()
print(data)
names = []
#for el in data.get("elements", []):
    #print("ID:", el.get("id"), "type:", el.get("type"))
    #print(el)


In [5]:
df_Anst = pd.read_csv("../DATAS/ANSTAT2021_dataset_Clean.csv")
df_Anst

Unnamed: 0,agro_zone,region_name,department,city,milieu_resid,hhweight,resid,sex,age_num,lien,...,frigo,cuisin,ordin,decod,car,superf,bancarise,volhor_manquant,salaire_mois_manquant,Stg_formel
0,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,29,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,1,0,0,1
1,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,17,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
2,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Masculin,15,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
3,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,12,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
4,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,34,Conjoint ( e ),...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64469,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,11,Neveu/Nièce,...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64470,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,7,"Fils, Fille",...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64471,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,10,Neveu/Nièce,...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64472,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,4,"Fils, Fille",...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0


In [6]:
print(df_Anst['region_name'].nunique())
df_Anst['region_name'].unique()

33


array(["AUTONOME D'ABIDJAN", 'HAUT-SASSANDRA', 'PORO', 'GBEKE',
       'INDENIE-DJUABLIN', 'TONKPI', 'YAMOUSSOUKRO', 'GONTOUGO',
       'SAN-PEDRO', 'KABADOUGOU', "N'ZI", 'MARAHOUE', 'SUD-COMOE',
       'WORODOUGOU', 'LÔH-DJIBOUA', 'AGNEBY-TIASSA', 'GÔH', 'CAVALLY',
       'BAFING', 'BAGOUE', 'BELIER', 'BERE', 'BOUNKANI', 'FOLON',
       'GBÔKLE', 'GRANDS-PONTS', 'GUEMON', 'HAMBOL', 'IFFOU', 'LA ME',
       'NAWA', 'TCHOLOGO', 'MORONOU'], dtype=object)

In [7]:
df2 = pd.read_csv("./Paquets/LISTE DES CIRCONSCRIPTIONS ADMINISTRATIVES ET DES COMMUNES.csv")
df2

Unnamed: 0,REGION,CHEF-LIEU,DEPARTEMENT,SOUS-PREFECTURE,COMMUNE
0,LAGUNES,ABIDJAN,ABIDJAN,Abidjan,Abobo
1,LAGUNES,ABIDJAN,ABIDJAN,Abidjan,Adjamé
2,LAGUNES,ABIDJAN,ABIDJAN,Abidjan,Attecoubé
3,LAGUNES,ABIDJAN,ABIDJAN,Abidjan,Cocody
4,LAGUNES,ABIDJAN,ABIDJAN,Abidjan,Koumassi
...,...,...,...,...,...
247,DENGUELE,ODIENNE,ODIENNE,Samatiguila,Samatiguila
248,DENGUELE,ODIENNE,ODIENNE,Seguelon,Séguélon
249,DENGUELE,ODIENNE,ODIENNE,Seydougou,Seydougou
250,DENGUELE,ODIENNE,ODIENNE,Tieme,Tiémé


In [None]:
print(df2['REGION'].nunique())
df2['REGION'].unique()

In [8]:
print(df_sp['NomReg'].nunique())
df_sp['NomReg'].unique() 

33


array(['INDENIE-DJUABLIN', "N'ZI", 'ABIDJAN', 'SUD-COMOE', 'LA ME',
       'AGNEBY-TIASSA', 'GRAND-PONTS', 'IFFOU', 'GONTOUGO', 'MORONOU',
       'GBEKE', 'BELIER', 'HAMBOL', 'YAMOUSSOUKRO', 'GUEMON', 'PORO',
       'KABADOUGOU', 'CAVALLY', 'TONKPI', 'BAGOUE', 'GÔH',
       'HAUT-SASSANDRA', 'MARAHOUE', 'TCHOLOGO', 'WORODOUGOU', 'BOUNKANI',
       'BAFING', 'BERE', 'NAWA', 'LÔH-DJIBOUA', 'GBOKLE', 'SAN-PEDRO',
       'FOLON'], dtype=object)

In [9]:
#df_sp["NomReg"] = df_sp["NomReg"].replace({
 #   "G�H": "GÔH",
  #  "L�H-DJIBOUA": "LÔH-DJIBOUA"
#})

In [10]:
#print(df_sp['NomReg'].nunique())
#df_sp['NomReg'].unique() 

In [11]:
df_sp['NomSp'].unique() 

array(['ABENGOUROU', 'ABIGUI', 'ABOBO', 'ABOISSO', 'ABOISSO-COMOE',
       'ABONGOUA', 'ABOUDE', 'ADAOU', 'ADIAKE', 'ADJAME', 'ADJOUAN',
       'ADZOPE', 'AFFERY', 'AGBOVILLE', 'AGNIBILEKROU', 'AGOU',
       'AHOUANOU', 'AKOBOISSUE', 'AKOUPE', 'AKPASSANOU', 'ALEPE',
       'ALLOSSO', 'AMANVI', 'AMELEKIA', 'ANANDA', 'ANANGUIE', 'ANDE',
       'ANDO-KEKRENOU', 'ANGODA', 'ANIANOU', 'ANIASSUE', 'ANNEPE',
       'ANOUMABA', 'ANYAMA', 'APPIMANDOUM', 'ARIKOKAHA', 'ARRAH',
       'ASSAHARA', 'ASSIE-KOUMASSI', 'ASSIKOI', 'ASSINIE', 'ASSUEFRY',
       'ATTECOUBE', 'ATTIEGOUAKRO', 'ATTOBROU', 'ATTOUTOU A', 'AYAME',
       'AYAOU-SOKPA', 'AZAGUIE', 'BACANDA', 'BADIKAHA', 'BAGOHOUO',
       'BAHOUAKAHA', 'BAKO', 'BAKOUBLY', 'BANDAKAGNI-TOMORA', 'BANGOLO',
       'BANNEU', 'BASSAWA', 'BAYA', 'BAYOTA', 'BAZRA-NATTIS', 'BAZRE',
       'BECEDI-BRIGNAN', 'BECOUEFIN', 'BEDIALA', 'BEDY-GOAZON',
       'BEGBESSOU', 'BENGASSOU', 'BEOUE-ZIBIAO', 'BEOUMI', 'BETTIE',
       'BIANKOUMA', 'BIANOUAN', 'BIEBY', 'B

In [13]:
missing_counts = df_sp.isnull().sum()
missing_counts = missing_counts[missing_counts > 0]
print("Variables avec valeurs manquantes :")
print(missing_counts)

Variables avec valeurs manquantes :
ChefLieu_Reg     18
Superfkm_         1
Type_2          319
Unnamed: 13      11
dtype: int64


In [14]:
df_sp.to_csv("./Paquets/Sous_Prefecture.csv", index=False, encoding="utf-8")

In [15]:
print(df_Anst['department'].nunique())
sorted_departments = sorted(df_Anst["department"].dropna().unique())
print(sorted_departments)

108
['ABENGOUROU', 'ABIDJAN', 'ABOISSO', 'ADIAKE', 'ADZOPE', 'AGBOVILLE', 'AGNIBILEKROU', 'AKOUPE', 'ALEPE', 'ARRHA', 'ATTIEGOUAKRO', 'BANGOLO', 'BEOUMI', 'BETTIE', 'BIANKOUMA', 'BLOLEQUIN', 'BOCANDA', 'BONDOUKOU', 'BONGOUANOU', 'BOTRO', 'BOUAFLE', 'BOUAKE', 'BOUNA', 'BOUNDIALI', 'BUYO', 'DABAKALA', 'DABOU', 'DALOA', 'DANANE', 'DAOUKRO', 'DIANRA', 'DIDIEVI', 'DIKODOUGOU', 'DIMBOKRO', 'DIVO', 'DJEKANOU', 'DOROPO', 'DUEKOUE', 'FACOBLY', 'FERKESSEDOUGOU', 'FRESCO', 'GAGNOA', 'GBELEGBAN', 'GRAND-BASSAM', 'GRAND-LAHOU', 'GUEYO', 'GUIGLO', 'GUITRY', 'ISSIA', 'JACQUEVILLE', 'KANI', 'KANIASSO', 'KATIOLA', 'KONG', 'KORHOGO', 'KORO', 'KOUASSI-KOUASSIKRO', 'KOUIBLY', 'KOUN-FAO', 'KOUNAHIRI', 'KOUTO', 'LAKOTA', "M'BAHIAKRO", "M'BATTO", "M'BENGUE", 'MADINANI', 'MAN', 'MANKONO', 'MEAGUI', 'MINIGNAN', 'NASSIAN', 'NIAKARAMADOUGOU', 'ODIENNE', 'OUANGOLODOUGOU', 'OUANINOU', 'OUME', 'PRIKRO', 'SAKASSOU', 'SAMATIGUILA', 'SAN-PEDRO', 'SANDEGUE', 'SASSANDRA', 'SEGUELA', 'SEGUELON', 'SIKENSI', 'SINEMATIALI',

In [16]:
print(df_sp['NomDep'].nunique())
sorted_department = sorted(df_sp['NomDep'].dropna().unique())
print(sorted_department)

111
['ABENGOUROU', 'ABIDJAN', 'ABOISSO', 'ADIAKE', 'ADZOPE', 'AGBOVILLE', 'AGNIBILEKROU', 'AKOUPE', 'ALEPE', 'ARRAH', 'ATTIEGOUAKRO', 'BANGOLO', 'BEOUMI', 'BETTIE', 'BIANKOUMA', 'BLOLEQUIN', 'BOCANDA', 'BONDOUKOU', 'BONGOUANOU', 'BONON', 'BOTRO', 'BOUAFLE', 'BOUAKE', 'BOUNA', 'BOUNDIALI', 'BUYO', 'DABAKALA', 'DABOU', 'DALOA', 'DANANE', 'DAOUKRO', 'DIANRA', 'DIDIEVI', 'DIKODOUGOU', 'DIMBOKRO', 'DIVO', 'DJEKANOU', 'DOROPO', 'DUEKOUE', 'FACOBLY', 'FERKESSEDOUGOU', 'FRESCO', 'GAGNOA', 'GBELEBAN', 'GOHITAFLA', 'GRAND-BASSAM', 'GRAND-LAHOU', 'GUEYO', 'GUIGLO', 'GUITRY', 'ISSIA', 'JACQUEVILLE', 'KANI', 'KANIASSO', 'KATIOLA', 'KONG', 'KORHOGO', 'KORO', 'KOUASSI-KOUASSIKRO', 'KOUIBLY', 'KOUN-FAO', 'KOUNAHIRI', 'KOUTO', 'LAKOTA', "M'BAHIAKRO", "M'BATTO", "M'BENGUE", 'MADINANI', 'MAN', 'MANKONO', 'MEAGUI', 'MINIGNAN', 'NASSIAN', 'NIAKARAMADOUGOU', 'ODIENNE', 'OUANGOLODOUGOU', 'OUANINOU', 'OUELLE', 'OUME', 'PRIKRO', 'SAKASSOU', 'SAMATIGUILA', 'SAN-PEDRO', 'SANDEGUE', 'SASSANDRA', 'SEGUELA', 'SEGUE

## Obtenir les regions/ departements/ sous-préfectures 

In [108]:
# Dataset de l'anstat 
df_Anst

Unnamed: 0,agro_zone,region_name,department,city,milieu_resid,hhweight,resid,sex,age_num,lien,...,frigo,cuisin,ordin,decod,car,superf,bancarise,volhor_manquant,salaire_mois_manquant,Stg_formel
0,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,29,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,1,0,0,1
1,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,17,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
2,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Masculin,15,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
3,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,12,"Fils, Fille",...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
4,ABIDJAN,AUTONOME D'ABIDJAN,ABIDJAN,ABIDJAN,Urbain,1098.1172,Oui,Féminin,34,Conjoint ( e ),...,Oui,Non,Non,Oui,Non,0.0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64469,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,11,Neveu/Nièce,...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64470,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,7,"Fils, Fille",...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64471,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,10,Neveu/Nièce,...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0
64472,CENTRE,MORONOU,M'BATTO,TIEMELEKRO,Rural,242.7796,Oui,Masculin,4,"Fils, Fille",...,Non,Non,Non,Oui,Non,8566398.0,0,1,1,0


In [109]:
df_Anst.columns

Index(['agro_zone', 'region_name', 'department', 'city', 'milieu_resid',
       'hhweight', 'resid', 'sex', 'age_num', 'lien', 'marital_status',
       'religion', 'ethnie', 'nation', 'agemar', 'mal30j', 'aff30j', 'arrmal',
       'durarr', 'con30j', 'hos12m', 'couvmal', 'handit', 'handig', 'alfa',
       'alfa2', 'scol', 'educ_scol', 'educ_hi', 'diplome', 'telpor',
       'internet', 'activ7j', 'activ12m', 'branch', 'sectins', 'csp', 'volhor',
       'salaire', 'emploi_sec', 'sectins_sec', 'csp_sec', 'volhor_sec',
       'salaire_sec', 'serviceconsult', 'persconsult', 'salaire_mois',
       'salaire_sec_mois', 'rev_total_mois', 'log_revenu', 'sans_revenu',
       'age_grp', 'a_assurance', 'alphabete', 'logem', 'elec_ac', 'elec_ur',
       'elec_ua', 'tv', 'fer', 'frigo', 'cuisin', 'ordin', 'decod', 'car',
       'superf', 'bancarise', 'volhor_manquant', 'salaire_mois_manquant',
       'Stg_formel'],
      dtype='object')

In [110]:
# Dataset des sous-préfecture
df_sp

Unnamed: 0,CodDistric,NomDistric,ChefLieu_Distr,CodReg,NomReg,ChefLieu_Reg,CodDep,NomDep,CodSp,NomSp,Superfkm_,Type_1,Type_2,Unnamed: 13
0,4,COMOE,ABENGOUROU,5,INDENIE-DJUABLIN,ABENGOUROU,1,ABENGOUROU,1,ABENGOUROU,785.0,0,Commune,SousPrefecture
1,8,LACS,DIMBOKRO,11,N'ZI,DIMBOKRO,20,DIMBOKRO,1,ABIGUI,523.0,80,,SousPrefecture
2,1,AUTONOME D'ABIDJAN,ABIDJAN,1,ABIDJAN,,2,ABIDJAN,1,ABOBO,97.0,30,Commune,
3,4,COMOE,ABENGOUROU,13,SUD-COMOE,ABOISSO,3,ABOISSO,1,ABOISSO,373.0,20,Commune,SousPrefecture
4,9,LAGUNES,DABOU,30,LA ME,ADZOPE,52,ALEPE,1,ABOISSO-COMOE,658.0,60,,SousPrefecture
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,10,MONTAGNES,MAN,6,TONKPI,MAN,18,DANANE,7,ZONNEU,301.0,60,,SousPrefecture
516,10,MONTAGNES,MAN,27,GUEMON,DUEKOUE,7,BANGOLO,9,ZOU,533.0,40,,SousPrefecture
517,10,MONTAGNES,MAN,6,TONKPI,MAN,66,ZOUAN-HOUNIEN,6,ZOUAN-HOUNIEN,463.0,30,Commune,SousPrefecture
518,7,SASSANDRA-MARAHOUE,DALOA,2,HAUT-SASSANDRA,DALOA,95,ZOUKOUGBEU,4,ZOUKOUGBEU,186.0,10,Commune,SousPrefecture


In [111]:
# Dataset BD proxy 
df_user = pd.read_csv("./Paquets/Clean_All_Users.csv")
df_user

Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,region_name,city,...,previous_dwellings,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,divo,centre divo,...,1,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,37,F
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,bouaké,centre bouaké,...,4,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,47,F
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,anyama,centre anyama,...,2,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,29,F
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,san-pédro,centre san-pédro,...,5,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,24,F
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,daloa,centre daloa,...,5,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,59,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,man,centre man,...,3,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,62,F
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,korhogo,centre korhogo,...,2,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,33,M
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,daloa,centre daloa,...,1,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,43,M
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,gagnoa,centre gagnoa,...,4,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,40,M


In [112]:
df_user.columns

Index(['id', 'birthday', 'civility', 'marital_status', 'bancarise',
       'children_number', 'address', 'country', 'region_name', 'city',
       'neighborhood', 'monthly_income', 'occupational_function',
       'activity_sector_id', 'contract_type', 'monthly_charge', 'debt',
       'professional_experience', 'previous_dwellings', 'persons_dependents',
       'created_at', 'updated_at', 'cleaned_at', 'source_table',
       'cleaning_version', 'milieu_resid', 'age_num', 'sex'],
      dtype='object')

In [113]:
print(df_user['region_name'].nunique())
df_user['region_name'].unique()

11


array(['divo', 'bouaké', 'anyama', 'san-pédro', 'daloa', 'gagnoa',
       'abidjan', 'man', 'grand-bassam', 'yamoussoukro', 'korhogo'],
      dtype=object)

In [114]:
print(df_user['city'].nunique())
df_user['city'].unique()

17


array(['centre divo', 'centre bouaké', 'centre anyama',
       'centre san-pédro', 'centre daloa', 'centre gagnoa', 'abobo',
       'centre man', 'centre grand-bassam', 'treichville',
       'centre yamoussoukro', 'centre korhogo', 'cocody', 'adjamé',
       'port-bouët', 'attécoubé', 'marcory'], dtype=object)

In [115]:
#df_Anst    region_name       department    city
#df_sp      NomReg            NomDep        NomSp
#df_user    region_name                     city

In [116]:
# Au niveau de df_user : 
# "city" correspond à "sous-prefecture"          OKI
# "region_name" correspond aux villes            OKI
# il faut retirer "centre" devant les valeurs du champ "sous-prefecture"        OKI
# mettre en majuscule toutes les valeurs du champ "sous-prefecture" et "city"   OKI
# les champs du cluster disponible : "age_num", "sex", "marital_status", "city", "mileu_resid", "bancarise"   OKI
# le champ manquant : "region_name               OKI
# en se basant sur la sous-prefecture (city) obtenir la region avec le dataset df_sp   OKI

In [117]:
df_user = df_user.rename(columns={
    "region_name": "cty_name"
})
df_user

Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,cty_name,city,...,previous_dwellings,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,divo,centre divo,...,1,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,37,F
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,bouaké,centre bouaké,...,4,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,47,F
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,anyama,centre anyama,...,2,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,29,F
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,san-pédro,centre san-pédro,...,5,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,24,F
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,daloa,centre daloa,...,5,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,59,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,man,centre man,...,3,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,62,F
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,korhogo,centre korhogo,...,2,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,33,M
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,daloa,centre daloa,...,1,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,43,M
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,gagnoa,centre gagnoa,...,4,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,40,M


In [118]:
# 1. Retirer "centre " au début de sous-prefecture
df_user["city"] = (
    df_user["city"]
      .astype(str)
      .str.replace(r"^centre\s+", "", regex=True)  # supprime "centre " au début
)

# 2. Mettre en majuscules sous-prefecture et city
cols = ["city", "cty_name"]
df_user[cols] = df_user[cols].apply(lambda s: s.astype(str).str.upper())

print(df_user[["cty_name", "city"]].head())
df_user

    cty_name       city
0       DIVO       DIVO
1     BOUAKÉ     BOUAKÉ
2     ANYAMA     ANYAMA
3  SAN-PÉDRO  SAN-PÉDRO
4      DALOA      DALOA


Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,cty_name,city,...,previous_dwellings,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,DIVO,DIVO,...,1,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,37,F
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,BOUAKÉ,BOUAKÉ,...,4,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,47,F
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,ANYAMA,ANYAMA,...,2,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,29,F
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,SAN-PÉDRO,SAN-PÉDRO,...,5,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,24,F
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,DALOA,DALOA,...,5,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,59,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,MAN,MAN,...,3,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,62,F
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,KORHOGO,KORHOGO,...,2,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,33,M
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,DALOA,DALOA,...,1,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,43,M
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,GAGNOA,GAGNOA,...,4,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,40,M


#### Créer region_name

##### 1) Construire le mapping sous‑préfecture → région

In [119]:
import unidecode

def norm(s):
    return unidecode.unidecode(str(s).strip().lower())

# copie pour ne pas modifier les originaux
sp = df_sp.copy()

sp["NomSp_norm"] = sp["NomSp"].apply(norm)

# si une sous-pref a plusieurs lignes, on garde la première région
mapping_sp_to_reg = (
    sp.dropna(subset=["NomSp_norm", "NomReg"])
      .drop_duplicates("NomSp_norm")
      .set_index("NomSp_norm")["NomReg"]
      .to_dict()
)


##### 2) Créer region_name dans df_user avec .map

In [120]:
user = df_user.copy()
user["city_norm"] = user["city"].apply(norm)

user["region_name"] = user["city_norm"].map(mapping_sp_to_reg)

# on peut supprimer la colonne technique si on veut
user = user.drop(columns=["city_norm"])

print(user[["city", "region_name"]].head())


        city     region_name
0       DIVO     LÔH-DJIBOUA
1     BOUAKÉ           GBEKE
2     ANYAMA         ABIDJAN
3  SAN-PÉDRO       SAN-PEDRO
4      DALOA  HAUT-SASSANDRA


In [121]:
user.isna().sum()

id                         0
birthday                   0
civility                   0
marital_status             0
bancarise                  0
children_number            0
address                    0
country                    0
cty_name                   0
city                       0
neighborhood               0
monthly_income             0
occupational_function      0
activity_sector_id         0
contract_type              0
monthly_charge             0
debt                       0
professional_experience    0
previous_dwellings         0
persons_dependents         0
created_at                 0
updated_at                 0
cleaned_at                 0
source_table               0
cleaning_version           0
milieu_resid               0
age_num                    0
sex                        0
region_name                0
dtype: int64

In [122]:
user

Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,cty_name,city,...,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex,region_name
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,DIVO,DIVO,...,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,37,F,LÔH-DJIBOUA
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,BOUAKÉ,BOUAKÉ,...,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,47,F,GBEKE
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,ANYAMA,ANYAMA,...,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,29,F,ABIDJAN
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,SAN-PÉDRO,SAN-PÉDRO,...,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,24,F,SAN-PEDRO
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,DALOA,DALOA,...,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,59,F,HAUT-SASSANDRA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,MAN,MAN,...,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,62,F,TONKPI
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,KORHOGO,KORHOGO,...,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,33,M,PORO
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,DALOA,DALOA,...,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,43,M,HAUT-SASSANDRA
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,GAGNOA,GAGNOA,...,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,40,M,GÔH


In [123]:
user["region_name"] = user["region_name"].replace(
    {"ABIDJAN": "AUTONOME D'ABIDJAN"}
)

In [124]:
user

Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,cty_name,city,...,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex,region_name
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,DIVO,DIVO,...,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,37,F,LÔH-DJIBOUA
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,BOUAKÉ,BOUAKÉ,...,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,47,F,GBEKE
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,ANYAMA,ANYAMA,...,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,29,F,AUTONOME D'ABIDJAN
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,SAN-PÉDRO,SAN-PÉDRO,...,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,24,F,SAN-PEDRO
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,DALOA,DALOA,...,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,59,F,HAUT-SASSANDRA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,MAN,MAN,...,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,62,F,TONKPI
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,KORHOGO,KORHOGO,...,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,33,M,PORO
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,DALOA,DALOA,...,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,43,M,HAUT-SASSANDRA
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,GAGNOA,GAGNOA,...,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Rural,40,M,GÔH


In [125]:
user.columns

Index(['id', 'birthday', 'civility', 'marital_status', 'bancarise',
       'children_number', 'address', 'country', 'cty_name', 'city',
       'neighborhood', 'monthly_income', 'occupational_function',
       'activity_sector_id', 'contract_type', 'monthly_charge', 'debt',
       'professional_experience', 'previous_dwellings', 'persons_dependents',
       'created_at', 'updated_at', 'cleaned_at', 'source_table',
       'cleaning_version', 'milieu_resid', 'age_num', 'sex', 'region_name'],
      dtype='object')

#### Réadapter milieu_resid en fonction de "city" (sp)

##### 1. Construire le mapping milieu dominant par city

In [126]:
# Charger le fichier des pourcentages
df_pct = pd.read_csv("./Rules Clusters/pourcentages_sous_prefecture.csv")  # colonnes: city, Rural, Urbain

# Milieu dominant = celui qui a le pourcentage le plus élevé
df_pct["milieu_dominant"] = df_pct[["Rural", "Urbain"]].idxmax(axis=1)

# Optionnel : vérifier
print(df_pct[["city", "Rural", "Urbain", "milieu_dominant"]].head())
df_pct

            city   Rural  Urbain milieu_dominant
0     ABENGOUROU   13.75   86.25          Urbain
1        ABIDJAN    0.00  100.00          Urbain
2         ABIGUI  100.00    0.00           Rural
3        ABOISSO   47.50   52.50          Urbain
4  ABOISSO-COMOE   50.36   49.64           Rural


Unnamed: 0,city,Rural,Urbain,milieu_dominant
0,ABENGOUROU,13.75,86.25,Urbain
1,ABIDJAN,0.00,100.00,Urbain
2,ABIGUI,100.00,0.00,Rural
3,ABOISSO,47.50,52.50,Urbain
4,ABOISSO-COMOE,50.36,49.64,Rural
...,...,...,...,...
437,ZONNEU,100.00,0.00,Rural
438,ZOU,67.86,32.14,Rural
439,ZOUAN-HOUNIEN,51.64,48.36,Rural
440,ZOUKOUGBEU,100.00,0.00,Rural


In [127]:
df_pct.isna().sum()

city               0
Rural              0
Urbain             0
milieu_dominant    0
dtype: int64

##### 2. Appliquer ce résultat au dataset

In [128]:
import unidecode

def norm(s):
    return unidecode.unidecode(str(s).strip().lower())

user["city_norm"] = user["city"].apply(norm)
df_pct["city_norm"] = df_pct["city"].apply(norm)

user = user.merge(
    df_pct[["city_norm", "milieu_dominant"]],
    on="city_norm",
    how="left"
).drop(columns=["city_norm"])
user["milieu_resid"] = user["milieu_dominant"]
user = user.drop(columns=["milieu_dominant"])
user

Unnamed: 0,id,birthday,civility,marital_status,bancarise,children_number,address,country,cty_name,city,...,persons_dependents,created_at,updated_at,cleaned_at,source_table,cleaning_version,milieu_resid,age_num,sex,region_name
0,5a09bfe7-d2a5-48ed-b4a1-cfe218cb8ebc,1988-02-10 08:42:26.130,mrs.,Célibataire,1,9,112 boulevard de marseille,côte d'ivoire,DIVO,DIVO,...,0,2024-08-01,2025-10-17 15:42:02.714,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,37,F,LÔH-DJIBOUA
1,32fc5161-c9a5-4465-9659-e8a0244dfde4,1978-02-21 13:47:53.144,mrs.,Veuf(ve),1,5,4 avenue jean mermoz,côte d'ivoire,BOUAKÉ,BOUAKÉ,...,1,2024-08-01,2025-10-17 15:42:01.796,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,47,F,GBEKE
2,d00dc51a-cfcf-4d33-844f-3735d5f48c55,1996-06-20 14:22:43.356,ms.,Célibataire,1,6,406 boulevard françois mitterrand,côte d'ivoire,ANYAMA,ANYAMA,...,6,2024-08-01,2025-10-17 15:42:02.762,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,29,F,AUTONOME D'ABIDJAN
3,abd739ba-c384-46bc-ba90-2111421883bf,2001-07-16 18:06:10.901,miss,Célibataire,0,5,571 boulevard lagunaire,côte d'ivoire,SAN-PÉDRO,SAN-PÉDRO,...,6,2024-08-01,2025-10-17 15:42:02.397,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,24,F,SAN-PEDRO
4,901d435b-afae-4ef1-b6e1-290a5ff95ac3,1966-03-06 05:08:37.875,mrs.,Veuf(ve),0,2,320 boulevard hassan ii,côte d'ivoire,DALOA,DALOA,...,7,2024-08-01,2025-10-17 15:42:01.940,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,59,F,HAUT-SASSANDRA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ee2ba334-d324-452e-9685-e8814f0ce5c8,1963-10-31 14:50:53.916,ms.,Célibataire,1,7,381 avenue chardy,côte d'ivoire,MAN,MAN,...,2,2024-08-01,2025-10-17 15:42:02.015,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,62,F,TONKPI
96,d1d23f6c-e83c-4531-89a5-a2bd355581a3,1992-03-13 21:44:34.287,mr.,Divorcé(e),0,9,79 boulevard de la république,côte d'ivoire,KORHOGO,KORHOGO,...,4,2024-08-01,2025-10-17 15:42:02.084,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,33,M,PORO
97,471a66d4-0b42-4d7f-9010-b7732d1c236b,1982-06-24 03:18:22.342,dr.,Veuf(ve),0,8,921 boulevard de marseille,côte d'ivoire,DALOA,DALOA,...,6,2024-08-01,2025-10-17 15:42:02.788,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,43,M,HAUT-SASSANDRA
98,9acd5f20-7975-4430-8986-69cdd594133e,1985-07-09 16:47:13.581,dr.,Veuf(ve),1,2,664 avenue nogués,côte d'ivoire,GAGNOA,GAGNOA,...,4,2024-08-01,2025-10-17 15:42:02.171,2025-11-14T00:05:35.864066+00:00,users,v1.0,Urbain,40,M,GÔH


In [129]:
user.isna().sum()

id                          0
birthday                    0
civility                    0
marital_status              0
bancarise                   0
children_number             0
address                     0
country                     0
cty_name                    0
city                        0
neighborhood                0
monthly_income              0
occupational_function       0
activity_sector_id          0
contract_type               0
monthly_charge              0
debt                        0
professional_experience     0
previous_dwellings          0
persons_dependents          0
created_at                  0
updated_at                  0
cleaned_at                  0
source_table                0
cleaning_version            0
milieu_resid               12
age_num                     0
sex                         0
region_name                 0
dtype: int64

In [130]:
lignes_manquantes = user[user.isna().any(axis=1)]
lignes_manquantes['city'].unique()

array(['ABOBO', 'TREICHVILLE', 'COCODY', 'ADJAMÉ', 'PORT-BOUËT',
       'ATTÉCOUBÉ', 'MARCORY'], dtype=object)

In [131]:
# liste des sous-préfectures d'Abidjan
sous_prefs_abidjan = [
    "ABOBO", "TREICHVILLE", "COCODY", "ADJAMÉ",
    "PORT-BOUËT", "ATTÉCOUBÉ", "MARCORY"
]

# Mettre en majuscules pour être sûr de matcher
user["city_upper"] = user["city"].str.upper()

# Condition: milieu_resid est NaN ET city dans la liste
mask = user["milieu_resid"].isna() & user["city_upper"].isin(sous_prefs_abidjan)

# Remplacer par "Urbain"
user.loc[mask, "milieu_resid"] = "Urbain"

# optionnel: supprimer la colonne technique
user = user.drop(columns=["city_upper"])

In [132]:
user.isna().sum()
#df_user

id                         0
birthday                   0
civility                   0
marital_status             0
bancarise                  0
children_number            0
address                    0
country                    0
cty_name                   0
city                       0
neighborhood               0
monthly_income             0
occupational_function      0
activity_sector_id         0
contract_type              0
monthly_charge             0
debt                       0
professional_experience    0
previous_dwellings         0
persons_dependents         0
created_at                 0
updated_at                 0
cleaned_at                 0
source_table               0
cleaning_version           0
milieu_resid               0
age_num                    0
sex                        0
region_name                0
dtype: int64

#### Load Data Cleaning

In [133]:
# Remplacer ces sous-préfectures par "ABIDJAN"
#user.loc[user["city"].isin(sous_prefs_abidjan), "city"] = "ABIDJAN"
#user

In [134]:
us = user.drop(['id', 'birthday', 'civility','children_number', 'address', 'country', 'cty_name',
       'neighborhood', 'monthly_income', 'occupational_function','activity_sector_id', 'contract_type', 'monthly_charge', 'debt',
       'professional_experience', 'previous_dwellings', 'persons_dependents','created_at', 'updated_at', 'cleaned_at', 'source_table',
       'cleaning_version'], axis=1)
us

Unnamed: 0,marital_status,bancarise,city,milieu_resid,age_num,sex,region_name
0,Célibataire,1,DIVO,Urbain,37,F,LÔH-DJIBOUA
1,Veuf(ve),1,BOUAKÉ,Urbain,47,F,GBEKE
2,Célibataire,1,ANYAMA,Urbain,29,F,AUTONOME D'ABIDJAN
3,Célibataire,0,SAN-PÉDRO,Urbain,24,F,SAN-PEDRO
4,Veuf(ve),0,DALOA,Urbain,59,F,HAUT-SASSANDRA
...,...,...,...,...,...,...,...
95,Célibataire,1,MAN,Urbain,62,F,TONKPI
96,Divorcé(e),0,KORHOGO,Urbain,33,M,PORO
97,Veuf(ve),0,DALOA,Urbain,43,M,HAUT-SASSANDRA
98,Veuf(ve),1,GAGNOA,Urbain,40,M,GÔH


In [135]:
user.to_csv("./Paquets/BD_Proxy_Users.csv", index=False, encoding="utf-8")
us.to_csv("./Paquets/Proxy.csv", index=False, encoding="utf-8")