In [10]:
import pandas as pd

In [6]:
import s3fs
import hvac
import os

# Open a Vault client using the token provided through the environment.
client = hvac.Client(
    url='https://vault.lab.sspcloud.fr',
    token=os.environ['VAULT_TOKEN'],
)

# The secret location is "<VAULT_MOUNT><VAULT_TOP_DIR>/s3": the part before the
# first '/' is used as the mount point, the remainder as the secret path.
secret = os.environ['VAULT_MOUNT'] + os.environ['VAULT_TOP_DIR'] + '/s3'
mount_point, secret_path = secret.split('/', 1)
secret_dict = client.secrets.kv.read_secret_version(
    mount_point=mount_point,
    path=secret_path,
)

# Copy the key pair stored in the secret into the AWS_* environment variables.
s3_credentials = secret_dict['data']['data']
os.environ["AWS_ACCESS_KEY_ID"] = s3_credentials['ACCESS_KEY_ID']
os.environ["AWS_SECRET_ACCESS_KEY"] = s3_credentials['SECRET_ACCESS_KEY']

# Remove any session token already present in the environment (no-op when absent).
# NOTE(review): presumably to keep a stale token from being combined with the
# key pair set above -- confirm.
os.environ.pop('AWS_SESSION_TOKEN', None)

In [8]:
# Build the S3 filesystem handle from the AWS_* environment variables and the
# MinIO endpoint URL.
endpoint_url = 'https://' + 'minio.lab.sspcloud.fr'
fs = s3fs.S3FileSystem(
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    client_kwargs={'endpoint_url': endpoint_url},
)

# List the contents of the RIL directory of the project bucket.
fs.ls('projet-slums-detection/Donnees/RIL/')

['projet-slums-detection/Donnees/RIL/.keep',
 'projet-slums-detection/Donnees/RIL/rca_geometry_ilot.rds',
 'projet-slums-detection/Donnees/RIL/rca_geometry_ilots.rds',
 'projet-slums-detection/Donnees/RIL/ril_gc_dom.csv',
 'projet-slums-detection/Donnees/RIL/ril_pc_dom.csv']

In [11]:
# Read the `ril_pc_dom.csv` file from the bucket into a DataFrame.
# NOTE(review): the saved output of this cell looks like the tail of a pandas
# warning pointing at this read_csv call; if it is a DtypeWarning, consider
# passing explicit `dtype=` (or `low_memory=False`) -- confirm on re-run.
with fs.open('projet-slums-detection/Donnees/RIL/ril_pc_dom.csv', 'r') as ril_pc_file:
    df_pc = pd.read_csv(ril_pc_file)

  df_pc = pd.read_csv(f)


In [12]:
# Summarise the frame: index range, column names, non-null counts and dtypes.
df_pc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135915 entries, 0 to 135914
Data columns (total 51 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   id                      135915 non-null  int64  
 1   depcom                  135915 non-null  int64  
 2   id_ea                   135915 non-null  int64  
 3   principal               135915 non-null  object 
 4   numero                  135915 non-null  int64  
 5   repetition              4959 non-null    object 
 6   complement              55003 non-null   object 
 7   type_voie               135332 non-null  object 
 8   libelle_voie            135915 non-null  object 
 9   rivoli                  87208 non-null   object 
 10  ea_type                 135915 non-null  object 
 11  hab_hot_type            135800 non-null  object 
 12  nb_log                  135800 non-null  float64
 13  etat                    135800 non-null  float64
 14  pc_num              

In [13]:
# Remove the limit on displayed columns (global pandas option) so that the
# preview below shows every column of the frame.
pd.options.display.max_columns = None
df_pc.head()

Unnamed: 0,id,depcom,id_ea,principal,numero,repetition,complement,type_voie,libelle_voie,rivoli,ea_type,hab_hot_type,nb_log,etat,pc_num,commentaire,hab_construction_an,hab_nb_imm,hab_niv,hot_sous_type,hot_enseigne,lien_cmt,x,y,code_epsg,type_localisation,numero_parcelle,code_ilot,code_iris,quartier_prioritaire,canton,liste_insee,id_rp,dernier_tirage,echantillon,grp_rotation,date_maj_ea,com_anc_idft,com_type,com_statut,com_nb_log_fonction,com_nom,com_date_creation_base,com_date_fermeture,com_capacite_theorique,millesime,date_livraison,ref_ea,id_rca_adresse,reperage,id_ea_regroup
0,32033149,97102,971020000002014,O,0,,POINTE DE LA GRANDE VIGIE,ALL,POINTE D'ANTIGUE,,HABIT,0052,0.0,1.0,,,1900.0,0.0,0.0,,,1.0,663634.543,1825946.61,4559,CIC,,PED1,102.0,,14.0,N,97102 PED1 015,,N,5.0,,,,,,,,,,2023,2022-10-05 16:41:04.000,11589082,13805445,COMMERCES,
1,32033150,97102,971020000002013,O,0,,,ALL,POINTE D'ANTIGUE,,HABIT,002D,1.0,0.0,,,1900.0,1.0,2.0,,,1.0,662540.599,1825264.35,4559,CIC,,PED1,102.0,,14.0,N,97102 PED1 014,,N,5.0,,,,,,,,,,2023,2022-10-05 16:41:04.000,11589083,13805446,PORTAIL VERT / TOIT BLC,
2,32033151,97102,971020000002008,O,0,,,ALL,POINTE D'ANTIGUE,,HABIT,002D,6.0,0.0,,,1900.0,1.0,1.0,,,1.0,662512.349,1825143.23,4559,CIC,,PED1,102.0,,14.0,N,97102 PED1 009,,N,5.0,,,,,,,,,,2023,2022-10-05 16:41:04.000,11589084,13805447,ACCES CHEM GITES,
3,32033152,97102,971020000002007,O,0,,,ALL,POINTE D'ANTIGUE,,HABIT,002D,1.0,0.0,,,1900.0,1.0,1.0,,,1.0,662573.224,1825054.98,4559,CIC,,PED1,102.0,,14.0,N,97102 PED1 008,,N,5.0,,,,,,,,,,2023,2022-10-05 16:41:04.000,11589085,13805448,MAISON JAUNE /TOIT BLEU,
4,32033153,97102,971020000002015,O,0,,,ALL,POINTE D'ANTIGUE,,HABIT,002D,1.0,0.0,,,1900.0,1.0,2.0,,,1.0,662645.662,1824859.98,4559,CIC,,PED1,102.0,,14.0,N,97102 PED1 016,,N,5.0,,,,,,,,,,2023,2022-10-05 16:41:04.000,11589086,13805449,ACCES CHEM,


### Dictionnaire des variables

In [29]:
# Count the occurrences of each distinct non-null value of `lien_cmt`.
df_pc['lien_cmt'].value_counts()

1.0    135683
Name: lien_cmt, dtype: int64

- id: Identifiant du logement
- depcom: Code commune
- id_ea: Autre identifiant (vraisemblablement celui de l'entité adressée, « EA » ; à confirmer)
- principal: Oui ou Non, quasiment uniquement des Oui.
- numero: Numéro de la voie
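- repetition: Beaucoup de NaN (seulement 4 959 valeurs renseignées sur 135 915), signification ? Probablement l'indice de répétition du numéro (bis, ter, …), à confirmer.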
- complement: Complément d'adresse
- type_voie: Type de voie
- libelle_voie: Nom de la voie
- rivoli: Signification ? Probablement le code RIVOLI (FANTOIR) de la voie, à confirmer.
- ea_type: HABIT, HOTEL ou COMMU
- hab_hot_type: Signification ? Quelques modalités: 002D, 002C, 002B, etc.
- nb_log: Nombre de logements
- etat: Variable entière 0 -> 6. Signification ?
- pc_num: Numéro de permis de construire
- commentaire: Commentaire sur le logement
- hab_construction_an: Année de construction
- hab_nb_imm: Variable entière 0 -> 4. Signification ?
- hab_niv: Signification ?
- hot_sous_type: Type d'hôtel
- hot_enseigne: Enseigne d'hôtel
- lien_cmt: Signification ? Une seule modalité observée : 1.0 (135 683 lignes), NaN pour les autres lignes.