In [46]:
import pandas as pd

regions = {"auvergne-rhone-alpes": [1,3,7,15,26,38,42,43,63,69,73,74],
            "bourgogne" : [21,25,39,58,70,71,89,90],
            "bretagne" : [22,29,35,56],
            "centre" : [18,28,36,37,41,45],
            "grand-est" : [8,10,51,52,54,55,57,67,68,88],
            "haut-de-france" : [2,59,60,62,80],
            "ile-de-france" : [75,77,78,91,92,93,94,95],
            "normandie" : [14,27,50,61,76],
            "nouvelle-aquitaine" : [16,17,19,23,24,33,40,47,64,79,86,87],
            "occitanie" : [9,11,12,30,31,34,46,48,65,66,81,82],
            "pays-de-la-loire" : [44,49,53,72,85],
            "provence" : [4,5,6,13,83,84]
            # "corse" : ["2A","2B"]
}

immo_vis_dir = "../../data/immo_vis/"
file_ref = "df_annonces_gps_iris_ventes_"
output_file_ref = "ventes-"
parquet_extension = ".parquet"
file_version = ".1"
metropole_file =  output_file_ref + "metropole" + file_version + parquet_extension
metropole_appartement_file = output_file_ref + "metropole-appartement" + file_version + parquet_extension
metropole_maison_file = output_file_ref + "metropole-maison" + file_version + parquet_extension


In [30]:

def read_immocv_region (regions, region,remote_url,verbose=False) :
    df = pd.DataFrame()
    for departement in regions[region] :
        # copy remote file
        dep_file = f"{file_ref}{departement:0>2}" + ".csv"
        repo_url = "https://raw.githubusercontent.com/klopstock-dviz/immo_vis/refs/heads/master/data/annonces_git/"
        if verbose :
            print (dep_file)
        if remote_url :
            file_to_read = repo_url +dep_file
        else :
            file_to_read = immo_vis_dir + region + "/" + dep_file 
        if verbose :
                print (file_to_read)
        df1 = pd.read_csv(file_to_read,delimiter=';',header=0,encoding="UTF-8",low_memory=False)
        df = pd.concat([df, df1], ignore_index=True)    
    store_file = output_file_ref + region + file_version +  parquet_extension
    output_file = immo_vis_dir + store_file
    df.to_parquet(path=output_file,index=True)
    return output_file

In [13]:
def read_immocv_all_regions (regions, remote_url,verbose=False) :
    for region in regions :
        print (f"read_immocv_all_regions process {region}" )
        read_immocv_region(regions,region,remote_url,verbose)

In [44]:
def build_immocv_metropole (regions, input_dir, verbose=False) :
    df = pd.DataFrame()
    for region in regions :
        # copy remote file
        dep_file = f"{output_file_ref}{region}" + file_version + parquet_extension
        file_to_read = input_dir + dep_file 
        if verbose :
                print (f"read region {region} {file_to_read}")
        df1 = pd.read_parquet(file_to_read)
        df = pd.concat([df, df1], ignore_index=True)    
    store_file =  metropole_file
    output_file = input_dir + store_file
    if verbose :
        print (f"write metropole file {df.shape} {output_file}")
    df.to_parquet(path=output_file,index=True)
    return output_file

In [None]:
# read one region

region = "auvergne-rhone-alpes"
remote_url = True
output_file = read_immocv_region (regions, region,remote_url)

dfr= pd.read_parquet(output_file)
print (dfr.shape)
display(dfr.head(10))

In [28]:
# read all regions
remote_url = True
read_immocv_all_regions (regions, remote_url)


read_immocv_all_regions process auvergne-rhone-alpes
read_immocv_all_regions process bourgogne
read_immocv_all_regions process bretagne
read_immocv_all_regions process centre
read_immocv_all_regions process grand-est
read_immocv_all_regions process haut-de-france
read_immocv_all_regions process ile-de-france
read_immocv_all_regions process normandie
read_immocv_all_regions process nouvelle-aquitaine
read_immocv_all_regions process occitanie
read_immocv_all_regions process pays-de-la-loire
read_immocv_all_regions process provence


In [48]:
#  build metropole file
remote_url = True
output_file = build_immocv_metropole (regions,immo_vis_dir,verbose=True)

dfr= pd.read_parquet(output_file)
print (dfr.shape)
display(dfr.head(10))

read region auvergne-rhone-alpes ../../data/immo_vis/ventes-auvergne-rhone-alpes.1.parquet
read region bourgogne ../../data/immo_vis/ventes-bourgogne.1.parquet
read region bretagne ../../data/immo_vis/ventes-bretagne.1.parquet
read region centre ../../data/immo_vis/ventes-centre.1.parquet
read region grand-est ../../data/immo_vis/ventes-grand-est.1.parquet
read region haut-de-france ../../data/immo_vis/ventes-haut-de-france.1.parquet
read region ile-de-france ../../data/immo_vis/ventes-ile-de-france.1.parquet
read region normandie ../../data/immo_vis/ventes-normandie.1.parquet
read region nouvelle-aquitaine ../../data/immo_vis/ventes-nouvelle-aquitaine.1.parquet
read region occitanie ../../data/immo_vis/ventes-occitanie.1.parquet
read region pays-de-la-loire ../../data/immo_vis/ventes-pays-de-la-loire.1.parquet
read region provence ../../data/immo_vis/ventes-provence.1.parquet
write metropole file (5770766, 59) ../../data/immo_vis/ventes-metropole.1.parquet
(5770766, 59)


Unnamed: 0,idannonce,type_annonceur,typedebien,typedetransaction,etage,surface,surface_terrain,nb_pieces,prix_bien,prix_maison,...,UU2010,REG,DEP,loyer_m2_median_n6,nb_log_n6,taux_rendement_n6,loyer_m2_median_n7,nb_log_n7,taux_rendement_n7,prix_m2_vente
0,entities-1037648-4986327,pr,m,v,0,87,680.0,4,295900,,...,451,84,1,,,,,,,3401.15
1,netty-pietrapolis-house-68858,pr,m,v,0,365,409.0,5,150000,,...,451,84,1,,,,,,,410.96
2,entities-1037648-4986094,pr,m,v,0,90,680.0,4,299900,,...,451,84,1,,,,,,,3332.22
3,entities-1037648-4986254,pr,m,v,0,95,680.0,5,300000,,...,451,84,1,,,,,,,3157.89
4,entities-1037666-4976019,pr,m,v,0,95,960.0,4,310000,,...,451,84,1,,,,,,,3263.16
5,entities-1037666-4976018,pr,m,v,0,114,682.0,4,335000,,...,451,84,1,,,,,,,2938.6
6,entities-1037648-4977862,pr,m,v,0,80,723.0,4,275000,,...,451,84,1,,,,,,,3437.5
7,immo-facile-59642564,pr,m,v,0,158,409.0,6,435000,,...,758,84,1,11.77,3.0,0.051,11.77,3.0,0.051,2753.16
8,hektor-39_idlr-7397,pr,m,v,0,290,1980.0,9,370000,,...,1000,84,1,,,,,,,1275.86
9,gedeon-32291593,pr,m,v,0,100,,5,485000,,...,758,84,1,10.67,13.0,0.026,10.67,13.0,0.026,4850.0


In [50]:
# read metropole file

from pathlib import Path


start_path = Path(immo_vis_dir)
final_path = start_path / metropole_file

print (f"read metropole maison file  {final_path.as_posix()}")
df= pd.read_parquet(final_path.as_posix())
print (df.shape)
# display(df.head(10))
print (df.typedebien.value_counts())
# dfa = df [(df.typedebien == "a") | (df.typedebien == "an")]
# appart_path = start_path / metropole_appartement_file
# print (f"write metropole appartement file {dfa.shape} {appart_path.as_posix()}")
# dfa.to_parquet(path=appart_path.as_posix(),index=True)

dfm = df [(df.typedebien == "m") | (df.typedebien == "mn")]
maison_path = start_path / metropole_maison_file
print (f"write metropole maison file {dfm.shape} {maison_path.as_posix()}")
dfm.to_parquet(path=maison_path.as_posix(),index=True)

dfa = df [(df.typedebien == "a") | (df.typedebien == "an")]
appart_path = start_path / metropole_appartement_file
print (f"write metropole appartement file {dfa.shape} {appart_path.as_posix()}")
dfa.to_parquet(path=appart_path.as_posix(),index=True)



read metropole maison file  ../../data/immo_vis/ventes-metropole.1.parquet
(5770766, 59)
typedebien
m                     3246723
a                     2348009
an                      90192
mn                      85388
l                         406
Maison/Villa neuve         24
h                          24
Name: count, dtype: int64
write metropole maison file (3332111, 59) ../../data/immo_vis/ventes-metropole-maison.1.parquet
write metropole appartement file (2438201, 59) ../../data/immo_vis/ventes-metropole-appartement.1.parquet


In [2]:
start_path = Path(immo_vis_dir)
final_path = start_path / metropole_file
final_path.as_posix
df= pd.read_parquet(final_path.as_posix())
print (df.info())

NameError: name 'Path' is not defined