In [None]:
import requests
import pandas as pd

In [None]:
# Read the POI sample CSV (addresses and locations) into the working DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
file_path = "POI+sample+addresses+and+locations.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Number of distinct st_x values; dropna=False keeps NaN counted as one value.
data["st_x"].nunique(dropna=False)

In [None]:
# Collect rows whose coordinates are unusable: either value is missing, or
# st_x falls outside [-180, 180], or st_y falls outside [-90, 90].
# (st_x is used as the longitude and st_y as the latitude further down.)
invalid_geo_data = data.loc[
    data["st_x"].isna()
    | data["st_y"].isna()
    | ~data["st_x"].between(-180, 180)
    | ~data["st_y"].between(-90, 90)
]

len(invalid_geo_data)

In [None]:
# Collect rows with an unusable postal address: missing or blank street line,
# missing postcode or one outside 1000..99999, or missing or blank city name.
invalid_address_data = data.loc[
    data["adresse"].isna()
    | (data["adresse"].str.strip() == "")
    | data["code_postal"].isna()
    | ~data["code_postal"].between(1000, 99999)
    | data["ville"].isna()
    | (data["ville"].str.strip() == "")
]

invalid_address_data.head()

In [None]:
# Display the loaded DataFrame for a visual check of its columns and values.
data

In [None]:
# Function to parse the API response and extract relevant address data
def parse_ban_response(response):
    """Flatten a decoded BAN API payload into a list of plain address dicts.

    Args:
        response: Decoded JSON payload (a dict). Its optional "features" list
            holds entries that each carry a "properties" and a "geometry" dict.

    Returns:
        One dict per feature, in the order received, with the keys "label",
        "coordinates", "postcode", "city", "street" and "score". Absent
        values default to "" (text fields), [] (coordinates) and 0 (score).
        The list is empty when the payload has no "features" entry.

    Raises:
        KeyError: If a feature has no "properties" or no "geometry" entry.
    """

    def _summarize(feature):
        # Keep only the fields used downstream, with neutral defaults.
        props = feature["properties"]
        return {
            "label": props.get("label", ""),
            "coordinates": feature["geometry"].get("coordinates", []),
            "postcode": props.get("postcode", ""),
            "city": props.get("city", ""),
            "street": props.get("street", ""),
            "score": props.get("score", 0),
        }

    return [_summarize(feature) for feature in response.get("features", [])]


# Function to normalize an address using the BAN API
def normalize_address(address, postcode, ville, timeout=10):
    """Look up a postal address with the BAN search endpoint.

    The three parts are joined with spaces into one free-text query. Missing
    parts (None / NaN) and blank strings are left out rather than being sent
    as the literal text "nan" or "None".

    Args:
        address: Street line of the address.
        postcode: Postal code, as text or as a number. A whole-number float
            such as 71210.0 (what pandas yields for a numeric column that
            contains missing values) is sent as "71210".
        ville: City name.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The list of candidate addresses produced by parse_ban_response, or
        None when there is nothing to search for or the API answers with a
        status other than 200.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    query_parts = []
    for part in (address, postcode, ville):
        if pd.isna(part):
            continue
        # Drop the spurious ".0" of a postcode read from a float column.
        if isinstance(part, float) and part.is_integer():
            part = int(part)
        text = str(part).strip()
        if text:
            query_parts.append(text)

    if not query_parts:
        return None  # Nothing to search for

    # Let requests build the query string so characters such as "&" or "#"
    # in an address are escaped instead of corrupting the URL.
    response = requests.get(
        "https://api-adresse.data.gouv.fr/search/",
        params={"q": " ".join(query_parts)},
        timeout=timeout,
    )
    if response.status_code == 200:
        return parse_ban_response(response.json())  # Parse the normalized address
    return None  # Handle errors


# Function to correct geolocation
def correct_geolocation(latitude, longitude, timeout=10):
    """Reverse-geocode a coordinate pair with the BAN reverse endpoint.

    Args:
        latitude: Latitude of the point, sent as the "lat" query parameter.
        longitude: Longitude of the point, sent as the "lon" query parameter.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The list of candidate addresses produced by parse_ban_response, or
        None when a coordinate is missing (None / NaN) or the API answers
        with a status other than 200.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # A missing coordinate would otherwise be sent as the text "nan"/"None".
    if pd.isna(latitude) or pd.isna(longitude):
        return None

    response = requests.get(
        "https://api-adresse.data.gouv.fr/reverse/",
        params={"lat": latitude, "lon": longitude},
        timeout=timeout,
    )
    if response.status_code == 200:
        return parse_ban_response(response.json())  # Parse the corrected geolocation
    return None  # Handle errors

In [None]:
# Probe the BAN reverse endpoint with a single coordinate pair and display the
# raw JSON, to see the payload shape before parsing it.
longitude, latitude = 4.0874698, 45.6094812
response = requests.get(
    "https://api-adresse.data.gouv.fr/reverse/",
    params={"lat": latitude, "lon": longitude},
    timeout=10,  # fail instead of blocking the kernel if the API does not answer
)
response.json()

In [None]:
# Inline CSV fixture: one header line followed by eight records. Each record carries the
# input fields (identifiant_unique, adresse, adresse_complement, code_postal, ville, st_x,
# st_y) plus latitude/longitude and result_* columns. The last record has empty address
# fields and a result_status of "skipped".
# NOTE(review): the result_* columns look like geocoder output (presumably BAN) — confirm the source.
l = """identifiant_unique,adresse,adresse_complement,code_postal,ville,st_x,st_y,latitude,longitude,result_label,result_score,result_score_next,result_type,result_id,result_housenumber,result_name,result_street,result_postcode,result_city,result_context,result_citycode,result_oldcitycode,result_oldcity,result_district,result_status
lescreadevicky_203741_reparation_0633175984,3 IMPASSE DES TUILIERS,,71210,MONTCHANIN,4.46914,46.758914,46.758914,4.46914,3 Impasse des Tuiliers 71210 Montchanin,0.9459581818181817,,housenumber,71310_0371_00003,3,3 Impasse des Tuiliers,Impasse des Tuiliers,71210,Montchanin,"71, Saône-et-Loire, Bourgogne-Franche-Comté",71310,,,,ok
bijouterie_obrecht_93995_reparation,24 RUE DE LA GARE,,68690,MOOSCH,7.051734,47.861574,47.861686,7.051751,24 Rue de la Gare 68690 Moosch,0.9560809090909091,0.6494824242424242,housenumber,68217_0080_00024,24,24 Rue de la Gare,Rue de la Gare,68690,Moosch,"68, Haut-Rhin, Grand Est",68217,,,,ok
amberley_design_services_172205_reparation_0546861272,4 LA TAILLEZERIE,,17150,BOISREDON,-0.5355886,45.3303143,45.330382,-0.537904,4 la Taillezerie 17150 Boisredon,0.9412099999999999,,housenumber,17052_fidjzi_00004,4,4 la Taillezerie,la Taillezerie,17150,Boisredon,"17, Charente-Maritime, Nouvelle-Aquitaine",17052,,,,ok
bijoux_enora_19597_reparation_0669640932,2 CHEMIN DES VIRGILES,LE CAPET D AZUR BAT H23,83120,SAINTE MAXIME,6.6280625,43.3130868,43.313837,6.628483,2 Chemin des Virgiles 83120 Sainte-Maxime,0.5893362695924764,,housenumber,83115_1031_00002,2,2 Chemin des Virgiles,Chemin des Virgiles,83120,Sainte-Maxime,"83, Var, Provence-Alpes-Côte d'Azur",83115,,,,ok
simonet_lionel_213050_reparation_0680960766,66 RUE DU PARC,,77360,VAIRES SUR MARNE,2.63824,48.866954,48.866954,2.63824,66 Rue du Parc 77360 Vaires-sur-Marne,0.9654054545454546,,housenumber,77479_0480_00066,66,66 Rue du Parc,Rue du Parc,77360,Vaires-sur-Marne,"77, Seine-et-Marne, Île-de-France",77479,,,,ok
sarl_bimob_60746_reparation_0232580314,30 B RUE HYPPOLITE LOZIER,,27320,NONANCOURT,1.192711,48.773491,48.773222,1.192855,30bis R Hyppolite Lozier 27320 Nonancourt,0.7317390909090908,,housenumber,27438_0045_00030_bis,30bis,30bis R Hyppolite Lozier,R Hyppolite Lozier,27320,Nonancourt,"27, Eure, Normandie",27438,,,,ok
bernard_emilie_139818_reparation_0676243532,1 DOMAINE DES CHAMPS FLEURIS,,42600,SAVIGNEUX,4.0874698,45.6094812,45.60926,4.087256,1 Domaine des Champs Fleuris 42600 Savigneux,0.9537290909090909,,housenumber,42299_a010_00001,1,1 Domaine des Champs Fleuris,Domaine des Champs Fleuris,42600,Savigneux,"42, Loire, Auvergne-Rhône-Alpes",42299,,,,ok
bernard_emilie139818rfrfrf_reparation_0676243532,,,,,4.0874688,45.6094812,,,,,,,,,,,,,,,,,,skipped
"""

In [None]:
import io
import csv

In [None]:
# Parse the inline CSV text held in `l`; each record is yielded as a dict keyed by the header names.
# csv.DictReader is a one-shot iterator: once it has been consumed, re-run this cell to rebuild it.
reader = csv.DictReader(io.StringIO(l))

In [None]:
# Print every parsed record as a dict. This exhausts `reader`, so running the
# cell again prints nothing until `reader` has been re-created.
for r in reader:
    print(r)

In [None]:
# Enrich the first 100 rows of the dataset with BAN results: the first match
# for the postal address, and the first match for the stored coordinates.
for index, row in data.iloc[:100].iterrows():
    # Address lookup: copy label, postcode and city of the first candidate.
    normalized_addresses = normalize_address(row["adresse"], row["code_postal"], row["ville"])
    if normalized_addresses:
        top_address = normalized_addresses[0]
        data.at[index, "normalized_address"] = top_address["label"]
        data.at[index, "normalized_postcode"] = top_address["postcode"]
        data.at[index, "normalized_city"] = top_address["city"]

    # Coordinate lookup: st_y is passed as the latitude, st_x as the longitude.
    corrected_geos = correct_geolocation(row["st_y"], row["st_x"])
    if corrected_geos:
        # Position 1 of the coordinate pair is stored as latitude, position 0 as longitude.
        top_coordinates = corrected_geos[0]["coordinates"]
        data.at[index, "corrected_lat"] = top_coordinates[1]
        data.at[index, "corrected_lon"] = top_coordinates[0]

In [None]:
# Display the DataFrame after the enrichment loop. Only the first 100 rows were
# processed, so the normalized_* / corrected_* columns are unset for the other rows
# and for any processed row whose lookup returned nothing.
data

In [None]:
# Iterate over each row in the dataset to normalize addresses and correct geolocation