In [None]:
# Create a new, clean dataset.
# The dataset contains only French companies, as specified in the test statement (line 5, in bold).
# In the new dataset, country_name and city_name are converted to the real names expected by the API provided in the test statement (used in this cell).

from geopy.geocoders import Nominatim
from geopy.distance import geodesic

from geopy.exc import GeocoderTimedOut

import pandas as pd
import os

# Remove previously generated output files so that every run starts clean.
# Each file is removed on its own: a missing first file must not prevent the
# second one from being deleted (which happened when both calls shared one try).
for generatedFile in ('Dataframe_result.csv', 'askedFile.csv'):
    try:
        os.remove(generatedFile)
    except FileNotFoundError:
        # Nothing to clean up for this file.
        pass

# Function to extract longitude and latitude from an address
def longLatAddress(address, maxRetries=2):
    """Geocode an address with Nominatim and return its coordinates.

    Parameters
    ----------
    address : str
        Free-form address handed to the geocoder.
    maxRetries : int, optional
        Number of additional attempts made when the geocoder times out.
        Negative values are treated as 0.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match returned by the geocoder, or
        ``(None, None)`` when the geocoder finds nothing. Note that, despite
        the function name, latitude comes first.

    Raises
    ------
    GeocoderTimedOut
        When the request still times out after the last attempt.
    """
    geolocator = Nominatim(user_agent="https://adresse.data.gouv.fr/api-doc/adresse")

    lastAttempt = max(maxRetries, 0)
    companyLocation = None
    for attempt in range(lastAttempt + 1):
        try:
            companyLocation = geolocator.geocode(address)
            break
        except GeocoderTimedOut:
            # A timeout is usually transient: retry, but do not hide a
            # persistent failure behind a fake "address not found" result.
            if attempt == lastAttempt:
                raise

    if companyLocation is None:
        return (None, None)
    return (companyLocation.latitude, companyLocation.longitude)

# Function to measure distance between a headquarter and its warehouses
def distanceLocations(headquarter, warehouse, nearLimitKm=20, farLimitKm=100):
    """Classify the geodesic distance between a headquarter and a warehouse.

    Parameters
    ----------
    headquarter, warehouse
        Points passed unchanged to ``geodesic``.
    nearLimitKm : float, optional
        Distances strictly below this value (in kilometers) are class "1".
    farLimitKm : float, optional
        Distances strictly below this value (and not class "1") are class "2".

    Returns
    -------
    str
        "1" when distance < nearLimitKm, "2" when
        nearLimitKm <= distance < farLimitKm, "3" otherwise.
        The boundaries are included on purpose: with the previous strict
        comparisons a distance of exactly 20 or 100 km matched no branch and
        the function failed with an unbound local variable.
    """
    distance = geodesic(headquarter, warehouse).kilometers
    if distance < nearLimitKm:
        return "1"
    if distance < farLimitKm:
        return "2"
    return "3"

# Function to extract headquarters from the new dataframe "extractedDF"
def extractHeadquarters(addressWarehouse):
    """Extract the French headquarters and classify each by distance to a warehouse.

    Reads ``locations.csv`` (``;``-separated), ``country.json`` and
    ``city.json`` from the working directory, keeps the rows flagged as
    headquarters whose country is France, and writes them to
    ``Dataframe_result.csv``. Each remaining headquarter is then geocoded and
    compared with the geocoded warehouse address.

    Parameters
    ----------
    addressWarehouse : str
        Address of the warehouse; geocoded with ``longLatAddress``.

    Returns
    -------
    tuple
        ``(extractedDF, classColumn)``: the filtered DataFrame and a list with
        one entry per row, either the value returned by ``distanceLocations``
        or ``"incorrectAddress"`` when the headquarter could not be geocoded.

    Raises
    ------
    ValueError
        When the warehouse address itself cannot be geocoded.
    """
    # Load dependencies as dataframes
    dfLocations = pd.read_csv('locations.csv', sep=';')
    dfCountry = pd.read_json('country.json')
    dfCity = pd.read_json('city.json')

    # Add readable city / country names next to their ids. The city column is
    # inserted first so that both columns land at the same positions as before.
    cityNames = dfCity.set_index('id')['name']
    countryNames = dfCountry.set_index('id')['name']
    dfLocations.insert(3, "city", dfLocations['city_id'].map(cityNames), allow_duplicates=True)
    dfLocations.insert(2, "country", dfLocations['country_id'].map(countryNames), allow_duplicates=True)

    # Look the id of France up by column name, so the result does not depend
    # on 'id' being the first column of country.json.
    idFrance = dfCountry.loc[dfCountry['name'] == 'France', 'id'].iloc[0]
    isFrenchHeadquarter = (dfLocations['is_headquarter'] == 1) & (dfLocations['country_id'] == idFrance)

    # NOTE(review): dropna() without arguments drops rows with a missing value
    # in ANY column, not only in the address - confirm this is intended.
    # .copy() detaches the result from dfLocations so callers can add columns
    # to it without triggering SettingWithCopyWarning.
    extractedDF = dfLocations[isFrenchHeadquarter].dropna().copy()
    extractedDF.to_csv('Dataframe_result.csv', index=False, header=True)

    # Geocode the warehouse once; fail early with a clear message instead of
    # letting a (None, None) point reach the distance computation.
    warehouseLatitude, warehouseLongitude = longLatAddress(addressWarehouse)
    if warehouseLatitude is None or warehouseLongitude is None:
        raise ValueError("Warehouse address could not be geocoded: {!r}".format(addressWarehouse))
    warehousePoint = (warehouseLatitude, warehouseLongitude)

    # One class per headquarter, in row order
    classColumn = []
    for _, row in extractedDF.iterrows():
        fullAddress = row['address'] + " " + row['city'] + " " + row['country']
        headquarterLatitude, headquarterLongitude = longLatAddress(fullAddress)
        if headquarterLatitude is None or headquarterLongitude is None:
            classColumn.append("incorrectAddress")
        else:
            headquarterPoint = (headquarterLatitude, headquarterLongitude)
            classColumn.append(distanceLocations(headquarterPoint, warehousePoint))

    return extractedDF, classColumn

In [None]:
# Warehouse used as the reference point for the distance classes.
# Alternative sample input: "23, Rue des Jeûneurs, Paris, France"
addressWarehouse = "16, Place de l'iris, Courbevoie, France"

extractedDF, classColumn = extractHeadquarters(addressWarehouse)

# Build the final table with assign() instead of writing a new column into the
# returned frame in place: that frame is derived from a filtered selection, and
# in-place column assignment on such a frame can raise SettingWithCopyWarning.
extractedDF = extractedDF.assign(Class=classColumn)
extractedDF.to_csv('askedFile.csv', index=False, header=True)