In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
def get_streets(city_id):
  sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
  sparql.setQuery(f"""
  PREFIX wd: <http://www.wikidata.org/entity/>
  PREFIX wdt: <http://www.wikidata.org/prop/direct/>
  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

  SELECT ?location ?locationLabel ?wikipediaUrl ?coord WHERE {{
    ?location wdt:P131 wd:{city_id}.
    ?location rdfs:label ?locationLabel .
    FILTER (lang(?locationLabel) = "fr")
    
    OPTIONAL {{
      ?wikipediaUrl schema:about ?location .
      FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))
    }}

    {{
      ?location wdt:P31/wdt:P279* wd:Q34442 .
    }} UNION {{
      ?location wdt:P31/wdt:P279* wd:Q79007 .
    }} UNION {{
      ?location wdt:P31/wdt:P279* wd:Q226649 .
    }} UNION {{
      ?location wdt:P31/wdt:P279* wd:Q41192 .
    }} UNION {{
      ?location wdt:P31/wdt:P279* wd:Q3257686 .
    }} UNION {{
      ?location wdt:P31/wdt:P279* wd:Q174782  .
    }}
    ?location wdt:P625 ?coord .
  }}
""")


  sparql.setReturnFormat(JSON)
  results = sparql.query().convert()
  return results


In [21]:
# Fetch all cities in France
from SPARQLWrapper import SPARQLWrapper, JSON

# connexion à l'API Wikidata
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# requête SPARQL pour récupérer les noms et les identifiants des villes de France
sparql.setQuery("""
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT ?city ?cityLabel ?wikipediaUrl ?area ?coord ?population WHERE {
    ?city wdt:P31/wdt:P279* wd:Q484170 .
    ?city wdt:P17 wd:Q142 .
    ?city rdfs:label ?cityLabel .
    ?city wdt:P2046 ?area .
    ?city wdt:P625 ?coord .
    ?city wdt:P1082 ?population .
    FILTER (lang(?cityLabel) = "fr")
    
    OPTIONAL {
        ?wikipediaUrl schema:about ?city .
        FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))
    }
    }
    LIMIT 50000
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()


HTTPError: HTTP Error 403: Forbidden

In [165]:
cities = []
# affichage des noms et des identifiants des villes
for result in results["results"]["bindings"]:
    cities.append({
        "id": result["city"]["value"].split("/")[-1],
        "name": result["cityLabel"]["value"],
        "wikipediaUrl": result["wikipediaUrl"]["value"] if "wikipediaUrl" in result else None,
        "area": result["area"]["value"],
        "coordinates" : result["coord"]["value"]
    })


In [166]:
#save citites in json file
import json
with open('cities.json', 'w') as outfile:
    json.dump(cities, outfile)
    

In [2]:
size = {
    "rue" : 0.0005,
    "avenue" : 0.001,
    "boulevard" : 0.002,
    "place" : 0.0008,
    "allée" : 0.0002,
    "impasse" : 0.0001,
    "chemin" : 0.0003,
    "cours" : 0.0004,
    "quai" : 0.0006,
    "passage" : 0.0007,
    "square" : 0.0009,
    "route" : 0.0011,
    "rond-point" : 0.001,
    "voie" : 0.0005,
    "promenade" : 0.0002,
    "parc" : 0.0001,
}

In [3]:
import json
cities = json.load(open('cities.json'))

In [4]:
data = []
for city in cities:
    #print(city["name"])
    attemps = 0
    results = None
    while attemps < 5:
        try:
            results = get_streets(city["id"])
            break
        except Exception as e:
            print(e, city["name"], end='/')
            attemps += 1
    streets = []
    seen = set()
    if results is not None:
        for result in results["results"]["bindings"]:
            if not result['locationLabel']['value'] in seen:
                streetArea = size[result['locationLabel']['value'].split(" ")[0].lower()] if result['locationLabel']['value'].split(" ")[0].lower() in size else None
                streets.append({"link" : result['location']['value'], "label" : result['locationLabel']['value'], "wikipediaUrl" : result['wikipediaUrl']['value'] if 'wikipediaUrl' in result else None, "coordinates": result['coord']['value'], "area": streetArea})
                seen.add(result['locationLabel']['value'])
    else:
        results = []
    data.append({"city": city, "streets": streets })

HTTP Error 403: Forbidden Cognac/EndPointInternalError: The endpoint returned the HTTP status code 500. 

Response:
b'SPARQL-QUERY: queryStr=\n  PREFIX wd: <http://www.wikidata.org/entity/>\n  PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n\n  SELECT ?location ?locationLabel ?wikipediaUrl ?coord WHERE {\n    ?location wdt:P131 wd:Q288.\n    ?location rdfs:label ?locationLabel .\n    FILTER (lang(?locationLabel) = "fr")\n    \n    OPTIONAL {\n      ?wikipediaUrl schema:about ?location .\n      FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))\n    }\n\n    {\n      ?location wdt:P31/wdt:P279* wd:Q34442 .\n    } UNION {\n      ?location wdt:P31/wdt:P279* wd:Q79007 .\n    } UNION {\n      ?location wdt:P31/wdt:P279* wd:Q226649 .\n    } UNION {\n      ?location wdt:P31/wdt:P279* wd:Q41192 .\n    } UNION {\n      ?location wdt:P31/wdt:P279* wd:Q3257686 .\n    } UNION {\n      ?location wdt:P31/wdt:P279* wd:Q17478

In [None]:
import requests

def get_city_id(city_name):
    url = f"https://www.wikidata.org/w/api.php?action=wbsearchentities&search={city_name}&language=fr&format=json&type=item"
    response = requests.get(url)
    data = response.json()

    if data["search"]:
        return data["search"][0]["id"]
    else:
        return None

city_name = "Paris"
city_id = get_city_id(city_name)
if city_id:
    print(f"City: {city_name} - ID: {city_id}")
else:
    print(f"City not found: {city_name}")


City: Paris - ID: Q90


In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

# Connect to Wikidata SPARQL endpoint
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Construct SPARQL query
query = """
  PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?city ?cityLabel ?wikipediaUrl ?area ?coord (GROUP_CONCAT(CONCAT(?street, '|', ?streetLabel, '|',  ?streetCoord); separator="||") AS ?streets)
WHERE {
  ?city wdt:P31/wdt:P279* wd:Q484170 .
  ?city wdt:P17 wd:Q142 .
  ?city rdfs:label ?cityLabel .
  ?city wdt:P2046 ?area .
  ?city wdt:P625 ?coord .
  FILTER (lang(?cityLabel) = "fr")
  
  OPTIONAL {
    ?wikipediaUrl schema:about ?city .
    FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))
  }
  
  {
    SELECT DISTINCT ?city ?street ?streetLabel ?streetUrl ?streetCoord ?streetArea WHERE {
      ?city wdt:P131 ?location .
      ?location rdfs:label ?streetLabel .
      FILTER (lang(?streetLabel) = "fr")
      
      OPTIONAL {
        ?street wdt:P2046 ?streetArea .
        BIND(REPLACE(?streetLabel, " de .*$", "", "i") AS ?streetName)
        FILTER (?streetArea > 0 && ?streetName IN ("avenue", "boulevard", "chemin", "impasse", "place", "quai", "rue", "route"))
      }
      
      OPTIONAL {
        ?street wdt:P625 ?streetCoord .
      }
      
      OPTIONAL {
        ?street schema:about ?streetUrl .
        FILTER (STRSTARTS(STR(?streetUrl), "https://fr.wikipedia.org/"))
      }
    }
  }
}
GROUP BY ?city ?cityLabel ?wikipediaUrl ?area ?coord

"""

# Set query and return format
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute query and retrieve results
results = sparql.query().convert()

# Parse results into JSON output format
output = {}
for result in results["results"]["bindings"]:
    city_id = result["city"]["value"].split("/")[-1]
    # Create a new city object if it doesn't exist yet
    if city_id not in output:
        output[city_id] = {
            "id": city_id,
            "name": result["cityLabel"]["value"],
            "wikipediaUrl": result.get("wikipediaUrl", {}).get("value"),
            "area": float(result["area"]["value"]),
            "coordinates": result["coord"]["value"],
            "streets": []
        }
    # Add street information to city object
    streets_str = result["streets"]["value"]
    streets = []
    for street_str in streets_str.split("||"):
        street_data = street_str.split("|")
        street = {
            "link": street_data[0],
            "label": street_data[1],
            "wikipediaUrl": street_data[2] or None,
            "coordinates": street_data[3],
            "area": float(street_data[4]) if street_data[4] else None
        }
        streets.append(street)
    output[city_id]["streets"].extend(streets)




In [1]:
import json
# load street data
with open("data.json", "r") as f:
    data = json.load(f)


In [7]:
len(data)

37730

In [5]:
# find cities with missing street
cities = []
for city in data:
    if len(city["streets"]) == 0:
        cities.append(city["city"]["name"])


In [6]:
len(cities)

37578

In [9]:
# trier les villes par surface
cities = sorted(data, key=lambda x: x["city"]["area"], reverse=True)
        