In [None]:
import json
import time

import requests
def get_streets(city_id, timeout=90):
    """Query Wikidata for street-like items located in ``city_id``.

    Sends a SPARQL query to the Wikidata Query Service selecting items that
    are linked to ``city_id`` through P131, carry a French label and a P625
    coordinate, and whose P31 class (following P279 subclass links) is one of
    six hard-coded classes. The French Wikipedia article URL is returned when
    one exists.

    Args:
        city_id: Wikidata item id without namespace (e.g. ``"Q90"``). It is
            interpolated verbatim into the query, so it must be a trusted id.
        timeout: Seconds handed to ``requests.post``. Without it a stalled
            connection would block forever.

    Returns:
        The decoded SPARQL JSON document (a dict) when the endpoint answers
        with HTTP 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
        ValueError: if a 200 response does not contain valid JSON.
    """
    # `schema:` is declared explicitly, like the other prefixes, instead of
    # relying on the endpoint's predefined prefix list.
    sparql_query = f"""
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX schema: <http://schema.org/>

    SELECT ?location ?locationLabel ?wikipediaUrl ?coord WHERE {{
      ?location wdt:P131 wd:{city_id}.
      ?location rdfs:label ?locationLabel .
      FILTER (lang(?locationLabel) = "fr")

      OPTIONAL {{
        ?wikipediaUrl schema:about ?location .
        FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))
      }}

      {{
        ?location wdt:P31/wdt:P279* wd:Q34442 .
      }} UNION {{
        ?location wdt:P31/wdt:P279* wd:Q79007 .
      }} UNION {{
        ?location wdt:P31/wdt:P279* wd:Q226649 .
      }} UNION {{
        ?location wdt:P31/wdt:P279* wd:Q41192 .
      }} UNION {{
        ?location wdt:P31/wdt:P279* wd:Q3257686 .
      }} UNION {{
        ?location wdt:P31/wdt:P279* wd:Q174782  .
      }}
      ?location wdt:P625 ?coord .
    }}
    """

    url = "https://query.wikidata.org/sparql"
    # NOTE(review): Wikimedia's User-Agent policy asks for a descriptive agent
    # with contact details; a generic browser string may be throttled.
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "application/sparql-results+json",
    }
    response = requests.post(
        url,
        data={"query": sparql_query},
        headers=headers,
        timeout=timeout,
    )

    # Any non-200 answer (rate limiting, server-side timeout, ...) yields None.
    if response.status_code != 200:
        return None
    return response.json()


In [None]:
# Load the saved city list, then keep only the first record for each id.
# The name of every dropped duplicate is printed.
with open('cities.json', 'r') as f:
    cities = json.load(f)

seen = set()
cities2 = []
for city in cities:
    if city['id'] in seen:
        # Already kept an entry with this id: report it and move on.
        print(city['name'])
        continue
    cities2.append(city)
    seen.add(city['id'])

In [None]:
# fetch all cities in France
import requests
import json

# Select every item that is an instance of Q484170 (or of one of its
# subclasses) with country Q142, together with its French label, area (P2046),
# coordinate (P625), population (P1082) and, when present, its French
# Wikipedia article URL.
# `schema:` is declared explicitly, like the other prefixes, instead of relying
# on the endpoint's predefined prefix list.
sparql_query = """
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX schema: <http://schema.org/>

    SELECT ?city ?cityLabel ?wikipediaUrl ?area ?coord ?population WHERE {
    ?city wdt:P31/wdt:P279* wd:Q484170 .
    ?city wdt:P17 wd:Q142 .
    ?city rdfs:label ?cityLabel .
    ?city wdt:P2046 ?area .
    ?city wdt:P625 ?coord .
    ?city wdt:P1082 ?population .
    FILTER (lang(?cityLabel) = "fr")

    OPTIONAL {
        ?wikipediaUrl schema:about ?city .
        FILTER (STRSTARTS(STR(?wikipediaUrl), "https://fr.wikipedia.org/"))
    }
    }
    LIMIT 50000
"""

url = "https://query.wikidata.org/sparql"
# NOTE(review): Wikimedia's User-Agent policy asks for a descriptive agent with
# contact details; a generic browser string may be throttled.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/sparql-results+json",
}
# A timeout keeps a stalled connection from blocking the notebook forever.
response = requests.post(
    url,
    data={"query": sparql_query},
    headers=headers,
    timeout=90,
)

# Check if the request was successful
if response.status_code == 200:
    json_data = response.json()
    print(json_data)
else:
    # Reset the result so later cells cannot silently reuse a stale
    # `json_data` left over from an earlier successful run.
    json_data = None
    print("Request failed with status code:", response.status_code)


In [None]:
# Flatten the SPARQL bindings held in `json_data` into plain city records.
cities = []
for result in json_data["results"]["bindings"]:
    # The entity id is the last path segment of the entity URI.
    city_qid = result["city"]["value"].rsplit("/", 1)[-1]
    city_label = result["cityLabel"]["value"]

    # The Wikipedia URL binding may be absent from a row.
    if "wikipediaUrl" in result:
        city_wiki = result["wikipediaUrl"]["value"]
    else:
        city_wiki = None

    city_area = result["area"]["value"]
    city_coord = result["coord"]["value"]

    # A missing population falls back to the string "0".
    if "population" in result:
        city_pop = result["population"]["value"]
    else:
        city_pop = "0"

    cities.append({
        "id": city_qid,
        "name": city_label,
        "wikipediaUrl": city_wiki,
        "area": city_area,
        "coordinates": city_coord,
        "population": city_pop,
    })


In [None]:
# Add a hand-written record for Paris (Q90), then sort the cities by
# population, largest first.
paris = {
    "id": "Q90",
    "name": "Paris",
    "wikipediaUrl": "https://fr.wikipedia.org/wiki/Paris",
    "area": "105.4",
    "coordinates": "Point(2.3508 48.8567)",
    "population": "2148000",
}
# Only append when Q90 is not already present, so re-running this cell does
# not pile up duplicate Paris entries.
if all(city["id"] != paris["id"] for city in cities2):
    cities2.append(paris)

# Populations are stored as strings; convert to int so the order is numeric.
cities2 = sorted(cities2, key=lambda city: int(city["population"]), reverse=True)

In [None]:
# Size value per street type, keyed by the lowercase first word of a French
# street label. The street-collection loop stores the value under "area".
# NOTE(review): the unit of these numbers is not stated in this notebook.
size = {
    "rue": 0.0005,
    "avenue": 0.001,
    "boulevard": 0.002,
    "place": 0.0008,
    "allée": 0.0002,
    "impasse": 0.0001,
    "chemin": 0.0003,
    "cours": 0.0004,
    "quai": 0.0006,
    "passage": 0.0007,
    "square": 0.0009,
    "route": 0.0011,
    "rond-point": 0.001,
    "voie": 0.0005,
    "promenade": 0.0002,
    "parc": 0.0001,
}

In [None]:
# For every city, fetch its streets and keep one entry per distinct label.
# NOTE(review): this iterates `cities`, not the de-duplicated `cities2` that
# also contains Paris - confirm which list is intended.
MAX_ATTEMPTS = 5

data = []
for city in cities:
    results = None
    attempts = 0
    # Retry on exceptions AND on a None result (get_streets returns None for
    # any non-200 answer), pausing 1s, 2s, 4s, 8s between attempts so a
    # throttled endpoint is not hit again immediately.
    while results is None and attempts < MAX_ATTEMPTS:
        try:
            results = get_streets(city["id"])
        except Exception as e:
            print(e, city["name"], end='/')
        else:
            if results is None:
                print("no result", city["name"], end='/')
        attempts += 1
        if results is None and attempts < MAX_ATTEMPTS:
            time.sleep(2 ** (attempts - 1))

    streets = []
    seen = set()
    # A city whose lookups all failed is still recorded, with no streets.
    bindings = results["results"]["bindings"] if results is not None else []
    for result in bindings:
        label = result['locationLabel']['value']
        if label in seen:
            continue
        # The lowercase first word of the label selects the size entry;
        # labels with an unknown first word get None.
        streetArea = size.get(label.split(" ")[0].lower())
        streets.append({
            "link": result['location']['value'],
            "label": label,
            "wikipediaUrl": result['wikipediaUrl']['value'] if 'wikipediaUrl' in result else None,
            "coordinates": result['coord']['value'],
            "area": streetArea,
        })
        seen.add(label)
    data.append({"city": city, "streets": streets})

In [None]:
#get city id
import requests

def get_city_id(city_name):
    """Return the Wikidata id of the first search hit for ``city_name``.

    Calls the Wikidata ``wbsearchentities`` API (French language, items only)
    and returns the id of the first entry in the ``"search"`` list.

    Args:
        city_name: Free-text name to search for. It is sent as a query
            parameter, so spaces, accents and characters such as ``&`` are
            URL-encoded instead of corrupting the query string.

    Returns:
        The id string of the first match (e.g. ``"Q90"``), or ``None`` when
        the search list is empty.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
        KeyError: if the response JSON has no ``"search"`` key.
    """
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={
            "action": "wbsearchentities",
            "search": city_name,
            "language": "fr",
            "format": "json",
            "type": "item",
        },
        # Keep a stalled connection from blocking the notebook forever.
        timeout=30,
    )
    data = response.json()

    matches = data["search"]
    return matches[0]["id"] if matches else None

# Demo lookup: resolve one city name and report whether an id was found.
city_name = "Paris"
city_id = get_city_id(city_name)
print(f"City: {city_name} - ID: {city_id}" if city_id else f"City not found: {city_name}")
