In [1]:
import json
import pandas as pd
import urllib

In [5]:
"""This function reads in the data from the InfraVelo-API. For this, it needs to iterate through the pages and bring
variables in listed dictionaries in a form that they are json-formatted. The resulting object can be transformed
into a pandas dataframe."""
def get_data_infravelo(url, flat_data=None):
    """Fetch one page of the InfraVelo projects API and flatten it into rows.

    The JSON document at ``url`` is downloaded and every entry of its
    ``"results"`` list is converted into one flat dictionary. Nested lists are
    reduced to their first element: the first district's name, the first
    type, and the first metric of that first type. Optional fields that are
    absent are stored as ``""``.

    Parameters
    ----------
    url : str
        Address of the API page to read.
    flat_data : list of dict, optional
        Rows collected so far. New rows are appended to this list in place,
        which lets a caller accumulate several pages. When omitted, a fresh
        list is created for this call.

    Returns
    -------
    tuple
        ``(flat_data, count, next_url)``: the row list including this page,
        its length, and the value of the response's ``"next"`` field
        (presumably the URL of the following page, falsy on the last one;
        confirm against the API).

    Raises
    ------
    KeyError
        If the response lacks ``"results"`` or ``"next"``, or a project lacks
        ``"id"``, ``"title"`` or a key inside its first district/type/metric.
    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded.
    import urllib.request

    # A list used as a default argument is shared between calls; create it here.
    if flat_data is None:
        flat_data = []

    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(url) as response:
        json_data = json.loads(response.read())

    for project in json_data["results"]:
        # Treat a missing or null nested list the same as an empty one.
        districts = project.get("districts") or []
        types = project.get("types") or []
        first_type = types[0] if types else {}
        metrics = first_type.get("metrics") or []

        # extract relevant information from each project dictionary
        new_row = {
            "id": project["id"],
            "title": project["title"],
            "subtitle": project.get("subtitle", ""),
            "yearOfImplementation": project.get("yearOfImplementation", ""),  # Handle missing year
            "status": project.get("status", ""),
            "holder": project.get("holder", ""),
            "costs": project.get("costs", ""),
            "link": project.get("link", ""),
            "apiLink": project.get("apiLink", ""),
            "companyConstruction": project.get("companyConstruction", ""),
            "owner": project.get("owner", ""),
            "district": districts[0]["name"] if districts else "",
            "dateStart": project.get("dateStart", ""),
            "dateEnd": project.get("dateEnd", ""),
            "milestones": project.get("milestones", ""),
            "type": first_type["type"] if types else "",
            "name": metrics[0]["name"] if metrics else "",
            "value": metrics[0]["value"] if metrics else "",
            # Row key is "category" although it is read from "categories".
            "category": project.get("categories", ""),
            "image": project.get("image", ""),
            "imageCurrent": project.get("imageCurrent", ""),
            "imagesBefore": project.get("imagesBefore", ""),
            "kml": project.get("kml", ""),
            "additionalInformation": project.get("additionalInformation", ""),
            "additionalHtmlContent": project.get("additionalHtmlContent", ""),
        }
        flat_data.append(new_row)

    count = len(flat_data)
    return flat_data, count, json_data["next"]

In [6]:
start_url = "https://www.infravelo.de/api/v1/projects/"

# Fetch the first page into an explicitly fresh list, so re-running this cell
# never appends to rows left over from an earlier run.
info = get_data_infravelo(start_url, [])

# Follow the "next" link returned with each page until none is left, instead of
# assuming a fixed number of pages. `info` is (rows, row_count, next_url).
while info[2]:
    info = get_data_infravelo(info[2], info[0])

IndexError: list index out of range

In [181]:
# Turn the collected project rows (first element of `info`) into a DataFrame;
# nested dictionaries such as "image" become dotted columns (e.g. image.uri).
proj_df = pd.json_normalize(data=info[0])
proj_df

Unnamed: 0,id,title,subtitle,yearOfImplementation,status,holder,costs,link,apiLink,companyConstruction,...,value,categories,image,imageCurrent,imagesBefore,kml,additionalInformation,additionalHtmlContent,image.uri,image.extension
0,9080038002,A-Schweitzer-Gymnasium Gymnasium,,2020.0,Abgeschlossen,Bezirksamt Neukölln finanziert durch Landesmit...,,https://www.infravelo.de/projekt/a-schweitzer-...,https://www.infravelo.de/api/v1/project/908003...,,...,30,[],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],,
1,9100031042,Adorfer Straße 8,,2019.0,Abgeschlossen,Bezirksamt Marzahn-Hellersdorf finanziert durc...,,https://www.infravelo.de/projekt/adorfer-stras...,https://www.infravelo.de/api/v1/project/910003...,,...,6,[],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],,
2,9060111001,Albrechtstraße 49,,2023.0,Abgeschlossen,Bezirksamt Steglitz-Zehlendorf finanziert durc...,,https://www.infravelo.de/projekt/albrechtstras...,https://www.infravelo.de/api/v1/project/906011...,,...,4,[],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],,
3,9060092029,Albrechtstraße 81,,2020.0,Abgeschlossen,Bezirksamt Steglitz-Zehlendorf finanziert durc...,,https://www.infravelo.de/projekt/albrechtstras...,https://www.infravelo.de/api/v1/project/906009...,,...,16,[],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],,
4,9100031023,Alfred-Döblin-Straße 2 A,,2019.0,Abgeschlossen,Bezirksamt Marzahn-Hellersdorf finanziert durc...,,https://www.infravelo.de/projekt/alfred-doebli...,https://www.infravelo.de/api/v1/project/910003...,,...,8,[],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2233,100349,Schlüter­straße,Sichtbarer mit dem Rad durch Einkaufsstraße im...,,Abgeschlossen,"Senatsverwaltung für Mobilität, Verkehr, Klima...",,https://www.infravelo.de/projekt/schlueterstra...,https://www.infravelo.de/api/v1/project/100349/,Possehl Spezialbau GmbH,...,Beidseitig,[{'name': 'Grün­be­schich­tung'}],,,[{'uri': 'https://www.infravelo.de/fileadmin/p...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],https://www.infravelo.de/fileadmin/projektbild...,jpg
2234,100087,Spandauer Damm – Freiheit,Unterwegs vom S-Bahnhof Westend bis in die Spa...,,,"Senatsverwaltung für Mobilität, Verkehr, Klima...",,https://www.infravelo.de/projekt/spandauer-dam...,https://www.infravelo.de/api/v1/project/100087/,,...,,[{'name': 'Rad­schnell­ver­bin­dung'}],,,[{'uri': 'https://www.infravelo.de/fileadmin/p...,,[],"[\n\n<section class=""accordion-section "">\n ...",https://www.infravelo.de/fileadmin/projektbild...,jpg
2235,100335,Werbellin­straße,Sichtbar und sicherer zwischen Karl-Marx-Straß...,,Abgeschlossen,"Senatsverwaltung für Mobilität, Verkehr, Klima...",,https://www.infravelo.de/projekt/werbellinstra...,https://www.infravelo.de/api/v1/project/100335/,Possehl Spezialbau GmbH,...,Beidseitig,[{'name': 'Grün­be­schich­tung'}],,,[],"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],https://www.infravelo.de/fileadmin/projektbild...,jpg
2236,100338,Wichert­straße,Wohn- und Schulkiez-Radfahrstreifen breiter un...,,Abgeschlossen,"Senatsverwaltung für Mobilität, Verkehr, Klima...",,https://www.infravelo.de/projekt/wichertstrasse/,https://www.infravelo.de/api/v1/project/100338/,Possehl Spezialbau GmbH,...,Beidseitig,[{'name': 'Grün­be­schich­tung'}],,,[{'uri': 'https://www.infravelo.de/fileadmin/p...,"<?xml version=""1.0"" encoding=""utf-8""?>\n<kml x...",[],[],https://www.infravelo.de/fileadmin/projektbild...,jpg


In [184]:
# Save the raw project table as CSV, leaving out the DataFrame index column.
proj_df.to_csv(path_or_buf="infravelo_projects_raw.csv", index=False)