In [1]:
import numpy as np
import pandas as pd
import requests
import json
import datetime




# Lim in PostUrl som spørringen skal postes mot
postUrl = "https://trafikkdata-api.atlas.vegvesen.no/"
# Definerer overskriftene for spørringen inkludert innholdstypen Det er rare kommentarer her siden jeg først tenkte å lage en liten tutorial å sende.
headers = {
    "content-type": "application/json"
}

# Spørringen og endepunktet til API-et må skrives etter hva du vil hente ut i GraphQL format.
apiQuery = """
{
trafficRegistrationPoints(searchQuery: {}) {
    id
    name
    location {
    municipality {
        name
        number
        county{
        name
        }
    }
    coordinates {
        latLon {
        lat
        lon
        }
    }
    }
}
}

"""

# Konverter spørringen til en JSON-streng
payload = json.dumps({
    "query": apiQuery
})

# Gjør en POST-forespørsel til API-et med overskriftene og nyttelasten
response = requests.post(postUrl, headers=headers, data=payload)

# Sjekk om svarstatuskoden er 200 (vellykket)
if response.status_code == 200:
    # Analyser JSON-dataene i svaret
    response_data = response.json()
    # vi må unneste dataen slik at vi får det i kolonner
    df = pd.json_normalize(response_data['data']['trafficRegistrationPoints'])
else:
    # Skriv ut en feilmelding hvis statuskoden ikke er 200
    print(f"Error: {response.status_code}")

#location.municipality.name = Tromsø
df = df[df['location.municipality.name'] == 'Tromsø']
df = df.reset_index(drop=True)
#drop location.municipality.number and name and county
df = df.drop(['location.municipality.number', 'location.municipality.name', 'location.municipality.county.name'], axis=1)
#clean column names from location.coordinates.latLon.lat to lat and location.coordinates.latLon.lon to lon
df.columns = df.columns.str.replace('location.coordinates.latLon.lat', 'lat', regex=False) #setter regex to false siden jeg vil bare skifte nøyaktig det navnet
df.columns = df.columns.str.replace('location.coordinates.latLon.lon', 'lon', regex=False)



# henter trafficRegistrationPointIds fra df
traffic_registration_point_ids = df['id'].tolist()


In [3]:

# Define the start and end datetime objects
start_datetime = datetime.datetime(2018, 1, 1, 23, 0, 0)
end_datetime = datetime.datetime(2024, 5, 27, 23, 0, 0)

# Define the time delta for each iteration
time_delta = datetime.timedelta(hours=100)

# Initialize the list to store the results
all_rows = []

# Loop through the time range and send the requests
while start_datetime < end_datetime:
    # Definerer GraphQL query med "test" for hver trafficRegistrationPointId og setter en new line for hver for å lese i request outputtet om det er rett
    query = """
    {
    """ + "\n".join(
        f"""
    test_{id.replace(':', '_')}: trafficData(trafficRegistrationPointId: "{id}") {{
        volume {{
        byHour(
            from: "{start_datetime.isoformat()}+02:00"
            to: "{(start_datetime + time_delta).isoformat()}+02:00"
        ) {{
            edges {{
            node {{
                from
                to
                byDirection {{
                heading
                total {{
                    volumeNumbers {{
                        volume
                    }}
                    coverage {{
                        percentage
                        }}
                }}
                }}
            }}
            }}
        }}
        }}
    }}
        """
        for id in traffic_registration_point_ids
    ) + "}"
    
    # setter inn link og overskrifter og definerer data for query
    url = "https://trafikkdata-api.atlas.vegvesen.no/"
    headers = {"Content-Type": "application/json"}
    data = {"query": query}

    # Sender requesten og henter resultatet
    response = requests.post(url, headers=headers, json=data)
    result = response.json()

    # henter resultatet fra data som er nestet
    data = result['data']

    # lager en liste of dictionaries, en for hver trafficregistrationpoint. jeg fjerner alias siden jeg ikke trenger det lengere. setter en "if" statement for å fjerne problem om nonetype og legger til 0 istedenfor der det er None
    rows = [    
        {        
            'id': key.replace("test_", ""),        
            'from': item['node']['from'],
            'to': item['node']['to'],
            'heading': heading['heading'],
            'volume': heading['total']['volumeNumbers']['volume'] if heading['total']['volumeNumbers'] is not None else 0,
            'coverage': heading['total']['coverage']['percentage'] if heading['total']['coverage'] is not None else 0
        }
        for key, value in data.items()
        for item in value['volume']['byHour']['edges']
        for heading in item['node']['byDirection']
    ]
    
    # Append the rows to the list of all rows
    all_rows += rows
    
    # Increment the start datetime
    start_datetime += time_delta

# lager et dataframe fra listen av dictionaries
df1 = pd.DataFrame.from_dict(all_rows)


##

#KOMBINERER


#i merge df og df1
df = df.merge(df1, on='id', how='left')


df.to_csv('ny_trafikkdata.csv', index=False)



MemoryError: Unable to allocate 8.69 GiB for an array with shape (3, 388814820) and data type object

In [10]:
#looking at name Tromsøysundtunnellen T1
df[df['name'] == 'Tromsøysundtunnelen T1']

Unnamed: 0,id,name,lat,lon,from,to,heading,volume,coverage
1,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-01T22:00:00+01:00,2018-01-01T23:00:00+01:00,Tomasjord,141.0,100.00
2,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-01T22:00:00+01:00,2018-01-01T23:00:00+01:00,Tromsøya,0.0,100.00
3,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-01T23:00:00+01:00,2018-01-02T00:00:00+01:00,Tomasjord,68.0,100.00
4,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-01T23:00:00+01:00,2018-01-02T00:00:00+01:00,Tromsøya,0.0,100.00
5,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-02T00:00:00+01:00,2018-01-02T01:00:00+01:00,Tomasjord,33.0,100.00
...,...,...,...,...,...,...,...,...,...
196,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-05T23:00:00+01:00,2018-01-06T00:00:00+01:00,Tromsøya,0.0,99.38
197,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-06T00:00:00+01:00,2018-01-06T01:00:00+01:00,Tomasjord,45.0,100.00
198,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-06T00:00:00+01:00,2018-01-06T01:00:00+01:00,Tromsøya,0.0,100.00
199,68511V2673383,Tromsøysundtunnelen T1,69.67012,19.018382,2018-01-06T01:00:00+01:00,2018-01-06T02:00:00+01:00,Tomasjord,24.0,100.00


In [None]:

# Define the start and end datetime objects
start_datetime = datetime.datetime(2018, 1, 1, 23, 0, 0)
end_datetime = datetime.datetime(2024, 6, 1, 23, 0, 0)

# Define the time delta for each iteration
time_delta = datetime.timedelta(hours=100)

# Initialize the list to store the results
all_rows = []

# Loop through the time range and send the requests
while start_datetime < end_datetime:
    # Definerer GraphQL query med "test" for hver trafficRegistrationPointId og setter en new line for hver for å lese i request outputtet om det er rett
    query = """
    {
    """ + "\n".join(
        f"""
    test_{id.replace(':', '_')}: trafficData(trafficRegistrationPointId: "{id}") {{
        volume {{
        byHour(
            from: "{start_datetime.isoformat()}+02:00"
            to: "{(start_datetime + time_delta).isoformat()}+02:00"
        ) {{
            edges {{
            node {{
                from
                to
                byDirection {{
                heading
                total {{
                    volumeNumbers {{
                        volume
                    }}
                    coverage {{
                        percentage
                        }}
                }}
                }}
            }}
            }}
        }}
        }}
    }}
        """
        for id in traffic_registration_point_ids
    ) + "}"
    
    # setter inn link og overskrifter og definerer data for query
    url = "https://trafikkdata-api.atlas.vegvesen.no/"
    headers = {"Content-Type": "application/json"}
    data = {"query": query}

    # Sender requesten og henter resultatet
    response = requests.post(url, headers=headers, json=data)
    result = response.json()

    # henter resultatet fra data som er nestet
    data = result['data']

    # lager en liste of dictionaries, en for hver trafficregistrationpoint. jeg fjerner alias siden jeg ikke trenger det lengere. setter en "if" statement for å fjerne problem om nonetype og legger til 0 istedenfor der det er None
    rows = [    
        {        
            'id': key.replace("test_", ""),        
            'from': item['node']['from'],
            'to': item['node']['to'],
            'heading': heading['heading'],
            'volume': heading['total']['volumeNumbers']['volume'] if heading['total']['volumeNumbers'] is not None else 0,
            'coverage': heading['total']['coverage']['percentage'] if heading['total']['coverage'] is not None else 0
        }
        for key, value in data.items()
        for item in value['volume']['byHour']['edges']
        for heading in item['node']['byDirection']
    ]
    
    # Append the rows to the list of all rows
    all_rows += rows
    
    # Increment the start datetime
    start_datetime += time_delta

# lager et dataframe fra listen av dictionaries
df1 = pd.DataFrame.from_dict(all_rows)


##

#KOMBINERER


#i merge df og df1
df = df.merge(df1, on='id', how='left')




