In [1]:
#Load packages

import numpy as np
import pandas as pd
import json


In [2]:
#Import volume datasets

e_roads = json.load(open(r"C:\Users\anton\Desktop\Master-Oppgave\Anton\Kode\All_Roads\JSON\volume_E.json"))
r_roads = json.load(open(r"C:\Users\anton\Desktop\Master-Oppgave\Anton\Kode\All_Roads\JSON\volume_R.json"))
f_roads = json.load(open(r"C:\Users\anton\Desktop\Master-Oppgave\Anton\Kode\All_Roads\JSON\volume_F.json"))



In [3]:
def flatten(item):
    result = []
    registration_point = item["data"]["trafficData"]["trafficRegistrationPoint"]
    lat = registration_point["location"]["coordinates"]["latLon"]["lat"]
    lon = registration_point["location"]["coordinates"]["latLon"]["lon"]
    by_month = item["data"]["trafficData"]["volume"]["average"]["daily"]["byMonth"]
    if by_month:
        for month in range(1, 13):  # loop through all months
            month_data = next((m for m in by_month if m.get("month") == month), None)
            if month_data:
                by_direction = month_data.get("byDirection", [])
                result_direction1 = {
                    "id": registration_point["id"],
                    "name": registration_point["name"],
                    "lat": lat,
                    "lon": lon,
                    "volume": month_data["total"]["volume"]["average"] if month_data and month_data["total"] and month_data["total"]["volume"] else np.nan,
                    "year": month_data["year"] if month_data else np.nan,
                    "month": month_data["month"] if month_data else np.nan,
                    "heading1": by_direction[0]["heading"] if by_direction and by_direction[0] and "heading" in by_direction[0] else np.nan,
                    "volume_heading1": by_direction[0]["total"]["volume"]["average"] if by_direction and by_direction[0] and by_direction[0]["total"] and by_direction[0]["total"]["volume"] else np.nan,
                    "heading2": by_direction[1]["heading"] if len(by_direction) > 1 and by_direction[1] and "heading" in by_direction[1] else np.nan,
                    "volume_heading2": by_direction[1]["total"]["volume"]["average"] if len(by_direction) > 1 and by_direction[1] and by_direction[1]["total"] and by_direction[1]["total"]["volume"] else np.nan
                }
                result.append(result_direction1)
    return result

def merge_lists(list1, list2):
    dict_ = {}
    for d in list1+list2:
        id_ = d["id"]
        if id_ in dict_:
            if isinstance(dict_[id_], list):
                dict_[id_].append(d)
            else:
                dict_[id_] = [dict_[id_], d]
        else:
            dict_[id_] = d
    result = []
    for key, value in dict_.items():
        if isinstance(value, dict):
            result.append(value)
        else:
            result.extend(value)
    return result

In [4]:

#Execute flatten function on data

data_e = []
for item in e_roads:
    data_e.extend(flatten(item))
data_e = merge_lists([d for d in data_e if "heading1" in d], [d for d in data_e if "heading2" in d])

data_r = []
for item in r_roads:
    data_r.extend(flatten(item))
data_r = merge_lists([d for d in data_r if "heading1" in d], [d for d in data_r if "heading2" in d])

data_f = []
for item in f_roads:
    data_f.extend(flatten(item))
data_f = merge_lists([d for d in data_f if "heading1" in d], [d for d in data_f if "heading2" in d])

In [5]:
#Convert to dataframe

df_e = pd.DataFrame(data_e)
df_f = pd.DataFrame(data_f)
df_r = pd.DataFrame(data_r)

In [6]:
#Display dataframe 

df_r.head()
df_f.head()
df_e.head()


Unnamed: 0,id,name,lat,lon,volume,year,month,heading1,volume_heading1,heading2,volume_heading2
0,65823V1668921,FJØSDALEN,67.98167,13.175464,469.0,2018,1,Leknes,235.0,Reine,234.0
1,65823V1668921,FJØSDALEN,67.98167,13.175464,706.0,2018,2,Leknes,353.0,Reine,352.0
2,65823V1668921,FJØSDALEN,67.98167,13.175464,760.0,2018,3,Leknes,381.0,Reine,379.0
3,65823V1668921,FJØSDALEN,67.98167,13.175464,659.0,2018,4,Leknes,329.0,Reine,329.0
4,65823V1668921,FJØSDALEN,67.98167,13.175464,882.0,2018,5,Leknes,443.0,Reine,439.0


In [7]:
#Check amount of registration points in the dataset

unique_id_e=df_e["id"].nunique()
print("E registration points:", unique_id_e)

unique_id_r=df_r["id"].nunique()
print("R registration points:", unique_id_r)

unique_id_f=df_f["id"].nunique()
print("F registration points:", unique_id_f)

total_unique= unique_id_f + unique_id_r + unique_id_e

print("There is a total of" , total_unique , "traffic registration points in the dataset")

E registration points: 628
R registration points: 384
F registration points: 1240
There is a total of 2252 traffic registration points in the dataset


In [8]:
# Concatenate the dataframes vertically

all_points = pd.concat([df_e, df_r, df_f])

# Reset the index of the concatenated dataframe
all_points = all_points.reset_index(drop=True)


In [9]:
# use the value_counts() method to count the occurrences of each month
month_counts = all_points['month'].value_counts()

# print the results
for month, count in month_counts.iteritems():
    print(f'{month}: {count}')

11: 4370
12: 4370
10: 4346
5: 4334
6: 4332
7: 4328
8: 4324
9: 4322
1: 4320
4: 4316
2: 4304
3: 4290


  for month, count in month_counts.iteritems():


In [10]:
all_points.head()

Unnamed: 0,id,name,lat,lon,volume,year,month,heading1,volume_heading1,heading2,volume_heading2
0,65823V1668921,FJØSDALEN,67.98167,13.175464,469.0,2018,1,Leknes,235.0,Reine,234.0
1,65823V1668921,FJØSDALEN,67.98167,13.175464,706.0,2018,2,Leknes,353.0,Reine,352.0
2,65823V1668921,FJØSDALEN,67.98167,13.175464,760.0,2018,3,Leknes,381.0,Reine,379.0
3,65823V1668921,FJØSDALEN,67.98167,13.175464,659.0,2018,4,Leknes,329.0,Reine,329.0
4,65823V1668921,FJØSDALEN,67.98167,13.175464,882.0,2018,5,Leknes,443.0,Reine,439.0


In [11]:
#Check amount of registration points in the concatenated dataframe

unique_ids=all_points["id"].nunique()
print("All registration points:", unique_ids)

All registration points: 2252


In [12]:
# Print the data types of the columns

print(all_points.dtypes)

id                  object
name                object
lat                float64
lon                float64
volume             float64
year                 int64
month                int64
heading1            object
volume_heading1    float64
heading2            object
volume_heading2    float64
dtype: object


In [13]:
# Count the occurrences where 'volume' is equal to 0 or missing
count = all_points['volume'].isnull().sum() + (all_points['volume'] == 0).sum()

# print the count
print(count)

# Replace NaN with 0 in the 'volume' column
all_points['volume'].fillna(0, inplace=True)


648


In [None]:
all_points['volume']=all_points['volume'].round().astype('int64')


In [15]:
all_points.head()

# define the file path where you want to save the file
file_path = r'C:\Users\anton\Desktop\Master-Oppgave\Anton\Kode\All_Roads\CSV\all_road_volume.csv'

# save the DataFrame as a CSV file in the specified folder
all_points.to_csv(file_path, index=False)



In [17]:
#I removed duplicates via an Excel procedure instead
removed_dupes=pd.read_excel(r"C:\Users\anton\Desktop\Master-Oppgave\Anton\Kode\All_Roads\CSV\all_roads_volume.xlsx")


In [18]:
unique_ids_no_dupes=removed_dupes["id"].nunique()
print("All registration points:", unique_ids_no_dupes)

All registration points: 2252
