In [31]:
import json as js
import pandas as pd
import os

#### General understanding about the example data
Let's first have a look at the data. From the example file, we  
can see that the data is structured like this: `chargerstations`  
contains all the charging stations that were found in the search.  

For each charging station there are two types of information.  
In `csmd` there is metadata on the charging station, such as the location,  
country, ID of the charger and the number of connectors it has.  

In `attr`, the `st` entry holds metadata that applies to all connectors,  
such as whether the station gives real-time data or is publicly available.  
There is also metadata on every connector that the charger has in `conn`,  
such as whether it is available, whether it is a public connector, and  
whether it has a fixed cable or not.

The goal is to first define what data we want from every connector  
(conn) and every charging station (cs) and then create a common  
database with all of this information.

In [2]:
def open_json(path):
    """Load and return the parsed contents of a JSON file.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly: without it open() falls back to the platform
    # default encoding, which garbles or rejects non-ASCII text on systems
    # whose default is not UTF-8.
    with open(path, encoding='utf-8') as json_data:
        return js.load(json_data)
    
# Load one example dump so its structure can be explored in the cells below.
path = 'nobil_json/json_9.json'
jsonFile = open_json(path)

### First look at the variables in CSMD

In [3]:
# Print the top-level keys of a single station entry (the one at index 1).
for key in jsonFile['chargerstations'][1].keys():
    print(key)

csmd
attr


In [4]:
# Show the `Land_code` value stored in this station's csmd metadata.
jsonFile['chargerstations'][1]['csmd']['Land_code']

'SWE'

In [5]:
# Print every field name available in the station's `csmd` metadata.
for key in jsonFile['chargerstations'][1]['csmd'].keys():
    print(key)

id
name
ocpidb_mapping_stasjon_id
Street
House_number
Zipcode
City
Municipality_ID
Municipality
County_ID
County
Description_of_location
Owned_by
Operator
Number_charging_points
Position
Image
Available_charging_points
User_comment
Contact_info
Created
Updated
Station_status
Land_code
International_id


In [6]:
# From looking at the keys, we can identify these as interesting for the following assignment.
# NOTE(review): this list is not referenced again in the visible notebook; a
# later cell defines the same keys as `csmd_attr_names` — consider keeping one.
importantKeysCSMD = ['id','Owned_by','Operator','Number_charging_points','Position','Contact_info','Land_code','International_id']

### First look at the variables in ATTR

In [7]:
# Pick one station (index 2) and keep its `attr` branch for the cells below.
station = jsonFile['chargerstations'][2]
attr = station['attr']
# Listing a dict yields its keys.
list(attr)

['st', 'conn']

#### First look at the variables in ATTR / st

In [8]:
# Keys of the station-level `st` block (attribute ids, stored as strings).
list(attr['st'])

['2', '3', '6', '7', '21', '22', '24']

In [9]:
# Inspect a single station-level attribute entry (the one stored under key '2').
attr['st']['2']

{'attrtypeid': '2',
 'attrname': 'Availability',
 'attrvalid': '1',
 'trans': 'Public',
 'attrval': ''}

In [10]:
# Collect the human-readable name (`attrname`) of every station-level attribute.
st_keys = list(attr['st'])

st_attr_names = [attr['st'][key]['attrname'] for key in st_keys]

st_attr_names

['Availability',
 'Location',
 'Time limit',
 'Parking fee',
 'Real-time information',
 'Public funding',
 'Open 24h']

#### First look at ATTR / conn

In [11]:
# Keys of attr['conn']: one entry per connector of this station.
list(attr['conn'])

['1', '2', '3', '4']

Each of these is a connector. Let's dive deeper into which keys each of them has.

In [12]:
# Take the connector stored under key '1' and list the keys of its attributes.
connector = attr['conn']['1']
list(connector)

['1', '4', '5', '17', '18', '19', '20', '25', '26']

In [13]:
# Inspect a single attribute entry of this connector (the one stored under key '1').
connector['1']

{'attrtypeid': '1',
 'attrname': 'Accessibility',
 'attrvalid': '6',
 'trans': 'Cellular phone',
 'attrval': ''}

Like in the ATTR / st we have the name of the attribute under attrname, and the value under trans.

### Make a function to decode and flatten a connector or st attributes.

In [32]:
# Load all the stations from a directory of JSON dumps
def get_all_stations_from_dir(path):
    """Read every file in a directory and concatenate their stations.

    Args:
        path: Directory containing the JSON dumps; a trailing separator is
            optional.

    Returns:
        A flat list with the entries of each file's 'chargerstations' value,
        visited in sorted file-name order.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        KeyError: If a file has no 'chargerstations' key.
    """
    allStations = []

    # Sort the names so the result does not depend on the arbitrary order in
    # which os.listdir returns directory entries.
    for filename in sorted(os.listdir(path)):
        # os.path.join adds the separator only when it is missing; a plain
        # "'/' in path" substring test would skip it for nested directories
        # such as 'data/dumps' and produce 'data/dumpsfile.json'.
        jsonPath = os.path.join(path, filename)

        # Sub-directories cannot be opened as JSON files, so skip them.
        if not os.path.isfile(jsonPath):
            continue

        jsonFile = open_json(jsonPath)
        allStations.extend(jsonFile['chargerstations'])

    return allStations

Every station has an `attr` branch, which contains both the `st` and the  
`conn` key. The two differ at the top level: `attr/conn` holds one entry per  
connector, while `attr/st` holds the attributes directly. Once you step into an  
individual connector, however, it has the same syntax as `st`: each key  
corresponds to the id of an attribute, and each value looks like this:   

{'attrtypeid': '1',  
 'attrname': 'Accessibility',  
 'attrvalid': '6',  
 'trans': 'Cellular phone',  
 'attrval': ''}  
  
Where you have two important key value pairs, `attrname` which is the name of  
what the attribute describes and `trans` which is the value of the attribute.

We will therefore need a function that turns each of the entries  
shown above into {`attrname`:`trans`}, for all of the existing attributes.

In [33]:
def st_or_connector_to_dict(st_or_connector):
    """Flatten an `st` block or a single connector into {attrname: trans}.

    Every entry of the input is looked up by its key and must provide an
    'attrname' and a 'trans' item; the keys themselves are discarded. When
    two entries share an 'attrname', the later one wins.
    """
    # Iterate over the keys (not .values()) so that an empty container of any
    # kind simply produces an empty dict.
    return {
        st_or_connector[attr_id]['attrname']: st_or_connector[attr_id]['trans']
        for attr_id in st_or_connector
    }

We will also need a function that given a station can turn it into a  
dictionary for every connector, containing both shared information which  
is true for the station and information specific to the connector.

At this point we can also choose not to keep some of the redundant categories  
from the `CSMD` categories.

In [34]:
# We chose only to keep the following categories from CSMD
# (stations_list below reads this module-level list when flattening stations).
csmd_attr_names = ['id','Owned_by','Operator','Number_charging_points','Position','Contact_info','Land_code','International_id']

def station_to_dictList(station,csmd_attr_names):
    """Expand one station into a list of flat dicts, one per connector.

    Args:
        station: A station entry with a 'csmd' dict and an 'attr' dict that
            holds the 'st' and 'conn' branches.
        csmd_attr_names: Names of the csmd fields to copy into every row.

    Returns:
        A list with one dict per connector. Each dict contains the selected
        csmd fields, the station-level attributes, the connector's own
        attributes and a zero-based 'connectorNumber'. On a name clash the
        later source overrides the earlier one.
    """
    csmd = station['csmd']

    # Fields shared by all connectors of this station.
    shared_fields = {name: csmd[name] for name in csmd_attr_names}
    shared_fields.update(st_or_connector_to_dict(station['attr']['st']))

    rows = []
    for position, connector in enumerate(station['attr']['conn'].values()):
        # Start every row from a fresh copy so rows do not share state.
        row = dict(shared_fields)
        row.update(st_or_connector_to_dict(connector))
        row['connectorNumber'] = position
        rows.append(row)

    return rows

In [42]:
# make a huge list of all of these dictionaries.
def stations_list(stations):
    """Flatten stations into per-connector dicts, skipping duplicate stations.

    A station is a duplicate when its csmd 'International_id' was already
    seen; only the first occurrence is kept. The csmd fields copied into each
    row are taken from the module-level `csmd_attr_names` list.

    Args:
        stations: Iterable of station entries.

    Returns:
        A single list holding the per-connector dicts of every unique station,
        in input order.
    """
    list_of_dicts_all_stations = []
    # A set gives constant-time membership tests; a list made the
    # de-duplication quadratic in the number of stations.
    seen_ids = set()

    for station in stations:
        # Named station_id rather than id to avoid shadowing the builtin.
        station_id = station['csmd']['International_id']

        if station_id in seen_ids:
            continue

        list_of_dicts_all_stations += station_to_dictList(station,csmd_attr_names)
        seen_ids.add(station_id)

    return list_of_dicts_all_stations

### Running all the functions and saving our dataframe to CSV

In [43]:
# Read every dump, flatten the unique stations to one row per connector,
# and build the DataFrame from the resulting list of dicts in one go.
stations = get_all_stations_from_dir('nobil_json')
list_of_dicts_all_stations = stations_list(stations)
df = pd.DataFrame(list_of_dicts_all_stations)

In [44]:
# Add the latitudes and longitudes separately as well
def lat_lon_from_string(latLonString):
    """Parse a '(lat,lon)' position into a (lat, lon) tuple of floats.

    The input is converted with str() first, surrounding parentheses are
    stripped, and the remainder must consist of exactly two comma-separated
    numbers; anything else raises ValueError.
    """
    lat_text, lon_text = str(latLonString).strip('()').split(',')
    return float(lat_text), float(lon_text)

# Keep the parsed (lat, lon) tuple and also split it into two float columns.
df['latlon'] = df['Position'].apply(lat_lon_from_string)
# zip(*...) transposes the column of pairs into one sequence of lats and one of lons.
df['lat'], df['lon'] = zip(*df['latlon'])

In [52]:
# Keep only the rows whose Land_code is "SWE", then write them to CSV
# without the DataFrame index column.
df = df[df['Land_code'] == "SWE"]
df.to_csv('nobil_data_sweden.csv',index=False)