In [None]:
"""
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Script: Data conversion scripts
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Goal: Script that converts data from CSV to GeoJSON
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -      
Date   : 10/11/2019
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Comments: 
    - This script can be used to filter data and export it to a JSON file
    - This script is used in the cookbook recipe "ETL-Process with Datasets" 
    - The dataset we will be using is called "FILTERED_Swedish_Crane.csv"
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Version: 0.0.2
Last Changes:
1) Added json.dumps for correct JSON format
2)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Todo:
Export dict to JSON
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Author: Pieter Lems
"""

In [10]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 0) Import the required modules or functions from modules.

import pandas as pd
import json
# pandas is a Python library that provides easy-to-use data structures
# and data analysis tools.
# The json module is used to serialise the converted data as JSON text.

In [11]:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# #                                                 # #
# #     START CSV-> GeoJSON conversion script     # #
# #                                                 # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 1) Load the filtered Swedish Crane CSV into the dataframe
#    'CSV_Swedish_Crane_Dataframe' (path is relative to the notebook).

CSV_Swedish_Crane_Dataframe = pd.read_csv(
    filepath_or_buffer='20191011_Data_Sets/FILTERED_Swedish_Crane.csv',
)


In [12]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 2) Preview the first ten rows of the dataframe
CSV_Swedish_Crane_Dataframe.head(10)


Unnamed: 0,event-id,timestamp,location-long,location-lat,ground-speed,heading,height-above-ellipsoid,tag-local-identifier
0,1154727247,2013-07-21 03:06:32.000,13.583908,57.503796,0.0,,193.0,9381
1,1154727246,2013-07-21 03:51:34.000,13.578312,57.504063,0.5144,,194.0,9381
2,1154727245,2013-07-21 04:07:09.000,13.578205,57.50415,0.0,,199.0,9381
3,1154727244,2013-07-21 04:22:57.000,13.577142,57.504177,0.0,,194.0,9381
4,1154727243,2013-07-21 04:38:39.000,13.576754,57.504238,0.0,,192.0,9381
5,1154727242,2013-07-21 04:54:27.000,13.574988,57.505005,0.5144,,196.0,9381
6,1154727241,2013-07-21 05:10:09.000,13.573163,57.505985,0.0,,195.0,9381
7,1154727240,2013-07-21 05:25:56.000,13.573152,57.506939,0.0,,203.0,9381
8,1154727239,2013-07-21 05:41:40.000,13.573357,57.507603,0.0,,199.0,9381
9,1154727238,2013-07-21 05:57:33.000,13.5755,57.507931,0.0,,194.0,9381


In [13]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 3) Set variables

# df is the dataframe to convert to GeoJSON (only the first 10 rows are used here)
df = CSV_Swedish_Crane_Dataframe[:10]

# properties is the list of columns in the dataframe to turn into GeoJSON feature properties
properties = ['event-id',
              'timestamp',
              'ground-speed',
              'tag-local-identifier']

# The three coordinate settings below are plain column names (strings), not
# one-element lists: indexing a row with a list returns a one-element Series,
# and calling float() on such a Series is deprecated in recent pandas versions.

# lat is the name of the column in the dataframe that contains latitude data
lat = 'location-lat'

# lng is the name of the column in the dataframe that contains longitude data
lng = 'location-long'

# z is the name of the column in the dataframe that contains height data
z = 'height-above-ellipsoid'


In [14]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 4) Function to convert to GeoJSON
# create the function
def _column_name(column):
    """Return one column label from either ``'name'`` or ``['name']``."""
    if isinstance(column, list):
        if len(column) != 1:
            raise TypeError(
                'expected a single column name, got %d: %r' % (len(column), column)
            )
        return column[0]
    return column


def df_to_geojson(df, properties, lat, lng, z):
    """Convert the rows of a dataframe to a GeoJSON FeatureCollection string.

    Parameters
    ----------
    df : pandas.DataFrame
        The rows to convert; every row becomes one Feature.
    properties : list
        Column names whose values are copied into each feature's
        ``properties`` dict.
    lat, lng, z : str or one-element list of str
        Names of the columns holding latitude, longitude and height.
        A one-element list such as ``['location-lat']`` is accepted and
        treated the same as the bare name.

    Returns
    -------
    str
        The FeatureCollection serialised with ``json.dumps``.

    Raises
    ------
    TypeError
        If ``lat``, ``lng`` or ``z`` is a list that does not hold exactly
        one column name.

    Notes
    -----
    * Coordinates are written in the order ``[longitude, latitude, height]``.
    * A missing height yields a two-element position; a missing longitude
      or latitude yields ``"geometry": null``. This keeps the bare ``NaN``
      token, which is not valid JSON, out of the geometry.
    * Property values that are plain Python floats are written as strings
      (so a missing value becomes ``"nan"``); other values are passed to
      ``json.dumps`` unchanged.
    """
    # Resolve the selectors once so that row[...] below always yields a scalar
    # (indexing a row with a list would yield a one-element Series instead).
    lat_col = _column_name(lat)
    lng_col = _column_name(lng)
    z_col = _column_name(z)

    features = []

    # loop through each row in the dataframe and convert it to a GeoJSON feature
    for _, row in df.iterrows():
        lng_value = row[lng_col]
        lat_value = row[lat_col]
        z_value = row[z_col]

        if pd.isna(lng_value) or pd.isna(lat_value):
            # no usable position: emit an unlocated feature
            geometry = None
        else:
            coordinates = [float(lng_value), float(lat_value)]
            if not pd.isna(z_value):
                coordinates.append(float(z_value))
            geometry = {'type': 'Point', 'coordinates': coordinates}

        # copy the requested columns into the feature's properties
        feature_properties = {}
        for prop in properties:
            value = row[prop]
            # plain Python floats are stored as strings
            if type(value) is float:
                value = str(value)
            feature_properties[prop] = value

        # add this feature (the converted dataframe row) to the collection
        features.append({'type': 'Feature',
                         'properties': feature_properties,
                         'geometry': geometry})

    return json.dumps({'type': 'FeatureCollection', 'features': features})


In [15]:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# 5) Run the function with the variables we set in step 3
# print() writes the raw JSON text. Leaving the call as a bare expression would
# display the Python repr of the string (wrapped in quotes), which is not valid
# JSON and would have to be cleaned up by hand.
print(df_to_geojson(df, properties, lat, lng, z))


'{"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {"event-id": 1154727247, "timestamp": "2013-07-21 03:06:32.000", "ground-speed": "0.0", "tag-local-identifier": 9381}, "geometry": {"type": "Point", "coordinates": [13.583908, 57.503796, 193.0]}}, {"type": "Feature", "properties": {"event-id": 1154727246, "timestamp": "2013-07-21 03:51:34.000", "ground-speed": "0.5144", "tag-local-identifier": 9381}, "geometry": {"type": "Point", "coordinates": [13.578312, 57.504063, 194.0]}}, {"type": "Feature", "properties": {"event-id": 1154727245, "timestamp": "2013-07-21 04:07:09.000", "ground-speed": "0.0", "tag-local-identifier": 9381}, "geometry": {"type": "Point", "coordinates": [13.578205, 57.50415, 199.0]}}, {"type": "Feature", "properties": {"event-id": 1154727244, "timestamp": "2013-07-21 04:22:57.000", "ground-speed": "0.0", "tag-local-identifier": 9381}, "geometry": {"type": "Point", "coordinates": [13.577142000000002, 57.504177, 194.0]}}, {"type": "Feature", "