In [23]:
import numpy as np
import pandas as pd
import matplotlib as plt
import geopandas as geopd
import json

# The purpose of this document is to create metadata for all the features in the data set. It will contain information about each feature's source and how the data was processed and adapted into the main data set.

In [None]:
#Template for metadata (illustrative only, not executable).
#It mirrors the nesting and the entry keys used where meta_data_collection is built:
#a one-element list -> "Categories" -> category name -> list of entries.
'''
meta_data_collection = [
    {
        "Categories": {
            name_of_category: [
                {
                    data_id: name_of_element,
                    data_set: name_of_the_raw_data,
                    source: name_of_source > name_of_data_set,
                    methodology: specify the process we are using
                },
                {
                    data_id: name_of_element,
                    data_set: name_of_the_raw_data,
                    source: name_of_source > name_of_data_set,
                    methodology: specify the process we are using
                }
            ]
        }
    }
]
'''

In [2]:
#Import the data set with the new data you want to add
#Open the GeoJSON file and parse it into a plain Python dict.
#The path is a raw string so the Windows backslashes stay literal instead of
#relying on unrecognised escape sequences (\G, \J, \h), which Python warns about.
#The encoding is given explicitly so the read does not depend on the OS default code page.
with open(r"B:\Google Drive\JUPYTERBACKUP\GeoJSON\hexagon_collection_master.geojson", encoding="utf-8") as file:
    hexagon_collection_master = json.load(file)
#Bare last expression: the notebook displays the loaded collection
hexagon_collection_master

{'type': 'FeatureCollection',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'fid': 1,
    'id': 6274,
    'left': 1159000.632258716,
    'top': 1913897.9448048582,
    'right': 1162049.0416800373,
    'bottom': 1911257.9448048582,
    'adult_obesity_per_cell_weight_number_mean': 67000.0,
    'adult_obesity_per_cell_weight_number_median': 67000.0,
    'adult_obesity_pe_cell_weight_percent_mean': 21.8,
    'adult_obesity_per_cell_weight_percent_median': 21.8,
    'median_household_per_cell_income_sum': 59027.0,
    'median_household_per_cell_income_mean': 59027.0,
    'median_household_per_cell_income_median': 59027.0,
    'number_of_people_no_health_insurance_per_cell_sum': 6305.0,
    'number_of_people_no_health_insurance_per_cell_mean': 1159000.632258716,
    'parks_within_halfmile_per_cell': 1832058,
    'asthma_number_per_cell': 1162049.0416800373,
    'asthma_percent_per_cell': 1829417.944804

In [None]:
#List of fields to fill in for each of the data features.
#These four values are substituted into the methodology sentence built in the next cell.
data_source = 'Chicago Data Portal > CTA-Bus Stops - Shapefile'  #"<source> > <data set>"
geographical_information_system = 'QGIS'  #program used to adapt the data
function_used = 'Join Attributes by Location'  #function applied in that program
method_used = 'Count'  #summary applied by the function (closing quote was missing -> SyntaxError)

In [None]:
#Build the sentence that records the source of the data and how it was managed.
#The four placeholders are filled from the form fields defined in the previous cell.
data_management = (
    'Data was obtained from {source}, and adapted using {program}. '
    'Using {function}, by {attribute} the data was all index using the main Hexagonal Grid'
).format(
    source=data_source,
    program=geographical_information_system,
    function=function_used,
    attribute=method_used,
)
print(data_management)

In [13]:
#Build the metadata structure that is later serialised to JSON.
#Layout: a one-element list -> "Categories" -> category name -> list of entries.
#Every entry carries the same four keys, in this order:
#data_id, data_set, source, methodology.
meta_data_collection = [
    {
        "Categories": {
            "Demographics": [
                {
                    'data_id': 'median_household_per_cell_income',
                    'data_set': 'Chicago_MedianHouseholdIncome',
                    'source': 'Chicago Health Atlas > Median Household Income',
                    'methodology': (
                        'Data was obtained from Chicago Health Atlas > Median Household Income, '
                        'and adapted with QGIS. Using Join Attributes by Location, '
                        'by "mean" the data was all index using the main Hexagonal Grid'
                    ),
                },
                {
                    'data_id': 'population_no_health_insurance',
                    'data_set': 'Chicago_NoHealthInsurance',
                    'source': 'Chicago Health Atlas > No Health Insurance',
                    'methodology': (
                        'Data was obtained from Chicago Health Atlas > No Health Insurance, '
                        'and adapted with QGIS. Using Join Attributes by Location, '
                        'by "sum" the data was all index using the main Hexagonal Grid'
                    ),
                },
            ],
            #No healthcare entries have been described yet
            "Healthcare": [],
            "Environmental": [
                {
                    'data_id': 'park_count',
                    'data_set': 'Chicago_Parks',
                    'source': 'Chicago Data Portal > Chicago Park District Park Boundaries',
                    'methodology': (
                        'Data was obtained from Chicago Data Portal > Chicago Park District Park Boundaries, '
                        'and adapted with QGIS. Centroid of all the parks were created, within a half mile radus. '
                        'Using Join Attributes by Location, '
                        'by "count" the data was all index using the main Hexagonal Grid'
                    ),
                },
            ],
            "Transportation/Acccess": [
                {
                    'data_id': 'nearest_park_distance',
                    'data_set': 'Chicago_Parks',
                    'source': 'Chicago Data Portal > Chicago Park District Park Boundaries',
                    'methodology': (
                        'Data was obtained from Chicago Data Portal > Chicago Park District Park Boundaries, '
                        'and adapted with QGIS. Centroid of all the parks were created, '
                        'and calculated the distance to the nearest main Hexagonal Grid Centroid'
                    ),
                },
            ],
        },
    },
]

In [18]:
#Spot-check one entry: first list element -> "Categories" -> "Demographics" -> second entry (index 1)
meta_data_collection[0]["Categories"]["Demographics"][1]

{'data_id': 'population_no_health_insurance',
 'data_set': 'Chicago_NoHealthInsurance',
 'source': 'Chicago Health Atlas > No Health Insurance',
 'methodology': 'Data was obtained from Chicago Health Atlas > No Health Insurance, and adapted with QGIS. Using Join Attributes by Location, by "sum" the data was all index using the main Hexagonal Grid'}

In [21]:
#Save the metadata as a JSON file; mode 'w' overwrites the previous version.
#The path is a raw string so the Windows backslashes stay literal instead of
#relying on unrecognised escape sequences (\G, \J, \m), which Python warns about.
#The encoding is given explicitly so the file does not depend on the OS default code page.
with open(r'B:\Google Drive\JUPYTERBACKUP\GeoJSON\metadata_collection_master.json', 'w', encoding='utf-8') as outfile:
    json.dump(meta_data_collection, outfile)

In [26]:
#Pretty-print the metadata for review: 4-space indent, keys sorted alphabetically
print(
    json.dumps(
        meta_data_collection,
        indent=4,
        sort_keys=True,
    )
)

[
    {
        "Categories": {
            "Demographics": [
                {
                    "data_id": "median_household_per_cell_income",
                    "data_set": "Chicago_MedianHouseholdIncome",
                    "methodology": "Data was obtained from Chicago Health Atlas > Median Household Income, and adapted with QGIS. Using Join Attributes by Location, by \"mean\" the data was all index using the main Hexagonal Grid",
                    "source": "Chicago Health Atlas > Median Household Income"
                },
                {
                    "data_id": "population_no_health_insurance",
                    "data_set": "Chicago_NoHealthInsurance",
                    "methodology": "Data was obtained from Chicago Health Atlas > No Health Insurance, and adapted with QGIS. Using Join Attributes by Location, by \"sum\" the data was all index using the main Hexagonal Grid",
                    "source": "Chicago Health Atlas > No Health Insurance"
            