In [None]:
import json
import geojson
import geopandas
import csv
import copy
import ast
import os
from collections import OrderedDict

from geojson import Feature, Point, FeatureCollection


In [None]:
# Input Files

def read_geo_gson_file(file_path):
    """Load one GeoJSON file and return the value stored under its 'features' key.

    Prints how many features were loaded before returning them.
    """
    with open(file_path) as handle:
        collection = geojson.load(handle)
    loaded_features = collection['features']
    print(f'Loaded {len(loaded_features)} features from {file_path}')
    return loaded_features

def read_geojson_files(file_paths, base_path = "./"):
    """Load several GeoJSON files and return all of their features in one list.

    Each entry of ``file_paths`` is joined onto ``base_path`` and read with
    ``read_geo_gson_file``; results are concatenated in the given order.
    """
    combined = []
    for relative_path in file_paths:
        full_path = os.path.join(base_path, relative_path)
        combined.extend(read_geo_gson_file(full_path))
    return combined
    

def read_market_zip_code_usage(market_file_prefix, base_path="./source_data" ):
    """Return the 'Zip' column of a market's zip mapping CSV, left-padded to 5 characters.

    The CSV path comes from ``mapping_data_file_path`` with ``data_type="zip"``.
    The file is decoded as 'utf-8-sig', so a leading BOM does not end up in
    the first header name.
    """
    source_path = mapping_data_file_path(market_file_prefix, base_path=base_path, data_type="zip")
    with open(source_path, encoding='utf-8-sig') as handle:
        records = list(csv.DictReader(handle))
    # zfill restores leading zeros that were dropped from the zip codes.
    padded_zips = [record['Zip'].zfill(5) for record in records]
    print(f'Loaded {len(padded_zips)} zip_codes from {source_path}')
    return padded_zips
        
def read_market_block_group_usage(market_file_prefix, base_path="./source_data" ):
    """Return the 'BG' column of a market's block-group mapping CSV, left-padded to 12 characters.

    The CSV path comes from ``mapping_data_file_path`` with ``data_type="bg"``.
    The file is decoded as 'utf-8-sig', so a leading BOM does not end up in
    the first header name.

    Returns:
        list[str]: one zero-padded block-group id per CSV row, in file order.
    """
    csv_file_path = mapping_data_file_path(market_file_prefix, base_path=base_path, data_type="bg")
    with open(csv_file_path, encoding='utf-8-sig') as csvfile:
        # zfill restores leading zeros that were dropped from the ids.
        block_groups = [row['BG'].zfill(12) for row in csv.DictReader(csvfile)]
    print(f'Loaded {len(block_groups)} block groups from {csv_file_path}')
    # Previously returned the undefined local ``zip_codes`` (a copy/paste slip),
    # which either raised NameError or leaked a stale module-level value.
    return block_groups
        
# market_file_prefix = 'DallasFortWorth'
# market_prefix= 'dfw'
# DallasFortWorth__Mapping_BG_Data.csv
# ./source_data/{market_file_prefix}__Mapping_BG_Data.csv - the actual block group data
# 
# DallasFortWorth__Mapping_Zip_Data.csv
# ./source_data/{market_file_prefix}__Mapping_Zip_Data.csv - the actual zip code data
# 
# DallasFortWorth__Mapping_Settings_Transposed File_BG.csv
# ./source_data/{market_file_prefix}__Mapping_Settings_Transposed File_BG.csv - segment definitions for Block Groups
# DallasFortWorth__Mapping_Settings_Transposed File_Zip.csv
# ./source_data/{market_file_prefix}__Mapping_Settings_Transposed File_Zip.csv - segment definitions for Zip Codes

def file_type_for_data_type(data_type = "zip"):
    """Translate a data type key into the token used in source file names.

    "zip" maps to "Zip" and "bg" maps to "BG"; any other value yields None.
    """
    known_types = (("zip", "Zip"), ("bg", "BG"))
    for type_key, file_token in known_types:
        if data_type == type_key:
            return file_token
    return None

# DallasFortWorth__Mapping_BG_Data.csv
# ./source_data/{market_file_prefix}__Mapping_BG_Data.csv - the actual block group data
# 
# DallasFortWorth__Mapping_Zip_Data.csv
# ./source_data/{market_file_prefix}__Mapping_Zip_Data.csv - the actual zip code data
# 
def mapping_data_file_path(market_file_prefix, base_path="./source_data", data_type = "zip"):
    """Build the path of a market's mapping data CSV for the given data type.

    The result is ``{base_path}/{market_file_prefix}__Mapping_{Zip|BG}_Data.csv``.
    """
    type_token = file_type_for_data_type(data_type = data_type)
    file_name = f'{market_file_prefix}__Mapping_{type_token}_Data.csv'
    return f'{base_path}/{file_name}'

# DallasFortWorth__Mapping_Settings_Transposed File_BG.csv
# ./source_data/{market_file_prefix}__Mapping_Settings_Transposed File_BG.csv - segment definitions for Block Groups
# DallasFortWorth__Mapping_Settings_Transposed File_Zip.csv
# ./source_data/{market_file_prefix}__Mapping_Settings_Transposed File_Zip.csv - segment definitions for Zip Codes
def map_settings_data_file_path(market_file_prefix, base_path="./source_data", data_type = "zip"):
    """Build the path of a market's transposed map-settings CSV for the given data type.

    Note the file name contains a literal space ("Transposed File").
    """
    type_token = file_type_for_data_type(data_type = data_type)
    file_name = f'{market_file_prefix}__Mapping_Settings_Transposed File_{type_token}.csv'
    return f'{base_path}/{file_name}'

def published_map_data_path(market_prefix,  data_type = "zip", published_data_base_path="./public"):
    """Return the output path of the published GeoJSON data file for a market and data type."""
    file_name = f'{market_prefix}_{data_type}_data.json'
    return f'{published_data_base_path}/{file_name}'

def published_map_settings_path(market_prefix,  data_type = "zip", published_data_base_path="./public"):
    """Return the output path of the published settings JSON file for a market and data type."""
    file_name = f'{market_prefix}_{data_type}_settings_data.json'
    return f'{published_data_base_path}/{file_name}'

def published_map_labels_path(market_prefix,  data_type = "zip", published_data_base_path="./public"):
    """Return the output path of the published label-points JSON file for a market and data type."""
    file_name = f'{market_prefix}_{data_type}_labels.json'
    return f'{published_data_base_path}/{file_name}'

def read_mapping_data(market_file_prefix, base_path='./source_data', data_type='zip'):
    """Read every row of a market's mapping data CSV as a list of dicts.

    Args:
        market_file_prefix: file-name prefix of the market (e.g. 'DallasFortWorth').
        base_path: directory containing the source CSV files.
        data_type: "zip" or "bg"; selects which mapping file is read.

    Returns:
        list[dict]: one dict per CSV row, keyed by the header names.
    """
    csv_file_path = mapping_data_file_path(market_file_prefix, base_path=base_path, data_type = data_type)
    # Decode as 'utf-8-sig' like every other reader of these files: it strips a
    # leading BOM (which would otherwise be glued onto the first column name and
    # break lookups such as row['Zip']) and reads BOM-less UTF-8 unchanged.
    with open(csv_file_path, encoding='utf-8-sig') as csvfile:
        data_rows = list(csv.DictReader(csvfile))
    print(f'Loaded {len(data_rows)} data rows from {csv_file_path}')
    return data_rows

def create_zip_features(features, zip_data_rows):
    """Join zip-code data rows onto their GeoJSON features.

    For every data row, the first feature whose ``properties["ZCTA5CE10"]``
    equals the row's 'Zip' value (left-padded to 5 characters) is deep-copied
    and the row's other columns are added to the copy's properties. Rows with
    no matching feature are dropped; the input features are never mutated.

    Column values are parsed with ``ast.literal_eval`` after removing commas
    (thousands separators); empty values and the literal string 'inf' are
    skipped.

    Args:
        features: iterable of GeoJSON feature mappings.
        zip_data_rows: iterable of dict rows containing a 'Zip' column.

    Returns:
        list: one enriched feature copy per matched row, in row order.

    Raises:
        ValueError / SyntaxError: from ``ast.literal_eval`` if a value is not
            a Python literal.
    """
    # Index the features once instead of rescanning the whole list for every
    # row. setdefault keeps the FIRST feature per code, matching the original
    # "break on first match" behaviour.
    features_by_zip = {}
    for feature in features:
        features_by_zip.setdefault(feature['properties'].get("ZCTA5CE10"), feature)

    new_features_to_keep = []
    for data_row in zip_data_rows:
        feature = features_by_zip.get(data_row['Zip'].zfill(5))
        if feature is None:
            continue
        new_feature = copy.deepcopy(feature)
        for key, raw_value in data_row.items():
            if key == 'Zip' or not raw_value or raw_value == 'inf':
                continue
            new_feature['properties'][key] = ast.literal_eval(raw_value.replace(',', ''))
        new_features_to_keep.append(new_feature)
    print(f'Processed {len(new_features_to_keep)} features to use')
    return new_features_to_keep

def create_bg_features(features, bg_data_rows):
    """Join block-group data rows onto their GeoJSON features.

    For every data row, the first feature whose ``properties["GEOID10"]``
    equals the row's 'BG' value (stringified and left-padded to 12
    characters) is deep-copied and the row's other columns are added to the
    copy's properties. Rows with no matching feature are dropped; the input
    features are never mutated.

    Column values are parsed with ``ast.literal_eval`` after removing commas
    (thousands separators); empty values and the literal string 'inf' are
    skipped.

    Args:
        features: iterable of GeoJSON feature mappings.
        bg_data_rows: iterable of dict rows containing a 'BG' column.

    Returns:
        list: one enriched feature copy per matched row, in row order.

    Raises:
        ValueError / SyntaxError: from ``ast.literal_eval`` if a value is not
            a Python literal.
    """
    # Index the features once instead of rescanning the whole list for every
    # row. setdefault keeps the FIRST feature per id, matching the original
    # "break on first match" behaviour.
    features_by_geoid = {}
    for feature in features:
        features_by_geoid.setdefault(feature['properties'].get("GEOID10"), feature)

    new_features_to_keep = []
    for data_row in bg_data_rows:
        feature = features_by_geoid.get(str(data_row['BG']).zfill(12))
        if feature is None:
            continue
        new_feature = copy.deepcopy(feature)
        for key, raw_value in data_row.items():
            if key == 'BG' or not raw_value or raw_value == 'inf':
                continue
            new_feature['properties'][key] = ast.literal_eval(raw_value.replace(',', ''))
        new_features_to_keep.append(new_feature)
    print(f'Processed {len(new_features_to_keep)} features to use')
    return new_features_to_keep

def create_geojson(features):
    """Wrap ``features`` in a ``FeatureCollection`` after printing the feature count."""
    feature_count = len(features)
    print(f'Created GeoJson with  {feature_count} features')
    collection = FeatureCollection(features)
    return collection

def write_geojson(geojson_data, market_prefix, data_type='zip',published_data_base_path = "./public"):
    """Serialize ``geojson_data`` and write it to the market's published data file.

    The output goes to
    ``{published_data_base_path}/{market_prefix}_{data_type}_data.json``
    (the path built by ``published_map_data_path``), with keys sorted.

    Args:
        geojson_data: object accepted by ``geojson.dumps``.
        market_prefix: short market name used in published file names.
        data_type: "zip" or "bg".
        published_data_base_path: output directory; it must already exist.
    """
    output_path = published_map_data_path(
        market_prefix, data_type=data_type, published_data_base_path=published_data_base_path
    )
    geojson_string = geojson.dumps(geojson_data, sort_keys=True)
    # The context manager closes the file even if the write fails part-way.
    with open(output_path, "w") as f:
        f.write(geojson_string)
    print(f'Wrote geojson data to  {output_path}')
    
def read_map_settings(market_file_prefix, base_path="./source_data", data_type = "zip"):
    """Read a market's transposed map-settings CSV into a list of row dicts.

    Prints the resolved CSV path, then each row as it is read, then a row
    count. The file is decoded as 'utf-8-sig', so a leading BOM does not end
    up in the first header name.
    """
    settings_path = map_settings_data_file_path(market_file_prefix, base_path=base_path, data_type = data_type)
    print(settings_path)
    settings_rows = []
    with open(settings_path, encoding='utf-8-sig') as handle:
        for record in csv.DictReader(handle):
            print(record)
            settings_rows.append(record)
    print(f'Loaded {len(settings_rows)} data rows from {settings_path}')   
    return settings_rows

def write_settings_data(settings_data, market_file_prefix, base_path="./public", data_type = "zip"):
    """Write settings rows to the published settings JSON file, keyed by 'Bain_Short_Name'.

    Rows are stored in input order; when two rows share a 'Bain_Short_Name'
    the later row replaces the earlier one.

    Args:
        settings_data: iterable of dict rows, each containing 'Bain_Short_Name'.
        market_file_prefix: prefix used for the OUTPUT file name. It is passed
            straight to ``published_map_settings_path``, so callers supply the
            short published prefix here.
        base_path: output directory; it must already exist.
        data_type: "zip" or "bg".

    Raises:
        KeyError: if a row has no 'Bain_Short_Name' column.
    """
    processed_settings_data = OrderedDict(
        (row['Bain_Short_Name'], row) for row in settings_data
    )
    json_string = json.dumps(processed_settings_data, indent = 2, sort_keys=False)
    settings_data_file_path = published_map_settings_path(market_file_prefix,  data_type = data_type, published_data_base_path=base_path)
    # The context manager closes the file even if the write fails part-way.
    with open(settings_data_file_path, "w") as f:
        f.write(json_string)
    print(f'Wrote settings data rows to {settings_data_file_path}') 
    
def create_zip_labels(market_file_prefix, base_path="./source_data" , data_type = "zip" , published_data_base_path="./public", market_prefix=None):
    """Write a GeoJSON FeatureCollection of label points, one per zip-code data row.

    Each row of the market's zip mapping CSV becomes a Point feature whose
    ``id`` property is the row's 'Zip' value and whose coordinates are
    ``[intptlong, intptlat]`` converted to float.

    Args:
        market_file_prefix: file-name prefix of the source CSV.
        base_path: directory containing the source CSV files.
        data_type: data type token used in the OUTPUT file name (the input is
            always the zip mapping file).
        published_data_base_path: output directory; it must already exist.
        market_prefix: short market name used in the output file name. When
            omitted, the module-level ``market_prefix`` variable is used,
            which is what this function silently relied on before.

    Raises:
        NameError: if ``market_prefix`` is omitted and no module-level
            ``market_prefix`` exists.
        KeyError / ValueError: if a row lacks 'Zip', 'intptlong' or
            'intptlat', or a coordinate is not numeric.
    """
    if market_prefix is None:
        # Backward compatibility with callers that set a global before calling.
        try:
            market_prefix = globals()["market_prefix"]
        except KeyError:
            raise NameError(
                "name 'market_prefix' is not defined; pass market_prefix=..."
            ) from None

    csv_file_path = mapping_data_file_path(market_file_prefix, base_path=base_path, data_type = "zip")
    with open(csv_file_path, encoding='utf-8-sig') as csvfile:
        data_rows = list(csv.DictReader(csvfile))

    features = [
        {
            "type": "Feature",
            "properties": {
                "id": data_row['Zip']
            },
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered longitude, latitude.
                "coordinates": [ float(data_row['intptlong']), float(data_row['intptlat']) ]
            }
        }
        for data_row in data_rows
    ]
    geo_json = {
        "type": "FeatureCollection",
        "features": features
    }
    json_string = json.dumps(geo_json, indent = 2, sort_keys=True)
    # Honour the published_data_base_path argument; it used to be ignored in
    # favour of a hard-coded "./public".
    output_file_path = published_map_labels_path(market_prefix,  data_type , published_data_base_path=published_data_base_path)
    # The context manager closes the file even if the write fails part-way.
    with open(output_file_path, "w") as f:
        f.write(json_string)
    print(f'Wrote geojson labels data to  {output_file_path}')

def create_bg_labels(market_file_prefix, base_path="./source_data" , data_type = "bg" , published_data_base_path="./public", market_prefix=None):
    """Write a GeoJSON FeatureCollection of label points, one per block-group data row.

    Each row of the market's block-group mapping CSV becomes a Point feature
    whose ``id`` property is the row's 'BG' value and whose coordinates are
    ``[LONG, LAT]`` converted to float.

    Args:
        market_file_prefix: file-name prefix of the source CSV.
        base_path: directory containing the source CSV files.
        data_type: data type token used in the OUTPUT file name (the input is
            always the block-group mapping file).
        published_data_base_path: output directory; it must already exist.
        market_prefix: short market name used in the output file name. When
            omitted, the module-level ``market_prefix`` variable is used,
            which is what this function silently relied on before.

    Raises:
        NameError: if ``market_prefix`` is omitted and no module-level
            ``market_prefix`` exists.
        KeyError / ValueError: if a row lacks 'BG', 'LONG' or 'LAT', or a
            coordinate is not numeric.
    """
    if market_prefix is None:
        # Backward compatibility with callers that set a global before calling.
        try:
            market_prefix = globals()["market_prefix"]
        except KeyError:
            raise NameError(
                "name 'market_prefix' is not defined; pass market_prefix=..."
            ) from None

    csv_file_path = mapping_data_file_path(market_file_prefix, base_path=base_path, data_type = "bg")
    with open(csv_file_path, encoding='utf-8-sig') as csvfile:
        data_rows = list(csv.DictReader(csvfile))

    features = [
        {
            "type": "Feature",
            "properties": {
                "id": data_row['BG']
            },
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered longitude, latitude.
                "coordinates": [ float(data_row['LONG']), float(data_row['LAT']) ]
            }
        }
        for data_row in data_rows
    ]
    geo_json = {
        "type": "FeatureCollection",
        "features": features
    }
    json_string = json.dumps(geo_json, indent = 2, sort_keys=True)
    # Honour the published_data_base_path argument; it used to be ignored in
    # favour of a hard-coded "./public".
    output_file_path = published_map_labels_path(market_prefix,  data_type , published_data_base_path=published_data_base_path)
    # The context manager closes the file even if the write fails part-way.
    with open(output_file_path, "w") as f:
        f.write(json_string)
    print(f'Wrote geojson labels data to  {output_file_path}')
    

In [None]:
# Base directory of the project data. The original value carried a stray
# leading single quote inside the string, which made the path invalid.
geojson_base_path = "/Users/alanmccann/Dropbox/bain/"

# One entry per market to publish:
#   market_file_prefix - prefix of the market's source CSV file names
#   market_prefix      - short name used in the published output file names
#   geojson_files      - boundary GeoJSON files to search, per data type
#                        ("zip" = zip-code boundaries, "bg" = block groups)
# Each market appears exactly once; the duplicated DallasFortWorth and Seattle
# entries were removed because they only caused the same outputs to be
# regenerated a second time.
markets_data = [
    {
        "market_file_prefix": 'CharlestonSC',
        "market_prefix": "charleston",
        "geojson_files": {
            "zip": ['sc_south_carolina_zip_codes_geo.min.json'],
            "bg": ['tl_2010_45_bg10.json'],
        },
    },
    {
        "market_file_prefix": 'DallasFortWorth',
        "market_prefix": "dfw",
        "geojson_files": {
            "zip": ['tx_texas_zip_codes_geo.min.json'],
            "bg": ['tl_2010_48_bg10.json'],
        },
    },
    {
        "market_file_prefix": 'WashingtonDC',
        "market_prefix": "dc",
        "geojson_files": {
            "zip": [
                'dc_district_of_columbia_zip_codes_geo.min.json',
                'va_virginia_zip_codes_geo.min.json',
                'md_maryland_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_11_bg10.json',
                'tl_2010_24_bg10.json',
                'tl_2010_51_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Denver',
        "market_prefix": "denver",
        "geojson_files": {
            "zip": ['co_colorado_zip_codes_geo.min.json'],
            "bg": ['tl_2010_08_bg10.json'],
        },
    },
    {
        "market_file_prefix": 'KansasCity',
        "market_prefix": "kansascity",
        "geojson_files": {
            "zip": [
                'ks_kansas_zip_codes_geo.min.json',
                'mo_missouri_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_20_bg10.json',
                'tl_2010_29_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Minnesota',
        "market_prefix": "minnesota",
        "geojson_files": {
            "zip": [
                'mn_minnesota_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_27_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'NewYorkCity',
        "market_prefix": "nyc",
        "geojson_files": {
            "zip": [
                'ny_new_york_zip_codes_geo.min.json',
                'nj_new_jersey_zip_codes_geo.min.json',
                'pa_pennsylvania_zip_codes_geo.min.json',
                'ct_connecticut_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_36_bg10.json',
                'tl_2010_34_bg10.json',
                'tl_2010_42_bg10.json',
                'tl_2010_09_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'SanDiego',
        "market_prefix": "sandiego",
        "geojson_files": {
            "zip": [
                'ca_california_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_06_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Seattle',
        "market_prefix": "seattle",
        "geojson_files": {
            "zip": [
                'wa_washington_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_53_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Tampa',
        "market_prefix": "tampa",
        "geojson_files": {
            "zip": [
                'fl_florida_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_12_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Texas',
        "market_prefix": "texas",
        "geojson_files": {
            "zip": [
                'tx_texas_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_48_bg10.json',
            ],
        },
    },
    {
        "market_file_prefix": 'Waco',
        "market_prefix": "waco",
        "geojson_files": {
            "zip": [
                'tx_texas_zip_codes_geo.min.json',
            ],
            "bg": [
                'tl_2010_48_bg10.json',
            ],
        },
    },
]


# Notebook cell output: show the first market entry as a quick sanity check.
markets_data[0]

# {
#  "08": "Colorado",
#   "09": "Connecticut",
#   "11": "District of Columbia",
#   "24": "Maryland",
#   "34": "New Jersey",
#   "36": "New York",
#   "42": "Pennsylvania",
#   "48": "Texas",
#   "51": "Virginia",

# }


In [None]:

# Driver cell: for every configured market, publish the zip-code outputs and
# then the block-group outputs (GeoJSON data, settings JSON, label points).
# NOTE: the first assignment below is immediately overridden by the second,
# so the absolute Dropbox path is the one actually used.
source_data_base_path = "./source_data"
source_data_base_path = "/Users/alanmccann/Dropbox/bain/16th_run"
published_data_base_path = "./public"
geojson_files_base_path = '/Users/alanmccann/Dropbox/bain/map_source_data'

for market in markets_data:
    market_file_prefix = market['market_file_prefix']
    # Keep this variable name: create_zip_labels / create_bg_labels are called
    # below without a market prefix and pick `market_prefix` up from module scope.
    market_prefix = market['market_prefix']

    # --- zip-code pass ---
    data_type = 'zip'
    print(f'market: {market_prefix}')
    geojson_files = market['geojson_files']["zip"]
    # The returned list is not used further in this cell.
    zip_codes = read_market_zip_code_usage(market_file_prefix, base_path=source_data_base_path )
    zip_data_rows = read_mapping_data(market_file_prefix, source_data_base_path, data_type)
    features = read_geojson_files(geojson_files, base_path=geojson_files_base_path)
    # Keep only the features that have a data row, enriched with that row's columns.
    features_to_use = create_zip_features(features, zip_data_rows)
    new_geojson = create_geojson(features_to_use)
    write_geojson(new_geojson, market_prefix, data_type, published_data_base_path=published_data_base_path)
    # Settings are read using the source file prefix but written using the
    # short published prefix.
    map_settings_data = read_map_settings(market_file_prefix, base_path=source_data_base_path, data_type = "zip")
    print(map_settings_data)
    write_settings_data(map_settings_data, market_prefix, base_path=published_data_base_path, data_type = "zip")


    create_zip_labels(market_file_prefix, base_path=source_data_base_path , data_type = "zip" , published_data_base_path=published_data_base_path)

    # --- block-group pass (same steps as above with the "bg" inputs) ---
    data_type = 'bg'
    geojson_files = market['geojson_files']["bg"]
    # The returned list is not used further in this cell.
    block_groups = read_market_block_group_usage(market_file_prefix, base_path=source_data_base_path )
    block_group_rows = read_mapping_data(market_file_prefix, source_data_base_path, data_type)
    features = read_geojson_files(geojson_files, base_path=geojson_files_base_path)
    features_to_use = create_bg_features(features, block_group_rows)
    new_geojson = create_geojson(features_to_use)
    write_geojson(new_geojson, market_prefix, data_type, published_data_base_path=published_data_base_path)

    # Block-group settings and label points.
    map_settings_data = read_map_settings(market_file_prefix, base_path=source_data_base_path, data_type = "bg")
    write_settings_data(map_settings_data, market_prefix, base_path=published_data_base_path, data_type = "bg")
    create_bg_labels(market_file_prefix, base_path=source_data_base_path , data_type = "bg" , published_data_base_path=published_data_base_path)
