In [None]:
import requests, zipfile, io, os
import timeit

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely import wkt
from shapely.geometry import Polygon

import matplotlib.pyplot as plt
import contextily as ctx

import folium
import branca

In [None]:
# Visit threshold: entries are only kept when their number of occurrences
# is greater than this value.
MIN_VISITS = 0

# Year of the data set to process ('2018', '2019' or '2020').
DataYear = '2020'

# Name of the municipality to be processed.
municipality = 'Dresden'

# True  -> always re-read the original csv file.
# False -> use the prepared file from the cache if it is available.
Load_Original_Data = False

# Enables the additional debug plots/output.
IsDebug = False

# Coordinate reference systems: the one the data is read in and the one it
# is converted to for caching and plotting.
source_crs = 'EPSG:4326'
target_crs = 'EPSG:3857'

# Load desired shape file
This is done first so that the data can be filtered to the desired area as early as possible.

In [None]:
# Construct the file names of the original data file and of the cache file
# from the parameters chosen above.
# The folders are built with os.path.join so the notebook is not tied to the
# Windows path separator; the trailing "" keeps the separator at the end, so
# "<folder> + <filename>" concatenation keeps working.
data_folder = os.path.join("data", "movebis", "")
data_filename = "verkehrsmengen_%s" % DataYear
data_fileext = ".csv.tar.gz"

cache_folder = os.path.join("cache", "")
# the cache file is written later on, so its folder has to exist
os.makedirs(cache_folder, exist_ok=True)
cache_filename = cache_folder + data_filename + "_%d_%s.csv" % (MIN_VISITS, municipality)

In [None]:
# Construct the file name of the shape file.
# os.path.join keeps the path portable; the trailing "" leaves the separator
# at the end so the parts can still be concatenated with "+".
shapefile_folder = os.path.join("data", "shapefiles", "")
shapefile_filename = "gem"  # Municipalities (='gem') or counties (='kreis')
shapefile_fileext = ".shp"

In [None]:
# Load all shapes and bring them into source_crs, the coordinate system the
# data is read in, so that the shapes can be used for clipping the data.
gem_sn = gpd.read_file(shapefile_folder + shapefile_filename + shapefile_fileext)
gem_sn = gem_sn.to_crs(source_crs)
if IsDebug:
    gem_sn.plot()
    # a bare expression inside an if-block is not displayed by the notebook,
    # so the preview has to be printed explicitly
    print(gem_sn.head())

In [None]:
# Only use the borders of our municipality.
poly_municipality = gem_sn[gem_sn.ORTSNAME == municipality]

# Fail early with a clear message: an empty selection would otherwise only
# show up after the (long running) clipping of the data.
if poly_municipality.empty:
    raise ValueError("municipality '%s' not found in column ORTSNAME of the shape file" % municipality)

if IsDebug:
    poly_municipality.plot()

# Read original data and do some preprocessing

In [None]:
# Without a cache file there is no choice: the original data has to be loaded,
# regardless of what was configured.
Load_Original_Data = Load_Original_Data or not os.path.isfile(cache_filename)

print('loading original data' if Load_Original_Data else 'using cache file ' + cache_filename)

In [None]:
#for debugging: read the first 10 lines from the file
#data_speed_test = pd.read_csv(data_folder + data_filename + data_fileext, compression='gzip', header=0, sep=',', quotechar='"', error_bad_lines=False, nrows=10)
#data_speed_test

In [None]:
# Read the original data in chunks, keep only the links inside the
# municipality and append them (converted to target_crs) to the cache file.
start = timeit.default_timer()

if Load_Original_Data:
    first = True  # write the csv header only for the first chunk
    count = 0     # number of rows read from the original file so far
    chunksize = 1000000

    # The chunks are appended to the cache file, so make sure its folder
    # exists and that no old cache file is left over.
    cache_dir = os.path.dirname(cache_filename)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    if os.path.isfile(cache_filename):
        os.remove(cache_filename)

    # NOTE(review): newer pandas releases replace error_bad_lines=False with
    # on_bad_lines='skip' - adjust when upgrading pandas.
    for chunked_df in pd.read_csv(data_folder + data_filename + data_fileext, skip_blank_lines=True,
                                  header=0, sep=',', quotechar='"', error_bad_lines=False, chunksize=chunksize):

        # time needed to process this chunk
        start = timeit.default_timer()

        # count the rows that were actually read (the last chunk is usually
        # smaller than chunksize)
        count = count + len(chunked_df)

        # rename first column to 'geometry' (per default it has the name of the csv file)
        chunked_df = chunked_df.rename(columns={chunked_df.columns[0]: "geometry"})

        # keep only links with more than MIN_VISITS occurrences. We don't want backyards :-)
        chunked_df = chunked_df[chunked_df.occurrences > MIN_VISITS].copy()

        # convert column with the coordinates into the right data type
        # e.g. "LINESTRING (13.8774464 51.0015535, 13.8774464 51.0016493)",22.123343908730444,56
        chunked_df['geometry'] = chunked_df['geometry'].apply(wkt.loads)

        # convert to GeoDataFrame
        chunked_gdf = gpd.GeoDataFrame(chunked_df, geometry='geometry', crs=source_crs)

        # Some lines don't contain correct coordinates (empty geometries) -> remove.
        # This has to happen on the parsed geometries and before the data is
        # clipped and written, otherwise it has no effect.
        chunked_gdf = chunked_gdf[~chunked_gdf.geometry.is_empty]

        # Filter data to our municipality (this will take a while :-)) and save to cache file
        chunked_gdf_municipality = gpd.clip(chunked_gdf, poly_municipality)
        chunked_gdf_municipality = chunked_gdf_municipality.to_crs(target_crs)
        chunked_gdf_municipality.to_csv(cache_filename, index=False, header=first, mode='a')
        first = False

        end = timeit.default_timer()
        print('%d %fsec' % (count, (end - start)))
        

In [None]:
# Load the cached csv file and turn its WKT column back into geometries.
# The cache was written in target_crs, so that is the crs assigned here.
cached_df = pd.read_csv(cache_filename, skip_blank_lines=True)
cached_df['geometry'] = cached_df['geometry'].apply(wkt.loads)
gdf_municipality = gpd.GeoDataFrame(cached_df, geometry='geometry', crs=target_crs)

# Rows without usable coordinates show up as empty geometries -> remove them.
empty_rows = gdf_municipality.index[gdf_municipality.geometry.is_empty]
gdf_municipality = gdf_municipality.drop(index=empty_rows)

# Results

In [None]:
# Construct the base name of the result files from the chosen parameters.
# os.path.join keeps the path portable; the trailing "" leaves the separator
# at the end so that the file name can simply be appended.
results_folder = os.path.join("results", "")
# the result files are written below, so the folder has to exist
os.makedirs(results_folder, exist_ok=True)
results_filename = results_folder + data_filename + "_%d_%s" % (MIN_VISITS, municipality)

# Column that is visualised and its maximum (upper end of the colour scale).
data_field = 'occurrences'
max_value = gdf_municipality[data_field].max()
print(max_value)

# Attribution (HTML) shown on the map: base map attribution plus the data source.
carto_attribution='\u0026copy; \u003ca href=\"http://www.openstreetmap.org/copyright\"\u003eOpenStreetMap\u003c/a\u003e contributors \u0026copy; \u003ca href=\"http://cartodb.com/attributions\"\u003eCartoDB\u003c/a\u003e, CartoDB \u003ca href =\"http://cartodb.com/attributions\"\u003eattributions\u003c/a\u003e'
custom_attribution=carto_attribution + ' | \u0026copy; \u003ca href=\"https://www.mcloud.de/web/guest/suche/-/results/suche/relevance/movebis/0/detail/33427A5A-0ADB-40B1-8A1A-390B67B0380B"\u003eMovebis\u003c/a\u003e'

# From here on the border of the municipality is needed in target_crs,
# the coordinate system of the cached data.
poly_municipality = poly_municipality.to_crs(target_crs)

## Display the data on a map

In [None]:
# Static map (png): links coloured by class of data_field on top of a base map.

# Draw the links as buffered shapes (70 map units of the data's crs) so that
# they stay visible at the scale of the whole municipality.
df_plot = gdf_municipality.copy()
df_plot.geometry = df_plot.buffer(70)

width = 20
height = width

zoom_level = 14
dpi = zoom_level * 20

# Upper limits of the classes handed to the 'userdefined' classifier.
# TODO: calculate the values based on the maximum
bins = [1, 2, 4, 8, 12, 16, 20]

# Derive the legend labels from the bins so that text and classes always agree.
# Per the mapclassify documentation, UserDefined appends one more class when
# the data exceeds the last bin, so a label for it is only added in that case.
label_list = ['bis %d' % bins[0]]
label_list += ['%d-%d' % (lower, upper) for lower, upper in zip(bins[:-1], bins[1:])]
if df_plot[data_field].max() > bins[-1]:
    label_list.append('über %d' % bins[-1])

print('creating plot')
ax = df_plot.plot(column=data_field, scheme='userdefined', figsize=(width, height), legend=True, alpha=0.3,
                  cmap='rainbow',
                  classification_kwds={'bins': bins},
                  legend_kwds={
                      'bbox_to_anchor': (1.0, 1.0),
                      'title': "Anzahl pro Link",
                      'labels': label_list})

print('adding map')
# adding municipality border layer (drawn onto the axes created above)
fin_plot = poly_municipality.plot(facecolor="none", edgecolor='black', linewidths=0.6, figsize=(width, height), ax=ax)

# adding basemap
ctx.add_basemap(fin_plot, source=ctx.providers.CartoDB.Positron, zoom=zoom_level)

fig = ax.get_figure()

# remove axis
ax.set_axis_off()
fin_plot.set_axis_off()

# add title
ax.set_title('Anzahl erfasster Fahrten\n Daten: Movebis - Projekt\nJahr: %s Gemeinde: %s' % (DataYear, municipality))

# data source, placed in the lower right corner of the axes
ax.text(0.995, 0.004, transform=ax.transAxes, horizontalalignment='right', size='small',
        s="Quelle: Movebis, https://www.mcloud.de/web/guest/suche/-/results/suche/relevance/movebis/0/detail/33427A5A-0ADB-40B1-8A1A-390B67B0380B")

# make sure the output folder exists before writing the image
png_filename = results_filename + ('_%d' % zoom_level) + '.png'
png_folder = os.path.dirname(png_filename)
if png_folder:
    os.makedirs(png_folder, exist_ok=True)
fig.savefig(png_filename, bbox_inches='tight', pad_inches=0, dpi=dpi)

## HTML with zoomable map

In [None]:
# Data for the interactive map; the values are rounded for the tooltip.
df = gdf_municipality.copy()
df[data_field] = df[data_field].round(0)

# Calculate the center of the map from poly_municipality.
# total_bounds yields the scalar extent (minx, miny, maxx, maxy) of the whole
# selection, so the center is a plain pair of floats and the polygon only has
# to be converted to source_crs once.
minx, miny, maxx, maxy = poly_municipality.to_crs(source_crs).total_bounds
location_lat = float((miny + maxy) / 2)
location_lon = float((minx + maxx) / 2)

# create map with tile source
m = folium.Map(location=[location_lat, location_lon],
               zoom_start=13,
               tiles='https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_all/{z}/{x}/{y}.png',
               attr=custom_attribution)

# colour scale from MIN_VISITS up to the maximum of the displayed values
colorscale = branca.colormap.LinearColormap(['blue','orange','orange'], index=None, vmin=MIN_VISITS, vmax=max_value, caption=data_field)

def style_function(feature):
    """Return the line style for a feature.

    The colour is looked up in ``colorscale`` using the feature's
    ``data_field`` property; features without a value are drawn grey.
    """
    value = feature['properties'][data_field]
    if value is None:
        line_color = 'grey'
    else:
        line_color = colorscale(value)
    return {'opacity': 0.8, 'weight': 3, 'color': line_color}

def highlight_function(feature):
    """Return the highlight style: a wide grey line, the same for every feature."""
    highlight_style = dict(weight=8, color='grey')
    return highlight_style

# Add the links as a GeoJson layer with a tooltip showing data_field.
folium.GeoJson(
    df,
    tooltip=folium.GeoJsonTooltip(fields=[data_field]),
    style_function=style_function,
    highlight_function=highlight_function
).add_to(m)

# show the colour scale as legend on the map
m.add_child(colorscale)

# make sure the output folder exists before writing the html file
html_filename = results_filename + '.html'
html_folder = os.path.dirname(html_filename)
if html_folder:
    os.makedirs(html_folder, exist_ok=True)
m.save(html_filename)