From ea66395945418cd688cfe1279f38ad0d67e9d764 Mon Sep 17 00:00:00 2001
From: AD
Date: Fri, 18 Jan 2019 14:23:44 +0100
Subject: [PATCH] refactor initial alpha shapes part, not tested

---
 tagmaps/__main__.py             |  29 +-
 tagmaps/classes/alpha_shapes.py | 231 ++++++++++++++
 tagmaps/classes/cluster.py      | 547 +++++++++++++++++---------------
 tagmaps/classes/load_data.py    |   6 +-
 tagmaps/classes/utils.py        | 297 ++++-------------
 5 files changed, 603 insertions(+), 507 deletions(-)
 create mode 100644 tagmaps/classes/alpha_shapes.py

diff --git a/tagmaps/__main__.py b/tagmaps/__main__.py
index bc8ac51..72cc413 100644
--- a/tagmaps/__main__.py
+++ b/tagmaps/__main__.py
@@ -124,7 +124,8 @@ def main():
             clusterer_type=cls_type,
             bounds=lbsn_data.bounds,
             cleaned_post_dict=cleaned_post_dict,
-            prepared_data=prepared_data
+            prepared_data=prepared_data,
+            local_saturation_check=cfg.local_saturation_check
         )
         clusterer_list.append(clusterer)

@@ -137,18 +138,20 @@ def main():
     if cfg.auto_mode or user_intf.abort is False:
         for clusterer in clusterer_list:
-            if not clusterer.ClusterType == ClusterGen.LOCATIONS:
-                if clusterer.ClusterType == ClusterGen.TAGS:
-                    log.info("Tag clustering: \n")
-                else:
-                    log.info("Emoji clustering: \n")
-                clusterer.cluster_all()
-                log.info(
-                    "########## STEP 4 of 6: Generating Alpha Shapes ##########")
-                clusterer.alpha_shapes()
-                log.info(
-                    "########## STEP 5 of 6: Writing Results to Shapefile ##########")
-                clusterer.write_results()
+            if clusterer.ClusterType == ClusterGen.LOCATIONS:
+                # skip location clustering for now
+                continue
+            if clusterer.ClusterType == ClusterGen.TAGS:
+                log.info("Tag clustering: \n")
+            else:
+                log.info("Emoji clustering: \n")
+            clusterer.cluster_all()
+            log.info(
+                "########## STEP 4 of 6: Generating Alpha Shapes ##########")
+            clusterer.alpha_shapes()
+            log.info(
+                "########## STEP 5 of 6: Writing Results to Shapefile ##########")
+            clusterer.write_results()
     else:
         print(f'\nUser abort.')
     if cfg.cluster_locations and user_intf.abort is False:
diff --git a/tagmaps/classes/alpha_shapes.py b/tagmaps/classes/alpha_shapes.py
new file mode 100644
index 0000000..6e8d489
--- /dev/null
+++ b/tagmaps/classes/alpha_shapes.py
@@ -0,0 +1,231 @@
+# -*- coding: utf-8 -*-
+
+"""
+Module for tag maps alpha shapes generation
+"""
+
+import math
+from math import sqrt
+from decimal import Decimal
+import pyproj
+import numpy as np
+import shapely.geometry as geometry
+from shapely.ops import transform, cascaded_union, polygonize
+from descartes import PolygonPatch
+from scipy.spatial import Delaunay
+from fiona.crs import from_epsg
+from tagmaps.classes.utils import Utils
+
+
+class AlphaShapes():
+
+    @staticmethod
+    def _get_best_utmzone(bound_points_shapely: geometry.MultiPoint):
+        """Calculate best UTM Zone SRID/EPSG Code
+
+        Args:
+            bound_points_shapely: point collection whose true centroid
+                is used for zone selection (coords may be multipoint)
+        """
+        input_lon_center = bound_points_shapely.centroid.coords[0][0]
+        input_lat_center = bound_points_shapely.centroid.coords[0][1]
+        epsg_code = AlphaShapes._convert_wgs_to_utm(
+            input_lon_center, input_lat_center)
+        crs_proj = pyproj.Proj(init=f'epsg:{epsg_code}')
+        return crs_proj
+
+    @staticmethod
+    def _convert_wgs_to_utm(lon: float, lat: float):
+        """Get EPSG code of the best UTM zone for a WGS1984 coordinate
+
+        Args:
+            lon: longitude (decimal degrees)
+            lat: latitude (decimal degrees)
+
+        Returns:
+            str: EPSG code ('326xx' for the northern,
+                '327xx' for the southern hemisphere)
+
+        Notes:
+            # https://stackoverflow.com/questions/40132542/get-a-cartesian-projection-accurate-around-a-lat-lng-pair
+        """
+
+        utm_band = str((math.floor((lon + 180) / 6) % 60) + 1)
+        if len(utm_band) == 1:
+            utm_band = '0'+utm_band
+        if lat >= 0:
+            epsg_code = '326' + utm_band
+        else:
+            epsg_code = '327' + utm_band
+        return epsg_code
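For reference, the zone arithmetic in `_convert_wgs_to_utm` can be checked in isolation. A minimal standalone sketch of the same logic (not part of the patch; the Dresden coordinate is only an illustrative test value):

```python
import math

def convert_wgs_to_utm(lon: float, lat: float) -> str:
    # UTM zones are 6 degrees of longitude wide; zone 1 starts at 180degW,
    # so (lon + 180) / 6 indexes the zone, wrapped into the range 1..60
    utm_band = str((math.floor((lon + 180) / 6) % 60) + 1).zfill(2)
    # EPSG 326xx = WGS84 / UTM north, 327xx = WGS84 / UTM south
    return ('326' if lat >= 0 else '327') + utm_band

# Dresden (13.73 E, 51.05 N) lies in UTM zone 33N -> EPSG:32633
assert convert_wgs_to_utm(13.73, 51.05) == '32633'
```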
+
+    @staticmethod
+    def get_cluster_shape(
+            toptag, clusterPhotoGuidList,
+            cleanedPhotoDict, crs_wgs, crs_proj,
+            clusterTreeCuttingDist, localSaturationCheck):
+        # we define a new list of temp alpha shapes outside the loop,
+        # so that it is not overwritten each time
+        listOfAlphashapesAndMeta_tmp = []
+        tagArea = 0
+        for photo_guids in clusterPhotoGuidList:
+            # for each cluster of this toptag
+            photos = [cleanedPhotoDict[x] for x in photo_guids]
+            photoCount = len(photo_guids)
+            uniqueUserCount = len(set([photo.user_guid for photo in photos]))
+            sumViews = sum([photo.post_views_count for photo in photos])
+            # calculate different weighting formulas
+            # (x**y means x raised to the power y; +1 to uniqueUserCount
+            # prevents the 1-2 range from being misaligned)
+            # weightsv1: standard weighting formula
+            weightsv1 = photoCount * (sqrt(1 / (photoCount / (uniqueUserCount+1))**3))
+            # weightsv2: less importance on user count
+            # in correlation to photo count [Join_Count]
+            weightsv2 = photoCount * (sqrt(1 / (photoCount / (uniqueUserCount+1))**2))
+            # weightsv3: ignores user count; this will emphasize
+            # individual and very active users
+            weightsv3 = sqrt((photoCount+(2*sqrt(photoCount)))*2)
+            # instead of lat/lng for each photo, we use photo_locID
+            # to identify a list of distinct locations
+            distinctLocations = set([photo.loc_id
+                                     for photo in photos])
+            # project each distinct location ("lat:lng") to UTM
+            points = [geometry.Point(
+                          pyproj.transform(
+                              crs_wgs, crs_proj,
+                              Decimal(location.split(':')[1]),
+                              Decimal(location.split(':')[0])))
+                      for location in distinctLocations]
+            point_collection = geometry.MultiPoint(list(points))
+            result_polygon = None
+
+            if len(points) >= 5:
+                if len(points) < 10:
+                    # convex hull
+                    result_polygon = point_collection.convex_hull
+                    result_polygon = result_polygon.buffer(
+                        clusterTreeCuttingDist/4, resolution=3)
+                    shapetype = "between 5 and 10 points_convexHull"
+                else:
+                    if len(points) > 500:
+                        startalpha = 1000000
+                    elif len(points) > 200:
+                        startalpha = 10000
+                    else:
+                        startalpha = 9000
+                    # concave hull/alpha shape
+                    result_polygon = AlphaShapes.alpha_shape(
+                        points, alpha=clusterTreeCuttingDist/startalpha)
+                    shapetype = "Initial Alpha Shape + Buffer"
+                    if type(result_polygon) is geometry.multipolygon.MultiPolygon or isinstance(result_polygon, bool):
+                        # repeat generating alpha shapes with smaller alpha
+                        # value if a multipolygon was generated: smaller
+                        # alpha values mean less granularity of the
+                        # resulting polygon, but a too large alpha may
+                        # result in an empty polygon
+                        # (this branch is sometimes executed
+                        # for larger scales)
+                        for i in range(1, 6):
+                            # increase the divisor, which decreases the
+                            # effective alpha (i**i: i to the power of i)
+                            alpha = startalpha + (startalpha * (i**i))
+                            result_polygon = AlphaShapes.alpha_shape(
+                                points, alpha=clusterTreeCuttingDist/alpha)
+                            if not type(result_polygon) is geometry.multipolygon.MultiPolygon and not isinstance(result_polygon, bool):
+                                shapetype = "Multipolygon Alpha Shape /" + str(alpha)
+                                break
+                    if type(result_polygon) is geometry.multipolygon.MultiPolygon or isinstance(result_polygon, bool):
+                        # try increasing alpha instead
+                        for i in range(1, 6):
+                            # decrease the divisor, which increases
+                            # the effective alpha
+                            alpha = startalpha / (i*i)
+                            result_polygon = AlphaShapes.alpha_shape(
+                                points, alpha=clusterTreeCuttingDist/alpha)
+                            if not type(result_polygon) is geometry.multipolygon.MultiPolygon and not isinstance(result_polygon, bool):
+                                shapetype = "Multipolygon Alpha Shape /" + str(alpha)
+                                break
+                    if type(result_polygon) is geometry.multipolygon.MultiPolygon:
+                        shapetype = "Multipolygon Alpha Shape -> Convex Hull"
+                        # if still of type multipolygon, try to
+                        # remove holes and do a convex_hull
+                        result_polygon = result_polygon.convex_hull
+                    elif isinstance(result_polygon, bool) or result_polygon.is_empty:
+                        # OR: in case there was a problem with generating
+                        # alpha shapes (circum_r = a*b*c/(4.0*area)
+                        # --> ZeroDivisionError: float division by zero);
+                        # this branch is rarely executed for large point
+                        # clusters where alpha is perhaps set too small
+                        shapetype = "BoolAlpha -> Fallback to PointCloud Convex Hull"
+                        result_polygon = point_collection.convex_hull
+                    # finally do a buffer to smooth the alpha shape
+                    result_polygon = result_polygon.buffer(
+                        clusterTreeCuttingDist/4, resolution=3)
+            elif 2 <= len(points) < 5:
+                shapetype = "between 2 and 5 points_buffer"
+                # distance between points, see
+                # http://www.mathwarehouse.com/algebra/distance_formula/index.php
+                # small clusters are presented as buffer
+                # with 0.5% of width-area
+                result_polygon = point_collection.buffer(
+                    clusterTreeCuttingDist/4, resolution=3)
+                result_polygon = result_polygon.convex_hull
+            elif len(points) == 1 or type(result_polygon) is geometry.point.Point or result_polygon is None:
+                shapetype = "1 point cluster"
+                # single dots are presented as buffer
+                # with 0.5% of width-area
+                result_polygon = point_collection.buffer(
+                    clusterTreeCuttingDist/4, resolution=3)
+            # final check for multipolygon
+            if type(result_polygon) is geometry.multipolygon.MultiPolygon:
+                # usually not executed
+                result_polygon = result_polygon.convex_hull
+            # Geom, Join_Count, Views, COUNT_User, ImpTag, TagCountG, HImpTag
+            if result_polygon is not None and not result_polygon.is_empty:
+                if localSaturationCheck:
+                    tagArea += result_polygon.area
+                listOfAlphashapesAndMeta_tmp.append(
+                    (result_polygon, photoCount, sumViews, uniqueUserCount,
+                     toptag[0], toptag[1],
+                     weightsv1, weightsv2, weightsv3, shapetype))
+        if len(listOfAlphashapesAndMeta_tmp) > 0:
+            # finally sort and append all cluster shapes for this tag
+            listOfAlphashapesAndMeta_tmp = sorted(
+                listOfAlphashapesAndMeta_tmp, key=lambda x: -x[6])
+        return listOfAlphashapesAndMeta_tmp, tagArea
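The alpha retry cascade above is easier to follow with the triangle criterion of `alpha_shape` (below) in mind: a Delaunay triangle only survives when its circumradius stays below `1.0/alpha`. A standalone sketch of that filter quantity (not part of the patch; the example triangle is made up):

```python
import math

def circumradius(pa, pb, pc):
    """Circumradius of the triangle spanned by three (x, y) points."""
    a = math.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
    b = math.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
    c = math.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
    s = (a + b + c) / 2.0                         # semiperimeter
    area = math.sqrt(s * (s-a) * (s-b) * (s-c))   # Heron's formula
    return a * b * c / (4.0 * area)

# This triangle has circumradius 0.5, so it survives the filter
# circum_r < 1.0/alpha for any alpha < 2.0:
print(round(circumradius((0, 0), (1, 0), (0.5, 0.5)), 2))  # 0.5
```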
+
+    @staticmethod
+    def alpha_shape(points, alpha):
+        """Compute the alpha shape (concave hull) of a set of points.
+
+        Alpha Shapes Code by KEVIN DWYER, see
+        http://blog.thehumangeo.com/2014/05/12/drawing-boundaries-in-python/
+        with minor adaptions to Tag Maps clustering.
+
+        @param points: Iterable container of points.
+        @param alpha: alpha value to influence the
+            gooeyness of the border. Smaller numbers
+            don't fall inward as much as larger numbers.
+            Too large, and you lose everything!
+        """
+        if len(points) < 4:
+            # When you have a triangle, there is no sense
+            # in computing an alpha shape.
+            return geometry.MultiPoint(list(points)).convex_hull
+
+        def add_edge(edges, edge_points, coords, i, j):
+            """Add a line between the i-th and j-th points,
+            if not in the list already
+            """
+            if (i, j) in edges or (j, i) in edges:
+                # already added
+                return
+            edges.add((i, j))
+            edge_points.append(coords[[i, j]])
+
+        coords = np.array([point.coords[0]
+                           for point in points])
+        # To avoid a QhullError on colinear points, the input could be
+        # joggled by passing qhull_options='QJ' to Delaunay, see
+        # https://de.mathworks.com/matlabcentral/answers/94438-why-does-the-delaunay-function-in-matlab-7-0-r14-produce-an-error-when-passed-colinear-points?s_tid=gn_loc_drop
+        # Qhull 3.1 added triangulated output ('Qt'), which should be used
+        # for Delaunay triangulations instead of joggled input ('QJ').
+        tri = Delaunay(coords)
+        edges = set()
+        edge_points = []
+        # loop over triangles:
+        # ia, ib, ic = indices of corner points of the triangle
+        for ia, ib, ic in tri.vertices:
+            pa = coords[ia]
+            pb = coords[ib]
+            pc = coords[ic]
+            # Lengths of sides of triangle
+            a = math.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2)
+            b = math.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2)
+            c = math.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2)
+            # Semiperimeter of triangle
+            s = (a + b + c)/2.0
+            # Area of triangle by Heron's formula
+            try:
+                area = math.sqrt(s*(s-a)*(s-b)*(s-c))
+            except ValueError:
+                return False
+            if area == 0:
+                return False
+            circum_r = a*b*c/(4.0*area)
+            # Here's the radius filter.
+            if circum_r < 1.0/alpha:
+                add_edge(edges, edge_points, coords, ia, ib)
+                add_edge(edges, edge_points, coords, ib, ic)
+                add_edge(edges, edge_points, coords, ic, ia)
+        m = geometry.MultiLineString(edge_points)
+        triangles = list(polygonize(m))
+        return cascaded_union(triangles)  # , edge_points
\ No newline at end of file
diff --git a/tagmaps/classes/cluster.py b/tagmaps/classes/cluster.py
index ee9525f..0356f7f 100644
--- a/tagmaps/classes/cluster.py
+++ b/tagmaps/classes/cluster.py
@@ -16,6 +16,7 @@ import shapely.geometry as geometry
 from multiprocessing.pool import ThreadPool

 from tagmaps.classes.utils import Utils
+from tagmaps.classes.alpha_shapes import AlphaShapes
 from tagmaps.classes.shared_structure import (
     CleanedPost, AnalysisBounds, PreparedData)

@@ -41,10 +42,13 @@ def __init__(self, bounds: AnalysisBounds,
                  top_list: List[Tuple[str, int]],
                  total_distinct_locations: int,
                  tmax: int,
-                 cluster_type: ClusterType = TAGS):
+                 cluster_type: ClusterType = TAGS,
+                 topitem: Tuple[str, int] = None,
+                 local_saturation_check: bool = True):
         self.cls_type = cluster_type
         self.tnum = 0
         self.tmax = tmax
+        self.topitem = topitem
         self.bounds = bounds
         self.cluster_distance = ClusterGen._init_cluster_dist(self.bounds)
         self.cleaned_post_dict = cleaned_post_dict
@@ -56,6 +60,11 @@ def __init__(self, bounds: AnalysisBounds,
         self.number_of_clusters = None
         self.mask_noisy = None
         self.clusterer = None
+        # storing cluster results:
+        self.single_items = defaultdict(list)
+        self.clustered_items = defaultdict(list)
+        self.local_saturation_check = local_saturation_check
+        self.alphashapes_and_meta = list()
         # set initial analysis bounds
         self._update_bounds()
         self.bound_points_shapely = (
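The three result containers added to `__init__` drive everything that follows. Their layout is only implied by how later code indexes them, so here is a hedged sketch of the expected contents (all values made up):

```python
from collections import defaultdict

# single_items: item -> guids of posts that ended up in no cluster
# clustered_items: item -> list of guid arrays, one per cluster,
#                  sorted largest first
single_items = defaultdict(list)
clustered_items = defaultdict(list)

# alphashapes_and_meta collects one tuple per cluster shape:
# (polygon, post_count, views, user_count, item_name, item_count,
#  weightsv1, weightsv2, weightsv3, shapetype)
shape_record = (None, 12, 340, 7, 'park', 95,
                8.4, 9.9, 9.3, 'Initial Alpha Shape + Buffer')
assert shape_record[6] == 8.4  # write_results later reads x[6] as weightsv1
```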
@@ -70,12 +79,13 @@
     def new_clusterer(cls,
                       clusterer_type: ClusterType,
                       bounds: AnalysisBounds,
                       cleaned_post_dict: Dict[str, CleanedPost],
-                      prepared_data: PreparedData
-                      ):
+                      prepared_data: PreparedData,
+                      local_saturation_check: bool):
         """Create new clusterer from type and input data

         Args:
-            clusterer_type (ClusterGen.ClusterType): Either Tags, Locations or Emoji
+            clusterer_type (ClusterGen.ClusterType): Either Tags,
+                Locations or Emoji
             bounds (LoadData.AnalysisBounds): Analysis spatial boundary
             cleaned_post_dict (Dict[str, CleanedPost]): Dict of cleaned posts
             prepared_data (LoadData.PreparedData): Statistics data
@@ -86,12 +96,15 @@ def new_clusterer(cls,
         if clusterer_type == cls.TAGS:
             top_list = prepared_data.top_tags_list
             tmax = prepared_data.tmax
+            topitem = prepared_data.single_mostused_tag
         elif clusterer_type == cls.EMOJI:
             top_list = prepared_data.top_emoji_list
             tmax = prepared_data.emax
+            topitem = prepared_data.single_mostused_emoji
         elif clusterer_type == cls.LOCATIONS:
             top_list = prepared_data.top_locations_list
             tmax = prepared_data.emax
+            topitem = prepared_data.single_mostused_location
         else:
             sys.exit("Cluster Type unknown.")
         clusterer = cls(
@@ -100,7 +113,9 @@ def new_clusterer(cls,
             top_list=top_list,
             total_distinct_locations=prepared_data.total_unique_locations,
             tmax=tmax,
-            cluster_type=clusterer_type)
+            cluster_type=clusterer_type,
+            topitem=topitem,
+            local_saturation_check=local_saturation_check)
         return clusterer

     @staticmethod
@@ -348,263 +363,289 @@ def cluster_points(self, points,
         # self.sel_colors will be used to gen preview map
         return None

-def cluster_all(self):
-    """Cluster all data attached to self
-    """
-    noClusterPhotos_perTag_DictOfLists = defaultdict(list)
-    clustersPerTag = defaultdict(list)
-    # Proceed with clustering all tags
-    # data always in lat/lng WGS1984
-    crs_wgs = pyproj.Proj(init='epsg:4326')
-    if cfg.override_crs is None:
-        # Calculate best UTM Zone SRID/EPSG Code
-        # True centroid (coords may be multipoint):
-        input_lon_center = self.bound_points_shapely.centroid.coords[0][0]
-        input_lat_center = self.bound_points_shapely.centroid.coords[0][1]
-        epsg_code = Utils.convert_wgs_to_utm(input_lon_center, input_lat_center)
-        crs_proj = pyproj.Proj(init=f'epsg:{epsg_code}')
-        project = lambda x, y: pyproj.transform(pyproj.Proj(init='epsg:4326'), pyproj.Proj(init=f'epsg:{epsg_code}'), x, y)
-        #geom_proj = transform(project, alphaShapeAndMeta[0])
+    def _cluster_item(self, sel_item: Tuple[str, int]):
+        """Cluster specific item"""
-    if cfg.local_saturation_check:
-        clusters, selected_post_guids = cluster_tag(prepared_data.single_mostused_tag, None, True)
-        numpy_selectedPhotoList_Guids = np.asarray(selected_post_guids)
-        mask_noisy = (clusters == -1)
-        number_of_clusters = len(np.unique(clusters[~mask_noisy]))
-        print(f'--> {number_of_clusters} cluster.')
-        clusterPhotosGuidsList = []
-        for x in range(number_of_clusters):
-            currentClusterPhotoGuids = numpy_selectedPhotoList_Guids[clusters==x]
-            clusterPhotosGuidsList.append(currentClusterPhotoGuids)
-        noClusterPhotos_perTag_DictOfLists[prepared_data.single_mostused_tag[0]] = list(numpy_selectedPhotoList_Guids[clusters==-1])
-        # Sort descending based on size of cluster: https://stackoverflow.com/questions/30346356/how-to-sort-list-of-lists-according-to-length-of-sublists
-        clusterPhotosGuidsList.sort(key=len, reverse=True)
-        if not len(clusterPhotosGuidsList) == 0:
-            clustersPerTag[prepared_data.single_mostused_tag[0]] = clusterPhotosGuidsList
-    global tnum
-    tnum = 1
-    for toptag in top_tags_list:
-        if cfg.local_saturation_check and tnum == 1 and toptag[0] in clustersPerTag:
-            #skip toptag if already clustered due to local saturation
-            continue
-        clusters, selected_post_guids = cluster_tag(toptag, None, True)
-        #print("baseDataList: ")
-        #print(str(type(selectedPhotoList)))
-        #for s in selectedPhotoList[:2]:
-        #    print(*s)
-        #print("resultData: ")
-        ##for s in clusters[:2]:
-        ##    print(*s)
-        #print(str(type(clusters)))
-        #print(clusters)
-        #clusters contains the cluster values (-1 = no cluster, 0 maybe, >0 = cluster
-        # in the same order, selectedPhotoList contains all original photo data, thus clusters[10] and selectedPhotoList[10] refer to the same photo
-
-        numpy_selectedPhotoList_Guids = np.asarray(selected_post_guids)
+        points = self._get_np_points(item=sel_item[0], silent=False)
+        clusters = self.cluster_points(points=points, preview_mode=False)
+        return clusters, points
+
+    @staticmethod
+    def _get_cluster_guids(clusters, selected_post_guids):
+        """Returns two lists: clustered and non clustered guids"""
+        clustered_guids = list()
+        np_selected_post_guids = np.asarray(selected_post_guids)
         mask_noisy = (clusters == -1)
         if len(selected_post_guids) == 1:
             number_of_clusters = 0
         else:
-            number_of_clusters = len(np.unique(clusters[~mask_noisy])) #mit noisy (=0)
-            #if number_of_clusters > 200:
-            #    log.info("--> Too many, skipped for this scale.")
-            #    continue
-        if not number_of_clusters == 0:
+            number_of_clusters = len(np.unique(clusters[~mask_noisy]))
+        if number_of_clusters == 0:
+            print("--> No cluster.")
+            non_clustered_guids = list(np_selected_post_guids)
+        else:
             print(f'--> {number_of_clusters} cluster.')
-            tnum += 1
-            photo_num = 0
-            #clusternum_photolist = zip(clusters,selectedPhotoList)
-            #clusterPhotosList = [[] for x in range(number_of_clusters)]
-            clusterPhotosGuidsList = []
             for x in range(number_of_clusters):
-                currentClusterPhotoGuids = numpy_selectedPhotoList_Guids[clusters==x]
-                clusterPhotosGuidsList.append(currentClusterPhotoGuids)
-            noClusterPhotos_perTag_DictOfLists[toptag[0]] = list(numpy_selectedPhotoList_Guids[clusters==-1])
-            # Sort descending based on size of cluster: https://stackoverflow.com/questions/30346356/how-to-sort-list-of-lists-according-to-length-of-sublists
-            clusterPhotosGuidsList.sort(key=len, reverse=True)
-            if not len(clusterPhotosGuidsList) == 0:
-                clustersPerTag[toptag[0]] = clusterPhotosGuidsList
+                current_clustered_guids = np_selected_post_guids[clusters == x]
+                clustered_guids.append(current_clustered_guids)
+            non_clustered_guids = list(np_selected_post_guids[clusters == -1])
+            # Sort descending based on size of cluster
+            # https://stackoverflow.com/questions/30346356/how-to-sort-list-of-lists-according-to-length-of-sublists
+            clustered_guids.sort(key=len, reverse=True)
+        return clustered_guids, non_clustered_guids
+
+    def _get_update_clusters(self, item,
+                             single_items_dict,
+                             cluster_items_dict):
+        """Get clusters for an item and write results to dicts"""
+        clusters, selected_post_guids = self._cluster_item(item)
+        result = self._get_cluster_guids(clusters, selected_post_guids)
+        clustered_guids = result[0]
+        non_clustered_guids = result[1]
+        single_items_dict[item[0]] = non_clustered_guids
+        if not len(clustered_guids) == 0:
+            cluster_items_dict[item[0]] = clustered_guids
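To illustrate the clustered/noise partition implemented in `_get_cluster_guids` above, here is a toy run (not part of the patch; HDBSCAN labels noise as -1, and labels align with guids by position):

```python
import numpy as np

clusters = np.array([0, 0, 1, -1, 1, 0, -1])  # toy labels from clusterer
guids = np.asarray(['a', 'b', 'c', 'd', 'e', 'f', 'g'])

mask_noisy = (clusters == -1)
number_of_clusters = len(np.unique(clusters[~mask_noisy]))  # -> 2
clustered = [guids[clusters == x] for x in range(number_of_clusters)]
clustered.sort(key=len, reverse=True)  # largest cluster first
non_clustered = list(guids[mask_noisy])
print([list(c) for c in clustered], non_clustered)
# [['a', 'b', 'f'], ['c', 'e']] ['d', 'g']
```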
+
+    def cluster_all(self):
+        """Cluster all items attached to self
+
+        Updates results to:
+            self.single_items
+            self.clustered_items
+        """
+        # get clusters for top item first
+        if self.local_saturation_check:
+            self._get_update_clusters(
+                self.topitem,
+                self.single_items,
+                self.clustered_items)
+            self.tnum = 1
+        # get remaining clusters
+        for item in self.top_list:
+            if (self.local_saturation_check and
+                    self.tnum == 1 and
+                    item[0] in self.clustered_items):
+                # skip item if already clustered
+                # due to local saturation
+                continue
+            self._get_update_clusters(
+                item,
+                self.single_items,
+                self.clustered_items)
+        # flush console output once
+        sys.stdout.flush()
+
+    def alpha_shapes(self):
+        """For each cluster of points, calculate boundary shape
+        and add statistics (HImpTag etc.)
+
+        Updates results to self.alphashapes_and_meta
+        """
+        saturation_exclude_count = 0
+        # data always in lat/lng WGS1984
+        crs_wgs = pyproj.Proj(init='epsg:4326')
+        crs_proj = AlphaShapes._get_best_utmzone(
+            self.bound_points_shapely)
+
+        alphashapes_and_meta = self.alphashapes_and_meta
+        self.tnum = 0
+        if self.local_saturation_check:
+            # calculate total area of Top1 item for the
+            # saturation check of lower level items
+            clustered_post_guids = self.clustered_items.get(
+                self.topitem[0], None)
+            # print("Topitem: " + str(self.topitem[0]))
+            if clustered_post_guids is None:
+                sys.exit(f'Something went wrong: '
+                         f'No posts found for topitem: '
+                         f'{self.topitem[0]}')
+            __, topitem_area = AlphaShapes.get_cluster_shape(
+                self.topitem, clustered_post_guids, self.cleaned_post_dict,
+                crs_wgs, crs_proj, self.cluster_distance,
+                self.local_saturation_check)
+        for item in self.top_list:
+            self.tnum += 1
+            clustered_post_guids = self.clustered_items.get(item[0], None)
+            # Generate item cluster shapes
+            if clustered_post_guids:
+                result = AlphaShapes.get_cluster_shape(
+                    item, clustered_post_guids, self.cleaned_post_dict,
+                    crs_wgs, crs_proj, self.cluster_distance,
+                    self.local_saturation_check)
+                alphashapes_and_meta_tmp = result[0]
+                item_area = result[1]
+                if (self.local_saturation_check
+                        and not item_area == 0
+                        and not self.tnum == 1):
+                    local_saturation = item_area/(topitem_area/100)
+                    # print("Local saturation for item " + item[0]
+                    #       + ": " + str(round(local_saturation, 0)))
+                    if local_saturation > 60:
+                        # skip item entirely due to saturation
+                        # (its cluster area exceeds 60% of the total
+                        # area of the top item's clusters)
+                        # print("Skipped: " + item[0] + " due to saturation ("
+                        #       + str(round(local_saturation, 0)) + "%).")
+                        saturation_exclude_count += 1
+                        continue  # next item
+
+                if len(alphashapes_and_meta_tmp) > 0:
+                    alphashapes_and_meta.extend(
+                        alphashapes_and_meta_tmp)
+
+            non_clustered_guids = self.single_items.get(item[0], None)
+            if non_clustered_guids:
+                shapetype = "Single cluster"
+                # print("Single: " + str(len(non_clustered_guids)))
+                posts = [self.cleaned_post_dict[x]
+                         for x in non_clustered_guids]
+                for single_post in posts:
+                    # project lat/lng to UTM
+                    x, y = pyproj.transform(
+                        crs_wgs, crs_proj,
+                        single_post.lng, single_post.lat)
+                    pcoordinate = geometry.Point(x, y)
+                    # single dots are presented as buffers
+                    # with 0.5% of width-area
+                    result_polygon = pcoordinate.buffer(
+                        self.cluster_distance/4,
+                        resolution=3)
+                    if (result_polygon is None or
+                            result_polygon.is_empty):
+                        continue
+                    # append statistics for item with no cluster
+                    alphashapes_and_meta.append((
+                        result_polygon, 1,
+                        max(single_post.post_views_count,
+                            single_post.post_like_count),
+                        1, str(item[0]),
+                        item[1], 1, 1, 1, shapetype))
+        self.log.info(f'{len(alphashapes_and_meta)} '
+                      f'Alpha Shapes. Done.')
+        if saturation_exclude_count > 0:
+            self.log.info(f'Excluded {saturation_exclude_count} '
+                          f'Tags on local saturation check.')
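`write_results` (below) rescales each weight column to a 1-1000 range via min-max normalization, precomputing slope `mod_a` and intercept `mod_b` so that the minimum maps to 1 and the maximum to 1000. A standalone check with made-up values (not part of the patch):

```python
def minmax_1_1000(values):
    """Linear rescale: min(values) -> 1, max(values) -> 1000."""
    v_min, v_max = min(values), max(values)
    mod_a = (1000 - 1) / (v_max - v_min)   # slope
    mod_b = 1000 - mod_a * v_max           # intercept
    return [mod_a * v + mod_b for v in values]

weights = [2.5, 40.0, 17.3]  # made-up weightsv1 column
print([round(w, 1) for w in minmax_1_1000(weights)])
# [1.0, 1000.0, 395.3]
```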
+
+    def write_results(self):
+        """Write all results to output
+        """
+        ## Output Boundary Shapes in merged Shapefile ##
+        self.log.info("########## STEP 5 of 6: Writing Results to Shapefile ##########")
+
+        # Calculate best UTM Zone SRID/EPSG Code
+        # (true centroid; coords may be multipoint)
+        input_lon_center = self.bound_points_shapely.centroid.coords[0][0]
+        input_lat_center = self.bound_points_shapely.centroid.coords[0][1]
+        epsg_code = AlphaShapes._convert_wgs_to_utm(
+            input_lon_center, input_lat_center)
+        project = lambda x, y: pyproj.transform(
+            pyproj.Proj(init='epsg:4326'),
+            pyproj.Proj(init=f'epsg:{epsg_code}'), x, y)
+
+        # Define polygon feature geometry
+        schema = {
+            'geometry': 'Polygon',
+            'properties': {'Join_Count': 'int',
+                           'Views': 'int',
+                           'COUNT_User': 'int',
+                           'ImpTag': 'str',
+                           'TagCountG': 'int',
+                           'HImpTag': 'int',
+                           'Weights': 'float',
+                           'WeightsV2': 'float',
+                           'WeightsV3': 'float',
+                           #'shapetype': 'str',
+                           'emoji': 'int'},
+        }
+
+        # Normalization of values (1-1000 range), precalc step:
+        #######################################
+        # get the n'th column out for calculating the max/min
+        weightsv1_range = [x[6] for x in self.alphashapes_and_meta]
+        weightsv2_range = [x[7] for x in self.alphashapes_and_meta]
+        weightsv3_range = [x[8] for x in self.alphashapes_and_meta]
+        weightsv1_min = min(weightsv1_range)
+        weightsv1_max = max(weightsv1_range)
+        weightsv2_min = min(weightsv2_range)
+        weightsv2_max = max(weightsv2_range)
+        weightsv3_min = min(weightsv3_range)
+        weightsv3_max = max(weightsv3_range)
+        # precalc, see
+        # https://stats.stackexchange.com/questions/70801/how-to-normalize-data-to-0-1-range
+        weightsv1_mod_a = (1000-1)/(weightsv1_max-weightsv1_min)
+        weightsv1_mod_b = 1000 - weightsv1_mod_a * weightsv1_max
+        weightsv2_mod_a = (1000-1)/(weightsv2_max-weightsv2_min)
+        weightsv2_mod_b = 1000 - weightsv2_mod_a * weightsv2_max
+        weightsv3_mod_a = (1000-1)/(weightsv3_max-weightsv3_min)
+        weightsv3_mod_b = 1000 - weightsv3_mod_a * weightsv3_max
+        #######################################
+        # Write a new Shapefile
+        # WGS1984
+        if not cfg.cluster_tags and cfg.cluster_emoji:
+            shapefileName = "allEmojiCluster"
         else:
-            print("--> No cluster.")
-            noClusterPhotos_perTag_DictOfLists[toptag[0]] = list(numpy_selectedPhotoList_Guids)
-        #for x in clusters:
-        #    #photolist = []
-        #    if x >= 0: # no clusters: x = -1
-        #        clusterPhotosList[x].append([selectedPhotoList[photo_num]])
-        #        #clusterPhotosArray_dict[x].add(selectedPhotoList[photo_num])
-        #    else:
-        #        noClusterPhotos_perTag_DictOfLists[toptag[0]].append(selectedPhotoList[photo_num])
-        #    photo_num+=1
-
-        #print("resultList: ")
-        #for s in clusterPhotosList[:2]:
-        #    print(*s)
-        #print(str(toptag) + " - Number of clusters: " + str(len(clusterPhotosList)) + " Photo num: " + str(photo_num))
-
-        #plt.autoscale(enable=True)
-
-        #if tnum == 50:
-        #    break
-        #plt.savefig('foo.png')
-        #sys.exit()
-    sys.stdout.flush()
-    log.info("########## STEP 4 of 6: Generating Alpha Shapes ##########")
-    #if (tnum % 50 == 0):#modulo: if division has no remainder, force update cmd output
-    #sys.stdout.flush()
-    #for each cluster of points, calculate boundary shape and add statistics (HImpTag etc.)
- listOfAlphashapesAndMeta = [] - tnum = 0 - if cfg.local_saturation_check: - #calculate total area of Top1-Tag for 80% saturation check for lower level tags - saturationExcludeCount = 0 - clusterPhotoGuidList = clustersPerTag.get(prepared_data.single_mostused_tag[0], None) - #print("Toptag: " + str(singleMostUsedtag[0])) - if clusterPhotoGuidList is None: - sys.exit(f'No Photos found for toptag: {singleMostUsedtag}') - toptagArea = Utils.generateClusterShape(toptag,clusterPhotoGuidList,cleaned_post_dict,crs_wgs,crs_proj,clusterTreeCuttingDist,cfg.local_saturation_check)[1] - for toptag in top_tags_list: - tnum += 1 - clusterPhotoGuidList = clustersPerTag.get(toptag[0], None) - #Generate tag Cluster Shapes - if clusterPhotoGuidList: - listOfAlphashapesAndMeta_tmp,tagArea = Utils.generateClusterShape(toptag,clusterPhotoGuidList,cleaned_post_dict,crs_wgs,crs_proj,clusterTreeCuttingDist,cfg.local_saturation_check) - if cfg.local_saturation_check and not tagArea == 0 and not tnum == 1: - localSaturation = tagArea/(toptagArea/100) - #print("Local Saturation for Tag " + toptag[0] + ": " + str(round(localSaturation,0))) - if localSaturation > 60: - #skip tag entirely due to saturation (if total area > 80% of total area of toptag clusters) - #print("Skipped: " + toptag[0] + " due to saturation (" + str(round(localSaturation,0)) + "%).") - saturationExcludeCount += 1 - continue #next toptag - - if len(listOfAlphashapesAndMeta_tmp) > 0: - listOfAlphashapesAndMeta.extend(listOfAlphashapesAndMeta_tmp) - - singlePhotoGuidList = noClusterPhotos_perTag_DictOfLists.get(toptag[0], None) - if singlePhotoGuidList: - shapetype = "Single cluster" - #print("Single: " + str(len(singlePhotoGuidList))) - photos = [cleaned_post_dict[x] for x in singlePhotoGuidList] - for single_photo in photos: - #project lat/lng to UTM - x, y = pyproj.transform(crs_wgs, crs_proj, single_photo.lng, single_photo.lat) - pcoordinate = geometry.Point(x, y) - result_polygon = pcoordinate.buffer(clusterTreeCuttingDist/4,resolution=3) #single dots are presented as buffer with 0.5% of width-area - #result_polygon = pcoordinate.buffer(min(distXLng,distYLat)/100,resolution=3) - if result_polygon is not None and not result_polygon.is_empty: - listOfAlphashapesAndMeta.append((result_polygon,1,max(single_photo.post_views_count,single_photo.post_like_count),1,str(toptag[0]),toptag[1],1,1,1,shapetype)) - log.info(f'{len(listOfAlphashapesAndMeta)} Alpha Shapes. 
Done.') - if cfg.local_saturation_check and not saturationExcludeCount == 0: - log.info(f'Excluded {saturationExcludeCount} Tags on local saturation check.') - ##Output Boundary Shapes in merged Shapefile## - log.info("########## STEP 5 of 6: Writing Results to Shapefile ##########") - - #Calculate best UTM Zone SRID/EPSG Code - input_lon_center = bound_points_shapely.centroid.coords[0][0] #True centroid (coords may be multipoint) - input_lat_center = bound_points_shapely.centroid.coords[0][1] - epsg_code = Utils.convert_wgs_to_utm(input_lon_center, input_lat_center) - project = lambda x, y: pyproj.transform(pyproj.Proj(init='epsg:4326'), pyproj.Proj(init='epsg:{0}'.format(epsg_code)), x, y) - - # Define polygon feature geometry - schema = { - 'geometry': 'Polygon', - 'properties': {'Join_Count': 'int', - 'Views': 'int', - 'COUNT_User': 'int', - 'ImpTag': 'str', - 'TagCountG': 'int', - 'HImpTag': 'int', - 'Weights': 'float', - 'WeightsV2': 'float', - 'WeightsV3': 'float', - #'shapetype': 'str', - 'emoji': 'int'}, - } - - #Normalization of Values (1-1000 Range), precalc Step: - ####################################### - weightsv1_range = [x[6] for x in listOfAlphashapesAndMeta] #get the n'th column out for calculating the max/min - weightsv2_range = [x[7] for x in listOfAlphashapesAndMeta] - weightsv3_range = [x[8] for x in listOfAlphashapesAndMeta] - weightsv1_min = min(weightsv1_range) - weightsv1_max = max(weightsv1_range) - weightsv2_min = min(weightsv2_range) - weightsv2_max = max(weightsv2_range) - weightsv3_min = min(weightsv3_range) - weightsv3_max = max(weightsv3_range) - #precalc - #https://stats.stackexchange.com/questions/70801/how-to-normalize-data-to-0-1-range - weightsv1_mod_a = (1000-1)/(weightsv1_max-weightsv1_min) - weightsv1_mod_b = 1000 - weightsv1_mod_a * weightsv1_max - weightsv2_mod_a = (1000-1)/(weightsv2_max-weightsv2_min) - weightsv2_mod_b = 1000 - weightsv2_mod_a * weightsv2_max - weightsv3_mod_a = (1000-1)/(weightsv3_max-weightsv3_min) - weightsv3_mod_b = 1000 - weightsv3_mod_a * weightsv3_max - ####################################### - # Write a new Shapefile - # WGS1984 - if (cfg.cluster_tags == False and cfg.cluster_emoji == True): - shapefileName = "allEmojiCluster" - else: - shapefileName = "allTagCluster" - with fiona.open(f'02_Output/{shapefileName}.shp', mode='w', encoding='UTF-8', driver='ESRI Shapefile', schema=schema,crs=from_epsg(epsg_code)) as c: - # Normalize Weights to 0-1000 Range - idx = 0 - for alphaShapeAndMeta in listOfAlphashapesAndMeta: - if idx == 0: - HImP = 1 - else: - if listOfAlphashapesAndMeta[idx][4] != listOfAlphashapesAndMeta[idx-1][4]: + shapefileName = "allTagCluster" + with fiona.open(f'02_Output/{shapefileName}.shp', mode='w', encoding='UTF-8', driver='ESRI Shapefile', schema=schema,crs=from_epsg(epsg_code)) as c: + # Normalize Weights to 0-1000 Range + idx = 0 + for alphaShapeAndMeta in alphashapes_and_meta = list(): + if idx == 0: HImP = 1 else: - HImP = 0 - #emoName = unicode_name(alphaShapeAndMeta[4]) - #Calculate Normalized Weights Values based on precalc Step - if not alphaShapeAndMeta[6] == 1: - weight1_normalized = weightsv1_mod_a * alphaShapeAndMeta[6] + weightsv1_mod_b - else: - weight1_normalized = 1 - if not alphaShapeAndMeta[7] == 1: - weight2_normalized = weightsv2_mod_a * alphaShapeAndMeta[7] + weightsv2_mod_b - else: - weight2_normalized = 1 - if not alphaShapeAndMeta[8] == 1: - weight3_normalized = weightsv3_mod_a * alphaShapeAndMeta[8] + weightsv3_mod_b - else: - weight3_normalized = 1 - idx += 1 - #project data 
-                    #geom_proj = transform(project, alphaShapeAndMeta[0])
-                    #c.write({
-                    #    'geometry': geometry.mapping(geom_proj),
-                    if cfg.cluster_emoji and alphaShapeAndMeta[4] in prepared_data.top_emoji_list:
-                        emoji = 1
-                        ImpTagText = ""
-                    else:
-                        emoji = 0
-                        ImpTagText = f'{alphaShapeAndMeta[4]}'
-                    c.write({
-                        'geometry': geometry.mapping(alphaShapeAndMeta[0]),
-                        'properties': {'Join_Count': alphaShapeAndMeta[1],
-                                       'Views': alphaShapeAndMeta[2],
-                                       'COUNT_User': alphaShapeAndMeta[3],
-                                       'ImpTag': ImpTagText,
-                                       'TagCountG': alphaShapeAndMeta[5],
-                                       'HImpTag': HImP,
-                                       'Weights': weight1_normalized,
-                                       'WeightsV2': weight2_normalized,
-                                       'WeightsV3': weight3_normalized,
-                                       #'shapetype': alphaShapeAndMeta[9],
-                                       'emoji': emoji},
-                        })
-    if cfg.cluster_emoji:
-        with open("02_Output/emojiTable.csv", "w", encoding='utf-8') as emojiTable:
-            emojiTable.write("FID,Emoji\n")
-            idx = 0
-            for alphaShapeAndMeta in listOfAlphashapesAndMeta:
-                if alphaShapeAndMeta[4] in prepared_data.top_emoji_list:
-                    ImpTagText = f'{alphaShapeAndMeta[4]}'
-                else:
-                    ImpTagText = ""
-                emojiTable.write(f'{idx},{ImpTagText}\n')
-                idx += 1
+            shapefileName = "allTagCluster"
+        with fiona.open(f'02_Output/{shapefileName}.shp', mode='w',
+                        encoding='UTF-8', driver='ESRI Shapefile',
+                        schema=schema, crs=from_epsg(epsg_code)) as c:
+            # Normalize Weights to 0-1000 Range
+            idx = 0
+            for alphaShapeAndMeta in self.alphashapes_and_meta:
+                if idx == 0:
+                    HImP = 1
+                else:
+                    if self.alphashapes_and_meta[idx][4] != self.alphashapes_and_meta[idx-1][4]:
+                        HImP = 1
+                    else:
+                        HImP = 0
+                #emoName = unicode_name(alphaShapeAndMeta[4])
+                # Calculate normalized weight values based on precalc step
+                if not alphaShapeAndMeta[6] == 1:
+                    weight1_normalized = weightsv1_mod_a * alphaShapeAndMeta[6] + weightsv1_mod_b
+                else:
+                    weight1_normalized = 1
+                if not alphaShapeAndMeta[7] == 1:
+                    weight2_normalized = weightsv2_mod_a * alphaShapeAndMeta[7] + weightsv2_mod_b
+                else:
+                    weight2_normalized = 1
+                if not alphaShapeAndMeta[8] == 1:
+                    weight3_normalized = weightsv3_mod_a * alphaShapeAndMeta[8] + weightsv3_mod_b
+                else:
+                    weight3_normalized = 1
+                idx += 1
+                # project data
+                #geom_proj = transform(project, alphaShapeAndMeta[0])
+                #c.write({
+                #    'geometry': geometry.mapping(geom_proj),
+                if cfg.cluster_emoji and alphaShapeAndMeta[4] in prepared_data.top_emoji_list:
+                    emoji = 1
+                    ImpTagText = ""
+                else:
+                    emoji = 0
+                    ImpTagText = f'{alphaShapeAndMeta[4]}'
+                c.write({
+                    'geometry': geometry.mapping(alphaShapeAndMeta[0]),
+                    'properties': {'Join_Count': alphaShapeAndMeta[1],
+                                   'Views': alphaShapeAndMeta[2],
+                                   'COUNT_User': alphaShapeAndMeta[3],
+                                   'ImpTag': ImpTagText,
+                                   'TagCountG': alphaShapeAndMeta[5],
+                                   'HImpTag': HImP,
+                                   'Weights': weight1_normalized,
+                                   'WeightsV2': weight2_normalized,
+                                   'WeightsV3': weight3_normalized,
+                                   #'shapetype': alphaShapeAndMeta[9],
+                                   'emoji': emoji},
+                    })
+        if cfg.cluster_emoji:
+            with open("02_Output/emojiTable.csv", "w",
+                      encoding='utf-8') as emojiTable:
+                emojiTable.write("FID,Emoji\n")
+                idx = 0
+                for alphaShapeAndMeta in self.alphashapes_and_meta:
+                    if alphaShapeAndMeta[4] in prepared_data.top_emoji_list:
+                        ImpTagText = f'{alphaShapeAndMeta[4]}'
+                    else:
+                        ImpTagText = ""
+                    emojiTable.write(f'{idx},{ImpTagText}\n')
+                    idx += 1
\ No newline at end of file
diff --git a/tagmaps/classes/load_data.py b/tagmaps/classes/load_data.py
index f532045..4b1ee20 100644
--- a/tagmaps/classes/load_data.py
+++ b/tagmaps/classes/load_data.py
@@ -109,7 +109,9 @@ def _process_inputfile(self, file_handle):
     def _parse_postlist(self, post_reader: TextIO):
         """Process posts according to specifications"""
+        # row_num = 0
         for post in post_reader:
+            # row_num += 1
             lbsn_post = self._parse_post(post)
             if lbsn_post is None:
                 continue
@@ -123,8 +125,10 @@ def _parse_postlist(self, post_reader: TextIO):
             f'Skipped posts: {self.stats.skipped_count} - skipped tags: '
             f'{self.stats.count_tags_skipped} of '
f'{self.stats.count_tags_global}') + # if (row_num % 10 == 0): + # modulo: print only once every 10 iterations print(msg, end='\r') - # log last message to file, clean last stdout + # log last message to file, clean stdout print(" " * len(msg), end='\n') sys.stdout.flush() self.log.info(msg) diff --git a/tagmaps/classes/utils.py b/tagmaps/classes/utils.py index c4f3ab4..0176d5a 100644 --- a/tagmaps/classes/utils.py +++ b/tagmaps/classes/utils.py @@ -18,11 +18,11 @@ import hashlib import io import logging -import fiona #Fiona needed for reading Shapefile +import fiona # Fiona needed for reading Shapefile from fiona.crs import from_epsg import shapely.geometry as geometry -import pyproj #import Proj, transform -#https://gis.stackexchange.com/questions/127427/transforming-shapely-polygon-and-multipolygon-objects +import pyproj # import Proj, transform +# https://gis.stackexchange.com/questions/127427/transforming-shapely-polygon-and-multipolygon-objects from shapely.ops import transform, cascaded_union, polygonize #from shapely.geometry import Polygon #from shapely.geometry import shape @@ -34,6 +34,7 @@ from tagmaps.config.config import BaseConfig from tagmaps.classes.shared_structure import CleanedPost + class Utils(): """Collection of various tools and helper functions @@ -79,7 +80,7 @@ def is_number(s): @classmethod def init_main(cls): """Initializing main procedure if package is executed directly""" - + # set console view parameters os.system('mode con: cols=197 lines=40') # initialize logger @@ -95,12 +96,13 @@ def set_logger(cls): """ Set logging handler manually, so we can also print to console while logging to file """ - + cls.init_output_dir() __log_file = "02_Output/log.txt" - + # Set Output to Replace in case of encoding issues (console/windows) - sys.stdout = io.TextIOWrapper(sys.stdout.detach(), sys.stdout.encoding, 'replace') + sys.stdout = io.TextIOWrapper( + sys.stdout.detach(), sys.stdout.encoding, 'replace') # Create logger with specific name log = logging.getLogger("tagmaps") log.format = '%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s' @@ -120,7 +122,7 @@ def init_output_dir(): if not os.path.exists(pathname + '/02_Output/'): os.makedirs(pathname + '/02_Output/') print("Folder /02_Output was created") - + @staticmethod def query_yes_no(question, default="yes"): """Ask a yes/no question via raw_input() and return their answer. @@ -153,9 +155,10 @@ def query_yes_no(question, default="yes"): else: sys.stdout.write("'yes' or 'no' " "(or 'y' or 'n').\n") + @staticmethod def daterange(start_date, end_date): - for n in range(int ((end_date - start_date).days)): + for n in range(int((end_date - start_date).days)): yield start_date + timedelta(n) @staticmethod @@ -172,7 +175,7 @@ def haversine(lon1, lat1, lon2, lat2): a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 c = 2 * asin(sqrt(a)) # Radius of earth in kilometers is 6371 - km = 6371* c + km = 6371 * c m = km*1000 return m @@ -182,29 +185,29 @@ def get_radians_from_meters(dist): degreesDist = dist/111.325 radiansDist = degreesDist/57.2958 return radiansDist - #https://www.mathsisfun.com/geometry/radians.html - #1 Radian is about 57.2958 degrees. - #then see https://sciencing.com/convert-distances-degrees-meters-7858322.html - #Multiply the number of degrees by 111.325 - #To convert this to meters, multiply by 1,000. So, 2 degrees is 222,65 meters. + # https://www.mathsisfun.com/geometry/radians.html + # 1 Radian is about 57.2958 degrees. 
+ # then see https://sciencing.com/convert-distances-degrees-meters-7858322.html + # Multiply the number of degrees by 111.325 + # To convert this to meters, multiply by 1,000. So, 2 degrees is 222,65 meters. @staticmethod def get_meters_from_radians(dist): dist = dist * 57.2958 dist = dist * 111.325 - metersDist = round(dist * 1000,1) + metersDist = round(dist * 1000, 1) return metersDist - #1 Radian is about 57.2958 degrees. - #then see https://sciencing.com/convert-distances-degrees-meters-7858322.html - #Multiply the number of degrees by 111.325 - #To convert this to meters, multiply by 1,000. So, 2 degrees is 222,65 meters. - #plt.close('all') #clear memory + # 1 Radian is about 57.2958 degrees. + # then see https://sciencing.com/convert-distances-degrees-meters-7858322.html + # Multiply the number of degrees by 111.325 + # To convert this to meters, multiply by 1,000. So, 2 degrees is 222,65 meters. + # plt.close('all') #clear memory @staticmethod def checkEmojiType(strEmo): """Is this function really needed, makes no difference! (really?)""" - if unicodedata.name(strEmo).startswith(("EMOJI MODIFIER","VARIATION SELECTOR","ZERO WIDTH")): + if unicodedata.name(strEmo).startswith(("EMOJI MODIFIER", "VARIATION SELECTOR", "ZERO WIDTH")): return False return True @@ -220,28 +223,28 @@ def extract_emoji(str): Utils.checkEmojiType(c) is True) return emoji_list - #see https://stackoverflow.com/questions/43852668/using-collections-counter-to-count-emojis-with-different-colors + # see https://stackoverflow.com/questions/43852668/using-collections-counter-to-count-emojis-with-different-colors # we want to ignore fitzpatrick modifiers and treat all differently colored emojis the same - #https://stackoverflow.com/questions/38100329/some-emojis-e-g-have-two-unicode-u-u2601-and-u-u2601-ufe0f-what-does - #COOKING - #OK HAND SIGN - #EMOJI MODIFIER FITZPATRICK TYPE-1-2 - #GRINNING FACE WITH SMILING EYES - #HEAVY BLACK HEART - #WEARY CAT FACE - #SMILING FACE WITH HEART-SHAPED EYES - #OK HAND SIGN - #EMOJI MODIFIER FITZPATRICK TYPE-1-2 - #GRINNING FACE WITH SMILING EYES - #PERSON WITH FOLDED HANDS - #EMOJI MODIFIER FITZPATRICK TYPE-3 - #WEARY CAT FACE - - ##Emojitest + # https://stackoverflow.com/questions/38100329/some-emojis-e-g-have-two-unicode-u-u2601-and-u-u2601-ufe0f-what-does + # COOKING + # OK HAND SIGN + # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + # GRINNING FACE WITH SMILING EYES + # HEAVY BLACK HEART + # WEARY CAT FACE + # SMILING FACE WITH HEART-SHAPED EYES + # OK HAND SIGN + # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + # GRINNING FACE WITH SMILING EYES + # PERSON WITH FOLDED HANDS + # EMOJI MODIFIER FITZPATRICK TYPE-3 + # WEARY CAT FACE + + # Emojitest #n = '❤️👨‍⚕️' - ##n = '👨‍⚕️' #medical emoji with zero-width joiner (http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html) + # n = '👨‍⚕️' #medical emoji with zero-width joiner (http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html) #nlist = def_functions.extract_emojis(n) - #with open("emojifile.txt", "w", encoding='utf-8') as emojifile: + # with open("emojifile.txt", "w", encoding='utf-8') as emojifile: # emojifile.write("Original: " + n + '\n') # for xstr in nlist: # emojifile.write('Emoji Extract: U+%04x' % ord(xstr) + '\n') @@ -249,17 +252,16 @@ def extract_emoji(str): # for _c in n: # emojifile.write(str(unicode_name(_c)) + '\n') # emojifile.write('Each Codepoint: U+%04x' % ord(_c) + '\n') - #def cleanEmoji(c): + # def cleanEmoji(c): # tuple = (u'\ufeff',u'\u200b',u'\u200d') # for ex in tuple: # c.replace(ex,"") # return(c) - 
#https://github.com/carpedm20/emoji/ - #https://github.com/carpedm20/emoji/issues/75 + # https://github.com/carpedm20/emoji/ + # https://github.com/carpedm20/emoji/issues/75 - - #this class is needed to override tkinter window with drag&drop option when overrideredirect = true - #class App: + # this class is needed to override tkinter window with drag&drop option when overrideredirect = true + # class App: # global tk # def __init__(self): # self.root = tk.Tk() @@ -280,10 +282,9 @@ def extract_emoji(str): # self.root._offsetx = event.x # self.root._offsety = event.y - #tc unicode problem - #https://stackoverflow.com/questions/40222971/python-find-equivalent-surrogate-pair-from-non-bmp-unicode-char + # tc unicode problem + # https://stackoverflow.com/questions/40222971/python-find-equivalent-surrogate-pair-from-non-bmp-unicode-char - def _surrogatepair(match): char = match.group() assert ord(char) > 0xffff @@ -296,18 +297,7 @@ def with_surrogates(text): _nonbmp = re.compile(r'[\U00010000-\U0010FFFF]') return _nonbmp.sub(_surrogatepair, text) - #https://stackoverflow.com/questions/40132542/get-a-cartesian-projection-accurate-around-a-lat-lng-pair - def convert_wgs_to_utm(lon, lat): - utm_band = str((math.floor((lon + 180) / 6 ) % 60) + 1) - if len(utm_band) == 1: - utm_band = '0'+utm_band - if lat >= 0: - epsg_code = '326' + utm_band - else: - epsg_code = '327' + utm_band - return epsg_code - - #def str2bool(v): + # def str2bool(v): # if v.lower() in ('yes', 'true', 't', 'y', '1'): # return True # elif v.lower() in ('no', 'false', 'f', 'n', '0'): @@ -315,112 +305,8 @@ def convert_wgs_to_utm(lon, lat): # else: # raise argparse.ArgumentTypeError('Boolean value expected.') - def generateClusterShape(toptag,clusterPhotoGuidList,cleanedPhotoDict,crs_wgs,crs_proj,clusterTreeCuttingDist,localSaturationCheck): - #we define a new list of Temp Alpha Shapes outside the loop, so that it is not overwritten each time - listOfAlphashapesAndMeta_tmp = [] - #points = [] - tagArea = 0 - for photo_guids in clusterPhotoGuidList: - #for each cluster for this toptag - photos = [cleanedPhotoDict[x] for x in photo_guids] - photoCount = len(photo_guids) - uniqueUserCount = len(set([photo.user_guid for photo in photos])) - sumViews = sum([photo.post_views_count for photo in photos]) - #calculate different weighting formulas - #weightsv1 = 1+ photoCount *(sqrt(1/( photoCount / uniqueUserCount )**3)) #-> Standard weighting formula (x**y means x raised to the power y); +1 to UserCount: prevent 1-2 Range from being misaligned - #weightsv2 = 1+ photoCount *(sqrt(1/( photoCount / uniqueUserCount )**2)) - weightsv1 = photoCount *(sqrt(1/( photoCount / (uniqueUserCount+1) )**3)) #-> Standard weighting formula (x**y means x raised to the power y); +1 to UserCount: prevent 1-2 Range from being misaligned - weightsv2 = photoCount *(sqrt(1/( photoCount / (uniqueUserCount+1) )**2)) #-> less importance on User_Count in correlation to photo count [Join_Count]; +1 to UserCount: prevent 1-2 Range from being misaligned - weightsv3 = sqrt((photoCount+(2*sqrt(photoCount)))*2) #-> Ignores User_Count, this will emphasize individual and very active users - #points = [geometry.Point(photo.lng, photo.lat) - # for photo in photos] - #instead of lat/lng for each photo, we use photo_locID to identify a list of distinct locations - distinctLocations = set([photo.loc_id - for photo in photos]) - #simple list comprehension without projection: - #points = [geometry.Point(Decimal(location.split(':')[1]), Decimal(location.split(':')[0])) - # for 
location in distinctLocations] - points = [geometry.Point(pyproj.transform(crs_wgs, crs_proj, Decimal(location.split(':')[1]), Decimal(location.split(':')[0]))) - for location in distinctLocations] - point_collection = geometry.MultiPoint(list(points)) - result_polygon = None - - if len(points) >= 5: - if len(points) < 10: - result_polygon = point_collection.convex_hull #convex hull - result_polygon = result_polygon.buffer(clusterTreeCuttingDist/4,resolution=3) - shapetype = "between 5 and 10 points_convexHull" - #result_polygon = result_polygon.buffer(min(distXLng,distYLat)/100,resolution=3) - else: - if len(points) > 500: - startalpha = 1000000 - elif len(points) > 200: - startalpha = 10000 - else: - startalpha = 9000 - result_polygon = Utils.alpha_shape(points,alpha=clusterTreeCuttingDist/startalpha) #concave hull/alpha shape /50000 - shapetype = "Initial Alpha Shape + Buffer" - if type(result_polygon) is geometry.multipolygon.MultiPolygon or isinstance(result_polygon, bool): - #repeat generating alpha shapes with smaller alpha value if Multigon is generated - #smaller alpha values mean less granularity of resulting polygon - #but too large alpha may result in empty polygon - #(this branch is sometimes executed for larger scales) - for i in range(1,6): - #try decreasing alpha - alpha = startalpha + (startalpha * (i**i)) #** means cube - result_polygon = Utils.alpha_shape(points,alpha=clusterTreeCuttingDist/alpha)#/100000 - if not type(result_polygon) is geometry.multipolygon.MultiPolygon and not isinstance(result_polygon, bool): - shapetype = "Multipolygon Alpha Shape /" + str(alpha) - break - if type(result_polygon) is geometry.multipolygon.MultiPolygon or isinstance(result_polygon, bool): - #try increasing alpha - for i in range(1,6): - #try decreasing alpha - alpha = startalpha / (i*i) - result_polygon = Utils.alpha_shape(points,alpha=clusterTreeCuttingDist/alpha)#/100000 - if not type(result_polygon) is geometry.multipolygon.MultiPolygon and not isinstance(result_polygon, bool): - shapetype = "Multipolygon Alpha Shape /" + str(alpha) - break - if type(result_polygon) is geometry.multipolygon.MultiPolygon: - shapetype = "Multipolygon Alpha Shape -> Convex Hull" - #if still of type multipolygon, try to remove holes and do a convex_hull - result_polygon = result_polygon.convex_hull - #OR: in case there was a problem with generating alpha shapes (circum_r = a*b*c/(4.0*area) --> ZeroDivisionError: float division by zero) - #this branch is rarely executed for large point clusters where alpha is perhaps set too small - elif isinstance(result_polygon, bool) or result_polygon.is_empty: - shapetype = "BoolAlpha -> Fallback to PointCloud Convex Hull" - result_polygon = point_collection.convex_hull #convex hull - #Finally do a buffer to smooth alpha - result_polygon = result_polygon.buffer(clusterTreeCuttingDist/4,resolution=3) - #result_polygon = result_polygon.buffer(min(distXLng,distYLat)/100,resolution=3) - elif 2 <= len(points) < 5: - shapetype = "between 2 and 5 points_buffer" - #calc distance between points http://www.mathwarehouse.com/algebra/distance_formula/index.php - #bdist = math.sqrt((points[0].coords.xy[0][0]-points[1].coords.xy[0][0])**2 + (points[0].coords.xy[1][0]-points[1].coords.xy[1][0])**2) - #print(str(bdist)) - result_polygon = point_collection.buffer(clusterTreeCuttingDist/4,resolution=3) #single dots are presented as buffer with 0.5% of width-area - result_polygon = result_polygon.convex_hull - #result_polygon = 
point_collection.buffer(min(distXLng,distYLat)/100,resolution=3) #single dots are presented as buffer with 0.5% of width-area - elif len(points)==1 or type(result_polygon) is geometry.point.Point or result_polygon is None: - shapetype = "1 point cluster" - result_polygon = point_collection.buffer(clusterTreeCuttingDist/4,resolution=3) #single dots are presented as buffer with 0.5% of width-area - #result_polygon = point_collection.buffer(min(distXLng,distYLat)/100,resolution=3) #single dots are presented as buffer with 0.5% of width-area - #final check for multipolygon - if type(result_polygon) is geometry.multipolygon.MultiPolygon: - #usually not executed - result_polygon = result_polygon.convex_hull - #Geom, Join_Count, Views, COUNT_User,ImpTag,TagCountG,HImpTag - if result_polygon is not None and not result_polygon.is_empty: - if localSaturationCheck: - tagArea += result_polygon.area - listOfAlphashapesAndMeta_tmp.append((result_polygon,photoCount,sumViews,uniqueUserCount,toptag[0],toptag[1],weightsv1,weightsv2,weightsv3,shapetype)) - if len(listOfAlphashapesAndMeta_tmp) > 0: - # finally sort and append all cluster shapes for this tag - listOfAlphashapesAndMeta_tmp = sorted(listOfAlphashapesAndMeta_tmp,key=lambda x: -x[6]) - return listOfAlphashapesAndMeta_tmp, tagArea - def plot_polygon(polygon): - fig = plt.figure(figsize=(10,10)) + fig = plt.figure(figsize=(10, 10)) ax = fig.add_subplot(111) margin = .3 x_min, y_min, x_max, y_max = polygon.bounds @@ -432,72 +318,6 @@ def plot_polygon(polygon): ax.add_patch(patch) return fig - def alpha_shape(points, alpha): - """ - Alpha Shapes Code by KEVIN DWYER - see http://blog.thehumangeo.com/2014/05/12/drawing-boundaries-in-python/ - Compute the alpha shape (concave hull) of a set - of points. - @param points: Iterable container of points. - @param alpha: alpha value to influence the - gooeyness of the border. Smaller numbers - don't fall inward as much as larger numbers. - Too large, and you lose everything! - """ - if len(points) < 4: - # When you have a triangle, there is no sense - # in computing an alpha shape. - return geometry.MultiPoint(list(points)).convex_hull - def add_edge(edges, edge_points, coords, i, j): - """ - Add a line between the i-th and j-th points, - if not in the list already - """ - if (i, j) in edges or (j, i) in edges: - # already added - return - edges.add( (i, j) ) - edge_points.append(coords[ [i, j] ]) - coords = np.array([point.coords[0] - for point in points]) - - #print(str(len(coords))) - tri = Delaunay(coords)#,qhull_o}ptions = 'QJ') #To avoid this error, you can joggle the data by specifying the 'QJ' option to the DELAUNAY function. https://de.mathworks.com/matlabcentral/answers/94438-why-does-the-delaunay-function-in-matlab-7-0-r14-produce-an-error-when-passed-colinear-points?s_tid=gn_loc_drop - #tri = Delaunay(coords,{'QJ'}) #Version 3.1 added triangulated output ('Qt'). It should be used for Delaunay triangulations instead of using joggled input ('QJ'). 
- edges = set() - edge_points = [] - # loop over triangles: - # ia, ib, ic = indices of corner points of the - # triangle - for ia, ib, ic in tri.vertices: - pa = coords[ia] - pb = coords[ib] - pc = coords[ic] - # Lengths of sides of triangle - a = math.sqrt((pa[0]-pb[0])**2 + (pa[1]-pb[1])**2) - b = math.sqrt((pb[0]-pc[0])**2 + (pb[1]-pc[1])**2) - c = math.sqrt((pc[0]-pa[0])**2 + (pc[1]-pa[1])**2) - # Semiperimeter of triangle - s = (a + b + c)/2.0 - # Area of triangle by Heron's formula - try: - area = math.sqrt(s*(s-a)*(s-b)*(s-c)) - except ValueError: - return False - if area == 0: - return False - circum_r = a*b*c/(4.0*area) - # Here's the radius filter. - #print circum_r - if circum_r < 1.0/alpha: - add_edge(edges, edge_points, coords, ia, ib) - add_edge(edges, edge_points, coords, ib, ic) - add_edge(edges, edge_points, coords, ic, ia) - m = geometry.MultiLineString(edge_points) - triangles = list(polygonize(m)) - return cascaded_union(triangles)#, edge_points - #return geometry.polygon.asPolygon(edge_points,holes=None) - def fit_cluster(clusterer, data): clusterer.fit(data) return clusterer @@ -510,14 +330,14 @@ def get_rectangle_bounds(points): limXMax = np.max(points.T[0]) return limYMin, limYMax, limXMin, limXMax - def filterTags(taglist,SortOutAlways_set,SortOutAlways_inStr_set): + def filterTags(taglist, SortOutAlways_set, SortOutAlways_inStr_set): count_tags = 0 count_skipped = 0 - #Filter tags based on two stoplists + # Filter tags based on two stoplists photo_tags_filtered = set() for tag in taglist: count_tags += 1 - #exclude numbers and those tags that are in SortOutAlways_set + # exclude numbers and those tags that are in SortOutAlways_set if len(tag) == 1 or tag == '""' or tag.isdigit() or tag in SortOutAlways_set: count_skipped += 1 continue @@ -527,7 +347,4 @@ def filterTags(taglist,SortOutAlways_set,SortOutAlways_inStr_set): break else: photo_tags_filtered.add(tag) - return photo_tags_filtered, count_tags,count_skipped - - - + return photo_tags_filtered, count_tags, count_skipped
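To close, a hedged usage sketch for the reworked filterTags (not part of the patch; it assumes the tagmaps package is importable and uses made-up tag values). Note that tags dropped by the substring stoplist are excluded from the result but not counted in count_skipped:

```python
from tagmaps.classes.utils import Utils

taglist = ['park', 'instagood', '2019', 'dresdencity', 'elbe']
filtered, count_tags, count_skipped = Utils.filterTags(
    taglist, {'instagood'}, {'city'})
print(sorted(filtered), count_tags, count_skipped)
# ['elbe', 'park'] 5 2  ('dresdencity' is dropped but not counted)
```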