Skip to content

Commit

Permalink
intermediate refactor: cluster preview and map preview work
Browse files Browse the repository at this point in the history
TODO: process_data refactor
  • Loading branch information
Sieboldianus committed Jan 15, 2019
1 parent 115576e commit e015f54
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 37 deletions.
6 changes: 2 additions & 4 deletions tagmaps/__main__.py
Expand Up @@ -84,7 +84,7 @@ def main():
log.info(lbsn_data.bounds.get_bound_report())

if (cfg.cluster_tags or cfg.cluster_emoji):
log.info("########## STEP 2 of 6: Tag Ranking ##########")
log.info("\n########## STEP 2 of 6: Tag Ranking ##########")

prepared_data = lbsn_data.get_prepared_data()

Expand All @@ -97,14 +97,12 @@ def main():
f'Total emoji count for cleaned (tmax) emoji list '
f'(Top {prepared_data.emax}): {prepared_data.total_emoji_count}.')

global top_tags_list
top_tags_list = prepared_data.top_tags_list
if cfg.statistics_only is False:
# restart time monitoring for actual cluster step
now = time.time()
log.info(
"########## STEP 3 of 6: Tag Location Clustering ##########")
sys.stdout.flush()
"\n########## STEP 3 of 6: Tag Location Clustering ##########")

cluster_tag_data = ClusterGen(
lbsn_data.bounds,
Expand Down
32 changes: 20 additions & 12 deletions tagmaps/classes/cluster.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

"""
Module for tag maps clustering functions
Module for tag maps clustering methods
"""

import warnings
Expand Down Expand Up @@ -80,7 +80,7 @@ def _select_postguids(self, tag: str) -> Tuple[List[str], int]:
distinct_localloc_count.add(cleaned_photo_location.loc_id)
return selected_postguids_list, len(distinct_localloc_count)

def _getselect_postguids(self, tag: str, silent: bool = True):
def _getselect_postguids(self, tag: str, silent: bool = True) -> List[str]:
"""Get list of post guids with specific tag
Args:
Expand All @@ -105,19 +105,26 @@ def _getselect_postguids(self, tag: str, silent: bool = True):
f'of total distinct locations in area)', end=" ")
return selected_postguids_list

def _getselect_posts(self, selected_postguids_list):
def _getselect_posts(self,
selected_postguids_list: List[str]
) -> List[CleanedPost]:
selected_posts_list = [self.cleaned_post_dict[x]
for x in selected_postguids_list]
return selected_posts_list

def _get_np_points(self, tag: str = None, silent: bool = None):
def _get_np_points(self,
tag: str = None,
silent: bool = None
) -> np.ndarray:
"""Gets numpy array of selected points from tags with latlng
toptag ([type], optional): Defaults to None. [description]
silent ([type], optional): Defaults to None. [description]
Args:
tag: tag to select posts
silent: if true, no console output (interface mode)
Returns:
[type]: [description]
points: numpy array of lng/lat points to map
(the list of selected post guids is no longer returned)
"""
# no log reporting for selected points
if silent is None:
Expand All @@ -138,11 +145,12 @@ def _get_np_points(self, tag: str = None, silent: bool = None):
# (limit by list of column-names)
points = df.as_matrix(['lng', 'lat'])
# only return preview fig without clustering
return points, selected_postguids_list
return points

def cluster_points(self, points, cluster_distance,
selected_postguids_list,
min_span_tree, preview_mode):
def cluster_points(self, points,
cluster_distance: float,
min_span_tree: bool = False,
preview_mode: bool = False):
# cluster data
# conversion to radians for HDBSCAN
# (does not support decimal degrees)
Expand All @@ -151,7 +159,7 @@ def cluster_points(self, points, cluster_distance,
# (descending), calculate HDBSCAN Clusters
# min_cluster_size default - 5% optimum:
min_cluster_size = max(
2, int(((len(selected_postguids_list))/100)*5))
2, int(((len(points))/100)*5))
self.clusterer = hdbscan.HDBSCAN(
min_cluster_size=min_cluster_size,
gen_min_span_tree=min_span_tree,
Expand Down
22 changes: 12 additions & 10 deletions tagmaps/classes/interface.py
Expand Up @@ -22,7 +22,6 @@
from tagmaps.classes.shared_structure import CleanedPost
from tagmaps.classes.cluster import ClusterGen

plt.ion()
# enable interactive mode for pyplot (not necessary?!)
plt.ion()
# label_size = 10
Expand Down Expand Up @@ -202,12 +201,10 @@ def _cluster_preview(self, sel_tag: Tuple[str, int]):
# tkinter.messagebox.showinfo("Num of clusters: ",
# str(len(sel_colors)) + " " + str(sel_colors[1]))
# output/update matplotlib figures
(points,
selected_postguids_list) = self._clst._get_np_points(
sel_tag[0], silent=True)
points = self._clst._get_np_points(tag=sel_tag[0], silent=True)
self._clst.cluster_points(
points, self.cluster_distance,
selected_postguids_list, self.create_min_spanning_tree,
points=points,
cluster_distance=self.cluster_distance,
preview_mode=True)
mask_noisy = self._clst.mask_noisy
number_of_clusters = self._clst.number_of_clusters
Expand Down Expand Up @@ -413,12 +410,12 @@ def _cluster_preview(self, sel_tag: Tuple[str, int]):
from_=(self.cluster_distance/100),
to=(self.cluster_distance*2))

def _selection_preview(self, sel_tag):
def _selection_preview(self, sel_tag: Tuple[str, int]):
"""Update preview map based on tag selection"""
# tkinter.messagebox.showinfo("Proceed", f'{sel_tag}')
points, __ = self._clst._get_np_points(
sel_tag[0],
silent=False)
points = self._clst._get_np_points(
tag=sel_tag[0],
silent=True)
if self.fig1:
plt.figure(1).clf() # clear figure 1
# earth = Basemap()
Expand Down Expand Up @@ -556,6 +553,11 @@ def _cluster_current_display_tag(self):
self._cluster_preview(self._clst.top_tags_list[0])

def _scaletest_current_display_tag(self):
if self.create_min_spanning_tree is False:
tkinter.messagebox.showinfo(
"Skip: ",
f'Currently deactivated')
return
if self.current_display_tag:
sel_tag = self.current_display_tag
else:
Expand Down
26 changes: 15 additions & 11 deletions tagmaps/classes/load_data.py
Expand Up @@ -92,7 +92,7 @@ def _process_inputfile(self, file_handle):
Output: produces a list of posts that can be parsed
"""
post_list = [] # needed?
post_list = list() # needed?
if self.cfg.source_map.file_extension == "csv":
post_list = csv.reader(
file_handle,
Expand All @@ -102,9 +102,9 @@ def _process_inputfile(self, file_handle):
next(post_list, None) # skip headerline
elif self.cfg.source_map.file_extension == "json":
post_list = post_list + json.loads(file_handle.read())
self._parse_postlist(post_list)
result_msg = self._parse_postlist(post_list)

def _parse_postlist(self, post_list):
def _parse_postlist(self, post_list: TextIO):
"""Process posts according to specifications"""
for post in post_list:
# skip duplicates and erroneous entries
Expand All @@ -119,15 +119,19 @@ def _parse_postlist(self, post_list):
continue
self._merge_posts(lbsn_post)
# status report
msg = \
f'Cleaned output to {len(self.distinct_locations_set):02d} ' \
f'distinct locations from ' \
f'{self.stats.count_glob:02d} posts ' \
f'(File {self.stats.partcount} of {len(self.filelist)}) - ' \
f'Skipped posts: {self.stats.skipped_count} - skipped tags: ' \
f'{self.stats.count_tags_skipped} of ' \
f'{self.stats.count_tags_global}'
msg = (
f'Cleaned output to {len(self.distinct_locations_set):02d} '
f'distinct locations from '
f'{self.stats.count_glob:02d} posts '
f'(File {self.stats.partcount} of {len(self.filelist)}) - '
f'Skipped posts: {self.stats.skipped_count} - skipped tags: '
f'{self.stats.count_tags_skipped} of '
f'{self.stats.count_tags_global}')
print(msg, end='\r')
# log last message to file, clean last stdout
print(" " * len(msg), end='\n')
sys.stdout.flush()
self.log.info(msg)

def _merge_posts(self, lbsn_post):
"""Method will union all tags of a single user for each location
Expand Down

0 comments on commit e015f54

Please sign in to comment.