Skip to content

Commit

Permalink
Hot Fix for Flickr mapping, tested
Browse files Browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed Dec 4, 2018
1 parent 7467151 commit 1346179
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 77 deletions.
8 changes: 4 additions & 4 deletions lbsntransform/__main__.py
Expand Up @@ -25,7 +25,7 @@ def main():
"""
import sys
from .classes.helper_functions import TimeMonitor
from .classes.helper_functions import HelperFunctions
from .classes.helper_functions import HelperFunctions as HF
from .classes.submit_data import LBSNTransfer
from .classes.load_data import LoadData
from .config.config import BaseConfig
Expand All @@ -39,7 +39,7 @@ def main():
sys.stdout.flush()
log = set_logger()
# load import mapper depending on lbsn origin (e.g. 1 = Instagram, 2 = Flickr, 3 = Twitter)
importer = HelperFunctions.load_importer_mapping_module(config.Origin)
importer = HF.load_importer_mapping_module(config.Origin)
# establish output connection
conn_output, cursor_output = LoadData.initialize_output_connection(config)
output = LBSNTransfer(dbCursor=cursor_output,
Expand Down Expand Up @@ -115,7 +115,7 @@ def main():
# On the first loop or after 500.000 processed records, transfer results to DB
if not start_number or processed_records >= config.transferCount or finished:
sys.stdout.flush()
print(f'Storing {import_mapper.lbsnRecords.CountGlob} records ..')
print(f'Storing {import_mapper.lbsnRecords.CountGlob} records .. {HF.null_notice(import_mapper.null_island)})')
output.storeLbsnRecordDicts(import_mapper)
output.commitChanges()
processed_records = 0
Expand All @@ -133,7 +133,7 @@ def main():
# submit remaining
# ??
if import_mapper.lbsnRecords.CountGlob > 0:
print(f'Transferring remaining {import_mapper.lbsnRecords.CountGlob} to db..')
print(f'Transferring remaining {import_mapper.lbsnRecords.CountGlob} to db.. {HF.null_notice(import_mapper.null_island)})')
output.storeLbsnRecordDicts(import_mapper)
output.commitChanges()

Expand Down
2 changes: 1 addition & 1 deletion lbsntransform/classes/field_mapping_flickr.py
Expand Up @@ -172,5 +172,5 @@ def send_to_null_island(self, lat_entry, lng_entry):
"""Logs entries with problematic lat/lng's,
increases Null Island Counter by 1.
"""
log.debug(f'"Send to NULL island {null_island}: RecordNr {self.lbsnRecords.CountGlob} - Coordinates: {lat_entry}, {lng_entry}')
self.log.debug(f'"Send to NULL island: RecordNr {self.lbsnRecords.CountGlob} - Coordinates: {lat_entry}, {lng_entry}')
self.null_island += 1
2 changes: 2 additions & 0 deletions lbsntransform/classes/field_mapping_twitter.py
Expand Up @@ -19,6 +19,7 @@ def __init__(self, disableReactionPostReferencing=False, geocodes=False, mapFull
origin.origin_id = lbsnOrigin.TWITTER
self.origin = origin
self.lbsnRecords = LBSNRecordDicts() #this is where all the data will be stored
self.null_island = 0
self.log = logging.getLogger('__main__')#logging.getLogger()
self.disableReactionPostReferencing = disableReactionPostReferencing
self.mapFullRelations = mapFullRelations
Expand Down Expand Up @@ -233,6 +234,7 @@ def extractPost(self,jsonStringDict, userPkey = None):
postRecord.post_latlng = placeRecord.geom_center
# if still no geoinformation, send post to Null-Island
if not postRecord.post_latlng:
self.null_island += 1
postRecord.post_latlng = "POINT(%s %s)" % (0,0)
# Process attributes of twitter post
postSource = jsonStringDict.get('source')
Expand Down
14 changes: 14 additions & 0 deletions lbsntransform/classes/helper_functions.py
Expand Up @@ -18,6 +18,18 @@
geos.WKBWriter.defaults['include_srid'] = True

class HelperFunctions():

@staticmethod
def log_main_debug(debug_text):
    """Emit *debug_text* at DEBUG level on the logger named '__main__'.

    Intended for static helper functions that have no logger instance
    of their own to write to.

    Args:
        debug_text: The message to log.
    """
    main_logger = logging.getLogger('__main__')
    main_logger.debug(debug_text)

@staticmethod
def null_notice(x):
    """Return a Null Island notice for reporting, or '' if there is none.

    Args:
        x: Number of records that were sent to Null Island. ``None`` is
            treated the same as zero.

    Returns:
        ``'(Null Island: <x>)'`` when ``x`` is greater than zero,
        otherwise an empty string so the notice is suppressed.
    """
    # Guard against a missing counter: comparing None with 0 would raise
    # TypeError and abort the status output.
    if x is not None and x > 0:
        return f'(Null Island: {x})'
    return ''

@staticmethod
def utc_to_local(utc_dt):
    """Treat *utc_dt* as UTC and convert it to the system's local timezone.

    Any tzinfo already attached to *utc_dt* is overwritten with UTC
    before the conversion; the wall-clock fields are left unchanged.

    Args:
        utc_dt: A datetime whose wall-clock fields are expressed in UTC.

    Returns:
        A timezone-aware datetime for the same instant in local time.
    """
    aware_utc = utc_dt.replace(tzinfo=timezone.utc)
    return aware_utc.astimezone()
Expand Down Expand Up @@ -142,6 +154,8 @@ def substituteReferencedUser(mainPost, origin, log):

@staticmethod
def null_check(recordAttr):
"""Helper function to check for Null Values
"""
if not recordAttr:
return None
else:
Expand Down
2 changes: 2 additions & 0 deletions lbsntransform/classes/load_data.py
Expand Up @@ -77,6 +77,7 @@ def fetch_json_data_from_lbsn(cursor, start_id=0, get_max=None, number_of_record
return records

def fetch_data_from_file(loc_filelist, continue_number, is_stacked_json, format):
"""Fetches CSV or JSON data (including stacked json) from file"""
if format == 'json':
records = LoadData.fetch_json_data_from_file(loc_filelist,
continue_number,
Expand Down Expand Up @@ -120,6 +121,7 @@ def fetch_csv_data_from_file(loc_filelist, start_file_id=0):
"""
records = []
loc_file = loc_filelist[start_file_id]
HF.log_main_debug(f'\nCurrent file: {ntpath.basename(loc_file)}')
with open(loc_file, 'r', encoding="utf-8", errors='replace') as file:
reader = csv.reader(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONE)
next(reader, None) # skip headerline
Expand Down
144 changes: 72 additions & 72 deletions lbsntransform/classes/shared_structure_proto_lbsndb.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

from lbsnstructure.lbsnstructure_pb2 import *
from .helper_functions import HelperFunctions
from .helper_functions import HelperFunctions as HF

class ProtoLBSM_db_Mapping():

Expand Down Expand Up @@ -45,36 +45,36 @@ def prepareLbsnCountry(self, record):
placeRecord.Guid,
placeRecord.name,
placeRecord.name_alternatives,
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_area),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_area),
placeRecord.url)
return preparedRecord

def prepareLbsnCity(self, record):
placeRecord = placeAttrShared(record)
countryGuid = HelperFunctions.null_check(record.country_pkey.id)
subType = HelperFunctions.null_check(record.sub_type)
countryGuid = HF.null_check(record.country_pkey.id)
subType = HF.null_check(record.sub_type)
preparedRecord = (placeRecord.OriginID,
placeRecord.Guid,
placeRecord.name,
placeRecord.name_alternatives,
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_area),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_area),
placeRecord.url,
countryGuid,
subType)
return preparedRecord

def prepareLbsnPlace(self, record):
placeRecord = placeAttrShared(record)
cityGuid = HelperFunctions.null_check(record.city_pkey.id)
postCount = HelperFunctions.null_check(record.post_count)
cityGuid = HF.null_check(record.city_pkey.id)
postCount = HF.null_check(record.post_count)
preparedRecord = (placeRecord.OriginID,
placeRecord.Guid,
placeRecord.name,
placeRecord.name_alternatives,
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HelperFunctions.returnEWKBFromGeoTEXT(placeRecord.geom_area),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_center),
HF.returnEWKBFromGeoTEXT(placeRecord.geom_area),
placeRecord.url,
cityGuid,
postCount)
Expand All @@ -96,7 +96,7 @@ def prepareLbsnUser(self, record):
userRecord.is_available,
userRecord.user_language,
userRecord.user_location,
HelperFunctions.returnEWKBFromGeoTEXT(userRecord.user_location_geom),
HF.returnEWKBFromGeoTEXT(userRecord.user_location_geom),
userRecord.liked_count,
userRecord.active_since,
userRecord.profile_image_url,
Expand All @@ -121,7 +121,7 @@ def prepareLbsnPost(self, record):
postRecord = postAttrShared(record)
preparedRecord = (postRecord.OriginID,
postRecord.Guid,
HelperFunctions.returnEWKBFromGeoTEXT(postRecord.post_latlng),
HF.returnEWKBFromGeoTEXT(postRecord.post_latlng),
postRecord.place_guid,
postRecord.city_guid,
postRecord.country_guid,
Expand Down Expand Up @@ -152,7 +152,7 @@ def prepareLbsnPostReaction(self, record):
postReactionRecord = postReactionAttrShared(record)
preparedRecord = (postReactionRecord.OriginID,
postReactionRecord.Guid,
HelperFunctions.returnEWKBFromGeoTEXT(postReactionRecord.reaction_latlng),
HF.returnEWKBFromGeoTEXT(postReactionRecord.reaction_latlng),
postReactionRecord.user_guid,
postReactionRecord.referencedPost,
postReactionRecord.referencedPostreaction,
Expand All @@ -175,97 +175,97 @@ class placeAttrShared():
def __init__(self, record):
self.OriginID = record.pkey.origin.origin_id # = 3
self.Guid = record.pkey.id
self.name = HelperFunctions.null_check(record.name)
self.name = HF.null_check(record.name)
# because ProtoBuf Repeated Field does not support distinct rule, we remove any duplicates in list fields prior to submission here
self.name_alternatives = list(set(record.name_alternatives))
if self.name and self.name in self.name_alternatives:
self.name_alternatives.remove(self.name)
self.url = HelperFunctions.null_check(record.url)
self.geom_center = HelperFunctions.null_check(record.geom_center)
self.geom_area = HelperFunctions.null_check(record.geom_area)
self.url = HF.null_check(record.url)
self.geom_center = HF.null_check(record.geom_center)
self.geom_area = HF.null_check(record.geom_area)

class userAttrShared():
def __init__(self, record):
self.OriginID = record.pkey.origin.origin_id
self.Guid = record.pkey.id
self.user_name = HelperFunctions.null_check(record.user_name)
self.user_fullname = HelperFunctions.null_check(record.user_fullname)
self.follows = HelperFunctions.null_check(record.follows)
self.followed = HelperFunctions.null_check(record.followed)
self.group_count = HelperFunctions.null_check(record.group_count)
self.biography = HelperFunctions.null_check(record.biography)
self.post_count = HelperFunctions.null_check(record.post_count)
self.url = HelperFunctions.null_check(record.url)
self.is_private = HelperFunctions.null_check(record.is_private)
self.is_available = HelperFunctions.null_check(record.is_available)
self.user_language = HelperFunctions.null_check(record.user_language.language_short)
self.user_location = HelperFunctions.null_check(record.user_location)
self.user_location_geom = HelperFunctions.null_check(record.user_location_geom)
self.liked_count = HelperFunctions.null_check(record.liked_count)
self.active_since = HelperFunctions.null_check_datetime(record.active_since)
self.profile_image_url = HelperFunctions.null_check(record.profile_image_url)
self.user_timezone = HelperFunctions.null_check(record.user_timezone)
self.user_utc_offset = HelperFunctions.null_check(record.user_utc_offset)
self.user_name = HF.null_check(record.user_name)
self.user_fullname = HF.null_check(record.user_fullname)
self.follows = HF.null_check(record.follows)
self.followed = HF.null_check(record.followed)
self.group_count = HF.null_check(record.group_count)
self.biography = HF.null_check(record.biography)
self.post_count = HF.null_check(record.post_count)
self.url = HF.null_check(record.url)
self.is_private = HF.null_check(record.is_private)
self.is_available = HF.null_check(record.is_available)
self.user_language = HF.null_check(record.user_language.language_short)
self.user_location = HF.null_check(record.user_location)
self.user_location_geom = HF.null_check(record.user_location_geom)
self.liked_count = HF.null_check(record.liked_count)
self.active_since = HF.null_check_datetime(record.active_since)
self.profile_image_url = HF.null_check(record.profile_image_url)
self.user_timezone = HF.null_check(record.user_timezone)
self.user_utc_offset = HF.null_check(record.user_utc_offset)
self.user_groups_member = list(set(record.user_groups_member))
self.user_groups_follows = list(set(record.user_groups_follows))

class userGroupAttrShared():
def __init__(self, record):
self.OriginID = record.pkey.origin.origin_id
self.Guid = record.pkey.id
self.usergroup_name = HelperFunctions.null_check(record.usergroup_name)
self.usergroup_description = HelperFunctions.null_check(record.usergroup_description)
self.member_count = HelperFunctions.null_check(record.member_count)
self.usergroup_createdate = HelperFunctions.null_check_datetime(record.usergroup_createdate)
self.user_owner = HelperFunctions.null_check(record.user_owner_pkey.id)
self.usergroup_name = HF.null_check(record.usergroup_name)
self.usergroup_description = HF.null_check(record.usergroup_description)
self.member_count = HF.null_check(record.member_count)
self.usergroup_createdate = HF.null_check_datetime(record.usergroup_createdate)
self.user_owner = HF.null_check(record.user_owner_pkey.id)

class postAttrShared():
def __init__(self, record):
self.OriginID = record.pkey.origin.origin_id
self.Guid = record.pkey.id
self.post_latlng = HelperFunctions.null_check(record.post_latlng)
self.place_guid = HelperFunctions.null_check(record.place_pkey.id)
self.city_guid = HelperFunctions.null_check(record.city_pkey.id)
self.country_guid = HelperFunctions.null_check(record.country_pkey.id)
self.post_geoaccuracy = HelperFunctions.null_check(lbsnPost().PostGeoaccuracy.Name(record.post_geoaccuracy)).lower()
self.user_guid = HelperFunctions.null_check(record.user_pkey.id)
self.post_create_date = HelperFunctions.null_check_datetime(record.post_create_date)
self.post_publish_date = HelperFunctions.null_check_datetime(record.post_publish_date)
self.post_body = HelperFunctions.null_check(record.post_body)
self.post_language = HelperFunctions.null_check(record.post_language.language_short)
self.post_latlng = HF.null_check(record.post_latlng)
self.place_guid = HF.null_check(record.place_pkey.id)
self.city_guid = HF.null_check(record.city_pkey.id)
self.country_guid = HF.null_check(record.country_pkey.id)
self.post_geoaccuracy = HF.null_check(lbsnPost().PostGeoaccuracy.Name(record.post_geoaccuracy)).lower()
self.user_guid = HF.null_check(record.user_pkey.id)
self.post_create_date = HF.null_check_datetime(record.post_create_date)
self.post_publish_date = HF.null_check_datetime(record.post_publish_date)
self.post_body = HF.null_check(record.post_body)
self.post_language = HF.null_check(record.post_language.language_short)
self.user_mentions = list(set([pkey.id for pkey in record.user_mentions_pkey]))
self.hashtags = list(set(record.hashtags))
self.emoji = list(set(record.emoji))
self.post_like_count = HelperFunctions.null_check(record.post_like_count)
self.post_comment_count = HelperFunctions.null_check(record.post_comment_count)
self.post_views_count = HelperFunctions.null_check(record.post_views_count)
self.post_title = HelperFunctions.null_check(record.post_title)
self.post_thumbnail_url = HelperFunctions.null_check(record.post_thumbnail_url)
self.post_url = HelperFunctions.null_check(record.post_url)
self.post_type = HelperFunctions.null_check(lbsnPost().PostType.Name(record.post_type)).lower()
self.post_filter = HelperFunctions.null_check(record.post_filter)
self.post_quote_count = HelperFunctions.null_check(record.post_quote_count)
self.post_share_count = HelperFunctions.null_check(record.post_share_count)
self.input_source = HelperFunctions.null_check(record.input_source)
self.post_content_license = HelperFunctions.null_check(record.post_content_license)
self.post_like_count = HF.null_check(record.post_like_count)
self.post_comment_count = HF.null_check(record.post_comment_count)
self.post_views_count = HF.null_check(record.post_views_count)
self.post_title = HF.null_check(record.post_title)
self.post_thumbnail_url = HF.null_check(record.post_thumbnail_url)
self.post_url = HF.null_check(record.post_url)
self.post_type = HF.null_check(lbsnPost().PostType.Name(record.post_type)).lower()
self.post_filter = HF.null_check(record.post_filter)
self.post_quote_count = HF.null_check(record.post_quote_count)
self.post_share_count = HF.null_check(record.post_share_count)
self.input_source = HF.null_check(record.input_source)
self.post_content_license = HF.null_check(record.post_content_license)

class postReactionAttrShared():
def __init__(self, record):
self.OriginID = record.pkey.origin.origin_id
self.Guid = record.pkey.id
self.reaction_latlng = HelperFunctions.null_check(record.reaction_latlng)
self.user_guid = HelperFunctions.null_check(record.user_pkey.id)
self.referencedPost = HelperFunctions.null_check(record.referencedPost_pkey.id)
self.referencedPostreaction = HelperFunctions.null_check(record.referencedPostreaction_pkey.id)
self.reaction_type = HelperFunctions.null_check(lbsnPostReaction().ReactionType.Name(record.reaction_type)).lower()
self.reaction_date = HelperFunctions.null_check_datetime(record.reaction_date)
self.reaction_content = HelperFunctions.null_check(record.reaction_content)
self.reaction_like_count = HelperFunctions.null_check(record.reaction_like_count)
self.reaction_latlng = HF.null_check(record.reaction_latlng)
self.user_guid = HF.null_check(record.user_pkey.id)
self.referencedPost = HF.null_check(record.referencedPost_pkey.id)
self.referencedPostreaction = HF.null_check(record.referencedPostreaction_pkey.id)
self.reaction_type = HF.null_check(lbsnPostReaction().ReactionType.Name(record.reaction_type)).lower()
self.reaction_date = HF.null_check_datetime(record.reaction_date)
self.reaction_content = HF.null_check(record.reaction_content)
self.reaction_like_count = HF.null_check(record.reaction_like_count)
self.user_mentions = list(set([pkey.id for pkey in record.user_mentions_pkey]))

class relationshipAttrShared():
def __init__(self, relationship):
self.OriginID = relationship.pkey.relation_to.origin.origin_id
self.Guid = relationship.pkey.relation_to.id
self.Guid_Rel = relationship.pkey.relation_from.id
self.relType = HelperFunctions.null_check(lbsnRelationship().RelationshipType.Name(relationship.relationship_type)).lower()
self.relType = HF.null_check(lbsnRelationship().RelationshipType.Name(relationship.relationship_type)).lower()

0 comments on commit 1346179

Please sign in to comment.