Skip to content

Commit

Permalink
Additional refactoring to snake_case style
Browse files Browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed Jan 2, 2019
1 parent fed1a4b commit 1a87553
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 41 deletions.
6 changes: 4 additions & 2 deletions lbsntransform/classes/db_connection.py
Expand Up @@ -14,7 +14,8 @@
class DBConnection():
""" Class for connecting to Postgres. """
def __init__(self, serveradress=None, dbname=None,
user=None, password=0, readonly=False, sslmode='prefer'):
user=None, password=0, readonly=False,
sslmode='prefer'):
"""Initialize DBConnection object with attributes, if passed. """
self.serveradress = serveradress
self.dbname = dbname
Expand Down Expand Up @@ -57,5 +58,6 @@ def connect(self):
# conn.cursor() returns a cursor object, which is then used to perform queries
cursor = conn.cursor()
dnow = datetime.datetime.now()
LOG.info(f'{dnow.strftime("%Y-%m-%d %H:%M:%S")} - Connected to {self.dbname}')
LOG.info(f'{dnow.strftime("%Y-%m-%d %H:%M:%S")} '
f'- Connected to {self.dbname}')
return conn, cursor
71 changes: 37 additions & 34 deletions lbsntransform/classes/field_mapping_flickr.py
Expand Up @@ -12,9 +12,9 @@ class FieldMappingFlickr():
protobuf lbsnstructure
"""
def __init__(self,
disableReactionPostReferencing=False,
disable_reaction_post_referencing=False,
geocodes=False,
mapFullRelations=False,
map_full_relations=False,
map_reactions=True,
ignore_non_geotagged=False,
ignore_sources_set=set()):
Expand Down Expand Up @@ -60,54 +60,54 @@ def extract_flickr_post(self, record):
post_guid = record[5]
if not HF.check_notice_empty_post_guid(post_guid):
return None
postRecord = HF.create_new_lbsn_record_with_id(lbsnPost(),
post_record = HF.create_new_lbsn_record_with_id(lbsnPost(),
post_guid,
self.origin)
postGeoaccuracy = None
userRecord = HF.create_new_lbsn_record_with_id(lbsnUser(),
post_geoaccuracy = None
user_record = HF.create_new_lbsn_record_with_id(lbsnUser(),
record[7],
self.origin)
userRecord.user_name = record[6]
userRecord.url = f'http://www.flickr.com/photos/{userRecord.pkey.id}/'
if userRecord:
postRecord.user_pkey.CopyFrom(userRecord.pkey)
self.lbsn_records.add_records_to_dict(userRecord)
postRecord.post_latlng = self.flickr_extract_postlatlng(record)
user_record.user_name = record[6]
user_record.url = f'http://www.flickr.com/photos/{user_record.pkey.id}/'
if user_record:
post_record.user_pkey.CopyFrom(user_record.pkey)
self.lbsn_records.add_records_to_dict(user_record)
post_record.post_latlng = self.flickr_extract_postlatlng(record)
geoaccuracy = FieldMappingFlickr.flickr_map_geoaccuracy(record[13])
if geoaccuracy:
postRecord.post_geoaccuracy = geoaccuracy
post_record.post_geoaccuracy = geoaccuracy
if record[19]:
# we need some information from post_record to create place_record
# (e.g. user language, geoaccuracy, post_latlng)
# some of the information from place will also modify post_record
placeRecord = HF.create_new_lbsn_record_with_id(lbsnPlace(),
record[19],
self.origin)
self.lbsn_records.add_records_to_dict(placeRecord)
postRecord.place_pkey.CopyFrom(placeRecord.pkey)
postRecord.post_publish_date.CopyFrom(HF.parse_csv_datestring_to_protobuf(record[9]))
postRecord.post_create_date.CopyFrom(HF.parse_csv_datestring_to_protobuf(record[8]))
place_record = HF.create_new_lbsn_record_with_id(lbsnPlace(),
record[19],
self.origin)
self.lbsn_records.add_records_to_dict(place_record)
post_record.place_pkey.CopyFrom(place_record.pkey)
post_record.post_publish_date.CopyFrom(HF.parse_csv_datestring_to_protobuf(record[9]))
post_record.post_create_date.CopyFrom(HF.parse_csv_datestring_to_protobuf(record[8]))
#valueCount = lambda x: 0 if x is None else x
valueCount = lambda x: int(x) if x.isdigit() else 0
postRecord.post_views_count = valueCount(record[10])
postRecord.post_comment_count = valueCount(record[18])
postRecord.post_like_count = valueCount(record[17])
postRecord.post_url = f'http://flickr.com/photo.gne?id={post_guid}'
postRecord.post_body = FieldMappingFlickr.reverse_csv_comma_replace(record[21])
postRecord.post_title = FieldMappingFlickr.reverse_csv_comma_replace(record[3])
postRecord.post_thumbnail_url = record[4]
value_count = lambda x: int(x) if x.isdigit() else 0
post_record.post_views_count = value_count(record[10])
post_record.post_comment_count = value_count(record[18])
post_record.post_like_count = value_count(record[17])
post_record.post_url = f'http://flickr.com/photo.gne?id={post_guid}'
post_record.post_body = FieldMappingFlickr.reverse_csv_comma_replace(record[21])
post_record.post_title = FieldMappingFlickr.reverse_csv_comma_replace(record[3])
post_record.post_thumbnail_url = record[4]
record_tags_list = list(filter(None, record[11].split(";")))
if record_tags_list:
for tag in record_tags_list:
tag = FieldMappingFlickr.clean_tags_from_flickr(tag)
postRecord.hashtags.append(tag)
post_record.hashtags.append(tag)
record_media_type = record[16]
if record_media_type and record_media_type == "video":
postRecord.post_type = lbsnPost.VIDEO
post_record.post_type = lbsnPost.VIDEO
else:
postRecord.post_type = lbsnPost.IMAGE
postRecord.post_content_license = valueCount(record[14])
self.lbsn_records.add_records_to_dict(postRecord)
post_record.post_type = lbsnPost.IMAGE
post_record.post_content_license = value_count(record[14])
self.lbsn_records.add_records_to_dict(post_record)

@staticmethod
def reverse_csv_comma_replace(csv_string):
Expand Down Expand Up @@ -173,7 +173,9 @@ def flickr_extract_postlatlng(self, record):
except:
l_lat, l_lng = 0, 0

if (l_lat == 0 and l_lng == 0) or l_lat > 90 or l_lat < -90 or l_lng > 180 or l_lng < -180:
if (l_lat == 0 and l_lng == 0) \
or l_lat > 90 or l_lat < -90 \
or l_lng > 180 or l_lng < -180:
l_lat, l_lng = 0, 0
self.send_to_null_island(lat_entry, lng_entry, record[5])
return FieldMappingFlickr.lat_lng_to_wkt(l_lat, l_lng)
Expand All @@ -189,5 +191,6 @@ def send_to_null_island(self, lat_entry, lng_entry, record_guid):
"""Logs entries with problematic lat/lng's,
increases Null Island Counter by 1.
"""
self.log.debug(f'NULL island: Guid {record_guid} - Coordinates: {lat_entry}, {lng_entry}')
self.log.debug(f'NULL island: Guid {record_guid} - '
f'Coordinates: {lat_entry}, {lng_entry}')
self.null_island += 1
12 changes: 7 additions & 5 deletions lbsntransform/classes/field_mapping_twitter.py
Expand Up @@ -7,7 +7,8 @@
from google.protobuf.timestamp_pb2 import Timestamp
from .helper_functions import HelperFunctions as HF
from .helper_functions import LBSNRecordDicts
from lbsnstructure.lbsnstructure_pb2 import lbsnPost, \
from lbsnstructure.lbsnstructure_pb2 import lbsnOrigin, \
lbsnPost, \
CompositeKey, \
RelationshipKey, \
lbsnUser, \
Expand All @@ -17,7 +18,8 @@
lbsnUserGroup, \
lbsnPostRelationship, \
lbsnPostReaction, \
lbsnRelationship
lbsnRelationship, \
Language
# for debugging only:
from google.protobuf import text_format

Expand Down Expand Up @@ -387,7 +389,7 @@ def extract_place(self, postplace_json, post_geoaccuracy, user_language = None):
place = postplace_json
place_id = postplace_json.get('id')
if not place_id:
log.warning(f'No PlaceGuid\n\n{postplace_json}')
self.log.warning(f'No PlaceGuid\n\n{postplace_json}')
input("Press Enter to continue... (entry will be skipped)")
return None, post_geoaccuracy, None
lon_center = 0
Expand All @@ -414,7 +416,7 @@ def extract_place(self, postplace_json, post_geoaccuracy, user_language = None):
if not post_geoaccuracy:
post_geoaccuracy = lbsnPost.COUNTRY
else:
log.warning(f'No country_code\n\n{postplace_json}. PlaceEntry will be skipped..')
self.log.warning(f'No country_code\n\n{postplace_json}. PlaceEntry will be skipped..')
return None, post_geoaccuracy, None
elif place_type in ("city", "neighborhood", "admin"):
# city_guid
Expand All @@ -435,7 +437,7 @@ def extract_place(self, postplace_json, post_geoaccuracy, user_language = None):
lbsnPost.CITY):
post_geoaccuracy = lbsnPost.PLACE
else:
log.warning(f'No Place Type Detected: {postplace_json}')
self.log.warning(f'No Place Type Detected: {postplace_json}')
# For some reason, Twitter place entities sometimes contain line breaks or repeated whitespace. We don't want this.
place_name = postplace_json.get('name').replace('\n\r','')
place_name = re.sub(' +', ' ', place_name) # remove multiple whitespace
Expand Down

0 comments on commit 1a87553

Please sign in to comment.