Update place attributes
Sieboldianus committed Sep 17, 2019
1 parent 8c99a19 commit 3d1af08
Showing 4 changed files with 82 additions and 41 deletions.
69 changes: 36 additions & 33 deletions lbsntransform/classes/field_mapping_fb.py
@@ -27,7 +27,7 @@
from .helper_functions import LBSNRecordDicts


class FieldMappingFB():
class FieldMappingFBPlace():
""" Provides mapping function from Facebook Place Graph endpoints to
protobuf lbsnstructure
"""
@@ -43,7 +43,7 @@ def __init__(self,
ignore_sources_set=set(),
min_geoaccuracy=None):
origin = Origin()
origin.origin_id = Origin.TWITTER
origin.origin_id = Origin.FACEBOOK
self.origin = origin
# this is where all the data will be stored
self.lbsn_records = []
@@ -73,14 +73,11 @@ def parse_json_record(self, json_string_dict, input_lbsn_type=None):
"""
# clear any records from previous run
self.lbsn_records.clear()
# decide if main object is place
if input_lbsn_type and input_lbsn_type in ('placepage'):
# place
place_record = self.extract_place(json_string_dict)
# place
fb_places_list = json_string_dict.get("data")
for place in fb_places_list:
place_record = self.extract_place(place)
self.lbsn_records.append(place_record)
else:
# otherwise, raise error
raise ValueError("Input type not supported")
# finally, return list of all extracted records
return self.lbsn_records
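
A minimal usage sketch of the reworked parse_json_record (not part of the commit): only the top-level "data" list is implied by the loop above; the example place fields, the "id" key and the constructor call with default arguments are assumptions.

    from lbsntransform.classes.field_mapping_fb import FieldMappingFBPlace

    # Hedged sketch: field values are illustrative; "engagement", "checkins" and
    # "single_line_address" mirror the accesses in extract_place() further below.
    fb_response = {
        "data": [
            {
                "id": "1234567890",          # assumed source of the place id
                "name": "Example Cafe",
                "engagement": {"count": 42},
                "checkins": 7,
                "single_line_address": "1 Example St, Example City, Example Country",
            },
        ]
    }

    mapper = FieldMappingFBPlace()           # assuming the remaining arguments keep their defaults
    lbsn_records = mapper.parse_json_record(fb_response)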

@@ -115,28 +112,34 @@ def extract_place(self, postplace_json):
Place(), place_id, self.origin)
if isinstance(place_record, Place):
# place specific
input(type(place_cat_list))
if place_cat_list:
place_record.attributes = place_cat_list
for cat in place_cat_list:
place_record.attributes[
cat.get("id")] = cat.get("name")
place_opening_hours = place.get('hours')
if place_opening_hours:
# merge dictionaries (shallow)
place_record.attributes = {
**place_record.attributes, **place_opening_hours}
for open_hour in place_opening_hours:
place_record.attributes[
open_hour.get("key")] = open_hour.get("value")
rating_count = place.get('rating_count')
if rating_count:
# merge dictionaries (shallow)
place_record.attributes["rating_count"] = rating_count
# explicit column needed for rating
place_record.attributes["rating_count"] = str(rating_count)
about = place.get('about')
if about:
# merge dictionaries (shallow)
place_record.attributes["about"] = about
place_record.like_count = place.get('engagement').get('count')
place_record.checkin_count = place.get('checkins')
place_record.place_description = place.get('description')
place_record.zip_code = place.get('zip')
place_record.address = FieldMappingFB.compile_address(place)
place_record.place_phone = place.get('description')
description = place.get('description')
if description:
place_record.place_description = description
zip_code = place.get('zip')
if zip_code:
place_record.zip_code = zip_code
place_record.address = FieldMappingFBPlace.compile_address(place)
place_phone = place.get('place_phone')
if place_phone:
place_record.place_phone = place_phone
# same for Country, City and Place
place_name = place.get('name').replace('\n\r', '')
# remove multiple whitespace
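
Taken together, the changes above route categories, opening hours and the rating count into the generic key/value attributes map instead of merging whole sub-dictionaries. A small illustration with assumed input shapes (the "id"/"name" and "key"/"value" layouts are inferred from the .get() calls above, not taken from the commit):

    # Hedged sketch of the resulting attributes map for one place.
    attributes = {}
    place_cat_list = [{"id": "2271", "name": "Cafe"}]                    # assumed shape
    place_opening_hours = [{"key": "mon_1_open", "value": "08:00"},      # assumed shape
                           {"key": "mon_1_close", "value": "18:00"}]
    rating_count = 25

    for cat in place_cat_list:
        attributes[cat.get("id")] = cat.get("name")
    for open_hour in place_opening_hours:
        attributes[open_hour.get("key")] = open_hour.get("value")
    attributes["rating_count"] = str(rating_count)  # stored as string in the key/value map

    # attributes == {'2271': 'Cafe', 'mon_1_open': '08:00',
    #                'mon_1_close': '18:00', 'rating_count': '25'}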
@@ -147,15 +150,15 @@ def extract_place(self, postplace_json):
# return
return place_record


@staticmethod
def compile_address(fb_place_dict):
single_line_address = fb_place_dict.get("single_line_address")
if single_line_address:
return single_line_address
else:
fb_city = fb_place_dict.get("city")
fb_country = fb_place_dict.get("country")
fb_street = fb_place_dict.get("street")
fb_address = ', '.join([fb_street, fb_city, fb_country])
return fb_address
@staticmethod
def compile_address(fb_place_dict):
single_line_address = fb_place_dict.get("single_line_address")
if single_line_address:
return single_line_address
else:
fb_city = fb_place_dict.get("city")
fb_country = fb_place_dict.get("country")
fb_street = fb_place_dict.get("street")
fb_address = ', '.join(
filter(None, [fb_street, fb_city, fb_country]))
return fb_address
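
The rewritten compile_address prefers Facebook's pre-formatted single_line_address and, in the fallback branch, joins only the components that are actually present; the earlier version would raise a TypeError as soon as street, city or country was missing. A short illustration (example values are assumptions):

    from lbsntransform.classes.field_mapping_fb import FieldMappingFBPlace

    full = {"street": "1 Example St", "city": "Example City", "country": "Example Country"}
    partial = {"city": "Example City", "country": "Example Country"}   # no street

    print(FieldMappingFBPlace.compile_address(full))     # 1 Example St, Example City, Example Country
    print(FieldMappingFBPlace.compile_address(partial))  # Example City, Example Country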
2 changes: 1 addition & 1 deletion lbsntransform/classes/load_data.py
@@ -138,7 +138,7 @@ def _open_input_files(self, count: bool = None):
self.continue_number += 1
self.current_source = file_name
HF.log_main_debug(
f'\nCurrent file: {ntpath.basename(file_name)}\n')
f'Current file: {ntpath.basename(file_name)}')
yield open(file_name, 'r', encoding="utf-8", errors='replace')

def _process_input(self, file_handles: Iterator[TextIO]) -> Iterator[Dict[
33 changes: 27 additions & 6 deletions lbsntransform/classes/shared_structure_proto_lbsndb.py
@@ -35,7 +35,9 @@ def get_header_for_type(desc_name):
'geom_area, url',
Place.DESCRIPTOR.name:
'origin_id, place_guid, name, name_alternatives, geom_center, '
'geom_area, url, city_guid, post_count',
'geom_area, url, city_guid, post_count, place_description, '
'place_website, place_phone, address, zip_code, attributes, '
'checkin_count, like_count, parent_places',
Post.DESCRIPTOR.name:
'origin_id, post_guid, post_latlng, place_guid, city_guid, '
'country_guid, post_geoaccuracy, user_guid, post_create_date, '
@@ -90,7 +92,7 @@ def func_prepare_selector(self, record):

def prepare_lbsn_country(self, record):
"""Get common attributes for records of type Place"""
place_record = PlaceAttrShared(record)
place_record = PlaceBaseAttrShared(record)
prepared_record = (place_record.origin_id,
place_record.guid,
place_record.name,
@@ -103,7 +105,7 @@ def prepare_lbsn_city(self, record):

def prepare_lbsn_city(self, record):
"""Get common attributes for records of type City"""
place_record = PlaceAttrShared(record)
place_record = PlaceBaseAttrShared(record)
country_guid = HF.null_check(record.country_pkey.id)
sub_type = HF.null_check(record.sub_type)
prepared_record = (place_record.origin_id,
@@ -121,9 +123,19 @@ def prepare_lbsn_city(self, record):

def prepare_lbsn_place(self, record):
"""Get common attributes for records of type Place"""
place_record = PlaceAttrShared(record)
place_record = PlaceBaseAttrShared(record)
# get additional attributes specific to places
city_guid = HF.null_check(record.city_pkey.id)
post_count = HF.null_check(record.post_count)
place_description = HF.null_check(record.place_description)
place_website = HF.null_check(record.place_website)
place_phone = HF.null_check(record.place_phone)
address = HF.null_check(record.address)
zip_code = HF.null_check(record.zip_code)
attributes = HF.map_to_dict(HF.null_check(record.attributes))
checkin_count = HF.null_check(record.checkin_count)
like_count = HF.null_check(record.like_count)
parent_places = HF.null_check(record.parent_places)
prepared_record = (place_record.origin_id,
place_record.guid,
place_record.name,
@@ -134,7 +146,16 @@ def extract_place(self, postplace_json):
place_record.geom_area),
place_record.url,
city_guid,
post_count)
post_count,
place_description,
place_website,
place_phone,
address,
zip_code,
attributes,
checkin_count,
like_count,
parent_places)
return prepared_record

def prepare_lbsn_user(self, record):
@@ -238,7 +259,7 @@ def prepare_lbsn_relation(self, record):
return prepared_typerecord_tuple


class PlaceAttrShared():
class PlaceBaseAttrShared():
"""Shared structure for Place Attributes
Contains attributes shared among PG DB and LBSN ProtoBuf spec.
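prepare_lbsn_place now routes the new protobuf fields through HF.null_check and HF.map_to_dict before they are handed to the database layer. Their implementations are not part of this diff; the following stand-ins are hypothetical and only sketch the behaviour the call sites above appear to rely on:

    # Hypothetical stand-ins for HF.null_check / HF.map_to_dict (the real helpers
    # live in helper_functions.py and are not shown in this commit).
    def null_check(value):
        """Turn empty protobuf defaults ('' / 0 / empty map) into None so SQL gets NULL."""
        return value if value else None

    def map_to_dict(proto_map):
        """Convert a protobuf map field such as place.attributes into a plain dict."""
        return dict(proto_map) if proto_map is not None else None

With those semantics, attributes would end up either as None or as a plain dict, matching the extended column list that get_header_for_type now declares for Place.
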
19 changes: 18 additions & 1 deletion lbsntransform/classes/submit_data.py
@@ -420,7 +420,24 @@ def place_insertsql(self, values_str, record_type):
data."place".city_guid),
post_count = GREATEST(COALESCE(EXCLUDED.post_count,
data."place".post_count), COALESCE(
data."place".post_count, EXCLUDED.post_count));
data."place".post_count, EXCLUDED.post_count)),
place_website = COALESCE(
EXCLUDED.place_website, data."place".place_website),
place_phone = COALESCE(
EXCLUDED.place_phone, data."place".place_phone),
address = COALESCE(
EXCLUDED.address, data."place".address),
zip_code = COALESCE(
EXCLUDED.zip_code, data."place".zip_code),
attributes = COALESCE(
EXCLUDED.attributes, data."place".attributes),
checkin_count = COALESCE(
EXCLUDED.checkin_count, data."place".checkin_count),
like_count = COALESCE(
EXCLUDED.like_count, data."place".like_count),
parent_places = COALESCE(
extensions.mergeArrays(EXCLUDED.parent_places,
data."place".parent_places), ARRAY[]::text[]);
'''
return insert_sql
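
The extended ON CONFLICT clause keeps an existing column value whenever the incoming record carries NULL, because COALESCE(EXCLUDED.col, data."place".col) returns its first non-NULL argument; parent_places is the one exception and is combined via the custom extensions.mergeArrays function rather than overwritten. A tiny Python analogue of the per-column merge rule (illustrative only, example values are assumptions):

    # Hedged analogue of COALESCE(EXCLUDED.col, data."place".col).
    def coalesce(new_value, old_value):
        return new_value if new_value is not None else old_value

    assert coalesce("+1 555 0100", None) == "+1 555 0100"                   # new value wins
    assert coalesce(None, "existing about text") == "existing about text"   # old value kept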

