In [173]:
# import project directory helper
import os, sys, inspect
# Resolve the notebook's working directory and its parent directory; the
# dataset paths used later in the notebook are built from parentDir.
currentDir = os.getcwd()
parentDir = os.path.dirname(currentDir)
# Put the parent directory at the front of the import search path. Skip the
# insert when it is already first, so re-running this cell does not pile up
# duplicate sys.path entries.
if sys.path[:1] != [parentDir]:
    sys.path.insert(0, parentDir)

In [22]:
# import pyspark packages
# from pyspark import SparkContext
# from pyspark.sql import SQLContext
# set the kafka dependencies before create spark context or session
import os
# Kafka connector packages handed to pyspark; per the note above, this has to
# be in the environment before the Spark context/session is created.
pyspark_submit_args = '--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.4.4,org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.4 pyspark-shell'
os.environ['PYSPARK_SUBMIT_ARGS'] = pyspark_submit_args
from pyspark.sql import SparkSession, functions, types
from pyspark.sql.types import *

In [187]:
# import util packages
from IPython.display import display
import pandas as pd

In [34]:
# sc = sc(appName="attraction")
# sqlContext = SQLContext(sc)
# create (or reuse) the Spark session for the "attraction" application
attraction_session_builder = SparkSession.builder.appName("attraction")
spark = attraction_session_builder.getOrCreate()

# tripadvisor_attr_href_cat

In [7]:
# define the JSON input directory and the parquet output directory
tripad_attr_path, parquet_path = (
    parentDir + '/crawler/datasets/tripadvisor_dataset/attractions/',
    parentDir + '/crawler/datasets/tripadvisor_dataset/spark/',
)

In [6]:
# load the href/category JSON file into a Spark dataframe
attr_href_cat_json = tripad_attr_path + 'tripadvisor_attr_href_cat.json'
attr_href_cat_spark_df = spark.read.json(attr_href_cat_json)
# attr_href_cat_df = pd.read_json(tripadvisor_attr_href_cat, orient='records')

In [7]:
# persist the Spark dataframe as parquet, replacing any previous copy
attr_href_cat_parquet = parquet_path + 'tripadvisor_attr_href_cat'
attr_href_cat_spark_df.write.parquet(attr_href_cat_parquet, mode='overwrite')

In [8]:
# reload the dataframe from the parquet copy written above
attr_href_cat_parquet = parquet_path + 'tripadvisor_attr_href_cat'
attr_href_cat_spark_df = spark.read.parquet(attr_href_cat_parquet)

In [9]:
# convert spark dataframe to pandas dataframe; the id extraction and
# de-duplication in the following cells are done with pandas
attr_href_cat_df = attr_href_cat_spark_df.toPandas()

In [10]:
# add activityId, locationId columns to dataframe
# hrefs look like "/AttractionProductReview-g<locationId>-d<activityId>-...".
# Anchor each pattern on the leading "-" so that a stray "d"/"g" followed by
# digits elsewhere in an href (e.g. in a query string) is not mistaken for an
# id; hrefs without the "-g.../-d..." parts yield NaN.
# expand=False returns a Series, which is what a single-column assignment needs.
attr_href_cat_df['activityId'] = attr_href_cat_df['href'].str.extract(
    r'-d(\d+)', expand=False)
attr_href_cat_df['locationId'] = attr_href_cat_df['href'].str.extract(
    r'-g(\d+)', expand=False)

In [11]:
# put the id columns first, followed by the original href and category
attr_href_cat_df_cl = [
    'activityId',
    'locationId',
    'href',
    'category',
]
attr_href_cat_df = attr_href_cat_df.reindex(columns=attr_href_cat_df_cl)

In [12]:
# show the per-column non-null counts, then the first rows
display(attr_href_cat_df.count(), attr_href_cat_df.head())

activityId    4257
locationId    4257
href          4260
category      4260
dtype: int64

Unnamed: 0,activityId,locationId,href,category
0,,,,
1,12962337.0,298570.0,/AttractionProductReview-g298570-d12962337-Mal...,"[featured_tours_and_tickets, luxury_special_oc..."
2,12905660.0,298570.0,/AttractionProductReview-g298570-d12905660-Pet...,"[featured_tours_and_tickets, sightseeing_ticke..."
3,11807013.0,298570.0,/AttractionProductReview-g298570-d11807013-Dis...,"[featured_tours_and_tickets, private_custom_to..."
4,12471375.0,298570.0,/AttractionProductReview-g298570-d12471375-Gen...,"[featured_tours_and_tickets, luxury_special_oc..."


In [13]:
# list every row whose activityId occurs more than once (all copies are shown)
repeated_activity_mask = attr_href_cat_df['activityId'].duplicated(keep=False)
print('Duplicated values:')
print(attr_href_cat_df[repeated_activity_mask])
print()
rows_before_dedup = len(attr_href_cat_df)
print('Length before removing duplicated values: ' + str(rows_before_dedup))

Duplicated values:
     activityId locationId                                               href  \
0           NaN        NaN                                               None   
376         NaN        NaN  /AttractionProductReview?partner=Viator&produc...   
1339        NaN        NaN  /AttractionProductReview?partner=Viator&produc...   
1945        NaN        NaN  /AttractionProductReview?partner=Viator&produc...   
4261        NaN        NaN                                               None   

                                       category  
0                                          None  
376    [classes_workshops, food_wine_nightlife]  
1339                       [outdoor_activities]  
1945  [private_custom_tours, tours_sightseeing]  
4261                                       None  

Length before removing duplicated values: 4262


In [14]:
# dropping duplicate values
# Rows without an activityId are removed first, then exactly one row is kept
# per activityId. The previous keep=False deleted *every* copy of a repeated
# activityId, which would silently lose a real activity that appears twice.
attr_href_cat_df = (attr_href_cat_df
                    .dropna(subset=['activityId'])
                    .drop_duplicates(subset='activityId', keep='first'))
print('Length after removing duplicated values: ' + str(len(attr_href_cat_df)))

Length after removing duplicated values: 4257


In [15]:
# discard every row that still has a missing value in any column
attr_href_cat_df = attr_href_cat_df.dropna()
rows_after_dropna = len(attr_href_cat_df)
print('Length after removing NaN values: ' + str(rows_after_dropna))

Length after removing NaN values: 4257


In [16]:
# create spark dataframe from the cleaned pandas dataframe; this replaces the
# attr_href_cat_spark_df that was read from parquet earlier
attr_href_cat_spark_df = spark.createDataFrame(attr_href_cat_df)

In [34]:
# show the row count, then print the first rows as a text table
attr_href_cat_row_count = attr_href_cat_spark_df.count()
display(attr_href_cat_row_count)
attr_href_cat_spark_df.show()

4257

+----------+----------+--------------------+--------------------+
|activityId|locationId|                href|            category|
+----------+----------+--------------------+--------------------+
|  12962337|    298570|/AttractionProduc...|[featured_tours_a...|
|  12905660|    298570|/AttractionProduc...|[featured_tours_a...|
|  11807013|    298570|/AttractionProduc...|[featured_tours_a...|
|  12471375|    298570|/AttractionProduc...|[featured_tours_a...|
|  12469580|    298570|/AttractionProduc...|[featured_tours_a...|
|  16722683|    298570|/AttractionProduc...|[featured_tours_a...|
|  11454341|   1096277|/AttractionProduc...|[featured_tours_a...|
|  11812460|    298570|/AttractionProduc...|[featured_tours_a...|
|  11992275|   1096277|/AttractionProduc...|[featured_tours_a...|
|  11991123|    660694|/AttractionProduc...|[featured_tours_a...|
|  11482502|    298570|/AttractionProduc...|[featured_tours_a...|
|  19350513|    635527|/AttractionProduc...|[featured_tours_a...|
|  1147497

# tripad_attr_activity

In [18]:
# define path (repeats the definitions from the tripadvisor_attr_href_cat section)
tripad_attr_path = ''.join(
    [parentDir, '/crawler/datasets/tripadvisor_dataset/attractions/'])
parquet_path = ''.join(
    [parentDir, '/crawler/datasets/tripadvisor_dataset/spark/'])

In [19]:
# load the activity JSON file into a Spark dataframe
tripad_act_json = tripad_attr_path + 'tripad_attr_activity.json'
tripad_act_spark_df = spark.read.json(tripad_act_json)

In [20]:
# persist the activity dataframe as parquet, replacing any previous copy
tripad_act_parquet = parquet_path + 'tripad_attr_activity'
tripad_act_spark_df.write.parquet(tripad_act_parquet, mode='overwrite')

In [21]:
# reload the activity dataframe from its parquet copy
tripad_act_parquet = parquet_path + 'tripad_attr_activity'
tripad_act_spark_df = spark.read.parquet(tripad_act_parquet)

In [22]:
# remove the '__COMMENT' column, then discard rows that have no productHeader
tripad_act_spark_df = (tripad_act_spark_df
                       .drop('__COMMENT')
                       .dropna(subset='productHeader'))

In [23]:
# select desired columns, lifting the nested fields to top-level names
product_header = tripad_act_spark_df['productHeader']
tour_planner = tripad_act_spark_df['tourPlanner']
tripad_act_spark_df = tripad_act_spark_df.select(
    product_header['activityId'].alias("activityId"),
    product_header['productTitle'].alias("name"),
    tour_planner['numericPrice'].alias("price"),
    product_header['reviewRating'].alias("rating"))

In [33]:
# show the row count, then print the first rows as a text table
tripad_act_row_count = tripad_act_spark_df.count()
display(tripad_act_row_count)
tripad_act_spark_df.show()

4257

+----------+--------------------+------+------+
|activityId|                name| price|rating|
+----------+--------------------+------+------+
|  12962337|Malaysia Countrys...| 43.27|   4.5|
|  12905660|Petronas Twin Tow...|119.49|   4.0|
|  11807013|Discover the Best...|142.75|   4.5|
|  12471375|Genting Highlands...|103.01|   4.5|
|  12469580|Half-Day Kuala Lu...| 43.27|   4.0|
|  16722683|KL Hop On Hop Off...| 53.07|   3.0|
|  11454341|Langkawi Archipel...|515.06|   5.0|
|  11812460|Private Half-Day ...|377.67|   5.0|
|  11992275|12 Flights: Zipli...|473.86|   5.0|
|  11991123|Cheong Fatt Tze G...| 20.61|   4.5|
|  11482502|Batik Bag Paintin...|119.49|   5.0|
|  19350513|Chef Samuel's Pri...|700.48|   5.0|
|  11474972|Half-Day Penang C...|115.81|   5.0|
|  12648212|North Borneo Crui...|222.48|   4.5|
|  11470871|Hot Air Balloon F...|947.71|   5.0|
|  12468200|Private Tour: Kua...|407.98|   5.0|
|  12934938|Kinabatangan Rive...|568.63|   5.0|
|  11452959|Small-Group Food ...|243.14|

# tripad_attr_location

In [25]:
# define path (same two directories as defined in the earlier sections)
tripad_attr_path = ''.join(
    (parentDir, '/crawler/datasets/tripadvisor_dataset/attractions/'))
parquet_path = ''.join(
    (parentDir, '/crawler/datasets/tripadvisor_dataset/spark/'))

In [26]:
# load the location JSON file into a Spark dataframe
tripad_loc_json = tripad_attr_path + 'tripad_attr_location.json'
tripad_loc_spark_df = spark.read.json(tripad_loc_json)

In [27]:
# persist the location dataframe as parquet, replacing any previous copy
tripad_loc_parquet = parquet_path + 'tripad_attr_location'
tripad_loc_spark_df.write.parquet(tripad_loc_parquet, mode='overwrite')

In [28]:
# reload the location dataframe from its parquet copy
tripad_loc_parquet = parquet_path + 'tripad_attr_location'
tripad_loc_spark_df = spark.read.parquet(tripad_loc_parquet)

In [29]:
# remove the '__COMMENT' column, then discard rows that have no location_id
tripad_loc_spark_df = (tripad_loc_spark_df
                       .drop('__COMMENT')
                       .dropna(subset='location_id'))

In [30]:
# select desired columns and rename them; "state" is the name of the first
# entry in the ancestors array
loc_source_df = tripad_loc_spark_df
tripad_loc_spark_df = loc_source_df.select(
    loc_source_df['location_id'].alias("locationId"),
    loc_source_df['name'].alias("city"),
    loc_source_df['ancestors'][0]['name'].alias("state"),
    loc_source_df['parent_display_name'].alias("country"),
    loc_source_df['latitude'].alias("latitude"),
    loc_source_df['longitude'].alias("longitude"))

In [32]:
# show the row count, then print the first rows as a text table
tripad_loc_row_count = tripad_loc_spark_df.count()
display(tripad_loc_row_count)
tripad_loc_spark_df.show()

98

+----------+--------------------+--------------------+-----------------+--------+---------+
|locationId|                city|               state|          country|latitude|longitude|
+----------+--------------------+--------------------+-----------------+--------+---------+
|    298570|        Kuala Lumpur| Wilayah Persekutuan|         Malaysia|3.156802|101.69717|
|   1096277|                Kuah|            Langkawi|         Malaysia|6.326672|99.843025|
|    660694|       Penang Island|              Penang|         Malaysia|5.388071|100.29352|
|    635527|      Batu Ferringhi|       Penang Island|         Malaysia|5.471783|100.24629|
|    298303|         George Town|       Penang Island|         Malaysia|5.411938|100.32664|
|    298307|       Kota Kinabalu|Kota Kinabalu Dis...|         Malaysia|5.979383|116.07349|
|    303997|            Sandakan|   Sandakan Division|         Malaysia|5.838944|118.11561|
|    298283|            Langkawi|   Langkawi District|         Malaysia|6.351074

In [35]:
# convert spark dataframe to pandas dataframe so the whole location table can
# be inspected with pandas in the next cell
tripad_loc_df = tripad_loc_spark_df.toPandas()

In [36]:
# visualize pandas dataframe
display(tripad_loc_df.count())
# Show every row, but only inside this block: option_context restores the
# previous display.max_rows afterwards (even if display raises), whereas
# set_option + reset_option would leave the option changed on error and would
# reset a user-customised value to the pandas default.
with pd.option_context('display.max_rows', None):
    display(tripad_loc_df)

locationId    98
city          98
state         97
country       98
latitude      97
longitude     97
dtype: int64

Unnamed: 0,locationId,city,state,country,latitude,longitude
0,298570,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717
1,1096277,Kuah,Langkawi,Malaysia,6.326672,99.843025
2,660694,Penang Island,Penang,Malaysia,5.388071,100.29352
3,635527,Batu Ferringhi,Penang Island,Malaysia,5.471783,100.24629
4,298303,George Town,Penang Island,Malaysia,5.411938,100.32664
5,298307,Kota Kinabalu,Kota Kinabalu District,Malaysia,5.979383,116.07349
6,303997,Sandakan,Sandakan Division,Malaysia,5.838944,118.11561
7,298283,Langkawi,Langkawi District,Malaysia,6.351074,99.75824
8,298313,Petaling Jaya,Petaling District,Malaysia,3.10363,101.64778
9,666315,Cherating,Kuantan District,Malaysia,4.133792,103.38026


###### Join attr_href_cat_spark_df, tripad_act_spark_df as attr_spark_df

In [49]:
# join the href/category table with the activity details on activityId and
# keep one copy of each column of interest
same_activity = (attr_href_cat_spark_df.activityId ==
                 tripad_act_spark_df.activityId)
attr_columns = [
    attr_href_cat_spark_df.activityId,
    attr_href_cat_spark_df.locationId,
    tripad_act_spark_df.name,
    tripad_act_spark_df.price,
    tripad_act_spark_df.rating,
    attr_href_cat_spark_df.category,
]
attr_spark_df = attr_href_cat_spark_df.join(
    tripad_act_spark_df, same_activity).select(*attr_columns)

In [50]:
# visualize the joined attraction dataframe (show() prints a text table)
attr_spark_df.show()

+----------+----------+--------------------+------+------+--------------------+
|activityId|locationId|                name| price|rating|            category|
+----------+----------+--------------------+------+------+--------------------+
|  12962337|    298570|Malaysia Countrys...| 43.27|   4.5|[featured_tours_a...|
|  12905660|    298570|Petronas Twin Tow...|119.49|   4.0|[featured_tours_a...|
|  11807013|    298570|Discover the Best...|142.75|   4.5|[featured_tours_a...|
|  12471375|    298570|Genting Highlands...|103.01|   4.5|[featured_tours_a...|
|  12469580|    298570|Half-Day Kuala Lu...| 43.27|   4.0|[featured_tours_a...|
|  16722683|    298570|KL Hop On Hop Off...| 53.07|   3.0|[featured_tours_a...|
|  11454341|   1096277|Langkawi Archipel...|515.06|   5.0|[featured_tours_a...|
|  11812460|    298570|Private Half-Day ...|377.67|   5.0|[featured_tours_a...|
|  11992275|   1096277|12 Flights: Zipli...|473.86|   5.0|[featured_tours_a...|
|  11991123|    660694|Cheong Fatt Tze G

###### Join tripad_attr_location, attr_spark_df as final_attr_spark_df

In [51]:
# attach the location details to every attraction by matching locationId
same_location = tripad_loc_spark_df.locationId == attr_spark_df.locationId
final_attr_columns = [
    attr_spark_df.activityId,
    tripad_loc_spark_df.city,
    tripad_loc_spark_df.state,
    tripad_loc_spark_df.country,
    tripad_loc_spark_df.latitude,
    tripad_loc_spark_df.longitude,
    attr_spark_df.name,
    attr_spark_df.price,
    attr_spark_df.rating,
    attr_spark_df.category,
]
final_attr_spark_df = tripad_loc_spark_df.join(
    attr_spark_df, same_location).select(*final_attr_columns)

In [53]:
# visualize the final joined spark dataframe by converting it to pandas
display(final_attr_spark_df.toPandas())

Unnamed: 0,activityId,city,state,country,latitude,longitude,name,price,rating,category
0,12962337,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Malaysia Countryside and Batu Caves Tour from ...,43.27,4.5,"[featured_tours_and_tickets, luxury_special_oc..."
1,12905660,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Petronas Twin Towers Admission Tickets (E-Tick...,119.49,4.0,"[featured_tours_and_tickets, sightseeing_ticke..."
2,11807013,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Discover the Best Local Food Tour by Night in ...,142.75,4.5,"[featured_tours_and_tickets, private_custom_to..."
3,12471375,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Genting Highlands Day Trip from Kuala Lumpur w...,103.01,4.5,"[featured_tours_and_tickets, luxury_special_oc..."
4,12469580,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Half-Day Kuala Lumpur City Tour,43.27,4.0,"[featured_tours_and_tickets, luxury_special_oc..."
5,16722683,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,KL Hop On Hop Off City Tour,53.07,3.0,"[featured_tours_and_tickets, tours_sightseeing]"
6,11454341,Kuah,Langkawi,Malaysia,6.326672,99.843025,Langkawi Archipelago Jet Ski Tour Including Da...,515.06,5.0,"[featured_tours_and_tickets, water_sports, tou..."
7,11812460,Kuala Lumpur,Wilayah Persekutuan,Malaysia,3.156802,101.69717,Private Half-Day Batu Caves Waterfalls and Hot...,377.67,5.0,"[featured_tours_and_tickets, private_custom_to..."
8,11992275,Kuah,Langkawi,Malaysia,6.326672,99.843025,12 Flights: Zipline Eco Adventure in Langkawi,473.86,5.0,"[featured_tours_and_tickets, outdoor_activities]"
9,11991123,Penang Island,Penang,Malaysia,5.388071,100.29352,Cheong Fatt Tze George Town Penang: The Blue M...,20.61,4.5,"[featured_tours_and_tickets, walking_biking_to..."


In [54]:
# persist the final attraction dataframe as parquet, replacing any previous copy
tripad_attr_parquet = parquet_path + 'tripad_attr'
final_attr_spark_df.write.parquet(tripad_attr_parquet, mode='overwrite')

# tripad_attr_review

In [492]:
# load the review JSON file into a Spark dataframe and report its row count
attr_review_json = tripad_attr_path + 'tripad_attr_review.json'
attr_review_spark_df = spark.read.json(attr_review_json)
attr_review_spark_df.count()

7147

In [493]:
# remove the columns that are not needed, discard rows that have no location,
# and report the remaining row count
unwanted_review_columns = [
    '__COMMENT', '__typename', 'additionalRatings', 'attribution',
    'helpfulVotes', 'language', 'createdDate', 'roomTip', 'status', 'title',
    'tripInfo', 'url', 'userProfile',
]
attr_review_spark_df = attr_review_spark_df.drop(*unwanted_review_columns)
attr_review_spark_df = attr_review_spark_df.dropna(subset='location')
attr_review_spark_df.count()

7146

In [494]:
# expose the nested location.locationId as a top-level activityId column,
# then drop the location column itself
review_activity_id = attr_review_spark_df.location.locationId
attr_review_spark_df = attr_review_spark_df.withColumn('activityId',
                                                       review_activity_id)
attr_review_spark_df = attr_review_spark_df.drop('location')

In [495]:
# persist the review dataframe as parquet, replacing any previous copy
attr_review_parquet = parquet_path + 'tripad_attr_review'
attr_review_spark_df.write.parquet(attr_review_parquet, mode='overwrite')

In [496]:
# reload the review dataframe from its parquet copy and report its row count
attr_review_parquet = parquet_path + 'tripad_attr_review'
attr_review_spark_df = spark.read.parquet(attr_review_parquet)
attr_review_spark_df.count()

7146

In [497]:
# visualize the review dataframe by converting it to pandas
display(attr_review_spark_df.toPandas())

Unnamed: 0,id,publishedDate,rating,text,userId,username,activityId
0,734539244,2019-12-24,4,We thoroughly enjoyed our visit. The statue a...,AADC3E720516494EE3CAC336D81440F8,Greg B,12962337
1,734157196,2019-12-22,4,Wonderful trip . Our guide Maran was very plea...,3AA21E3EF92DF9C6F2AFD44638D2CD25,Departure668057,12962337
2,734110328,2019-12-22,5,Malaysia citizen are so good lovely and touche...,9D0DF755B277278A6105D76BC019711A,Mohammadfurqan,12962337
3,734058935,2019-12-22,4,Really enjoyed seeing places in Kuala Lumper. ...,7E21240150BA19327730C97A7874A94B,626michelle123626,12962337
4,733460423,2019-12-18,4,Bantu caves are excellent but don't just visit...,E6DFFA76D761E2B098D67B5ADE42613B,John M,12962337
5,733375079,2019-12-18,5,"Herys our guide was enthusiastic, informative ...",2660D5203BE7D41B7A66DA476E170D39,jbboo,12962337
6,733183428,2019-12-17,3,"First, this company tells me that they will pi...",1CA572D8C5537A7C05F650DB326D976B,prasen9,12962337
7,732870074,2019-12-15,3,Nice visit busy place\nLots of stairs and Monk...,0C45F4CA7883225113241AAE2E5C96E9,Ravi G,12962337
8,732702747,2019-12-14,5,The tour was just me and Muru the guide which ...,C70A04C2813D04DC385678CA05531998,Eelvander,12962337
9,732358163,2019-12-12,5,The tour was amazing and the tour guides were ...,3AAA08EEDCD241E9653063A82633E14B,jemmimahw2019,12962337


# PySpark: subscribing to a Kafka topic

##### Prerequisite: set `advertised.listeners=PLAINTEXT://your-kafka-server-ip:9092` in Kafka's `server.properties`

In [1]:
import os
# Kafka connector packages (spark-streaming-kafka-0-8 and spark-sql-kafka-0-10)
# passed to pyspark through the PYSPARK_SUBMIT_ARGS environment variable
pyspark_submit_args = '--packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.4.4,org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.4 pyspark-shell'
os.environ['PYSPARK_SUBMIT_ARGS'] = pyspark_submit_args

In [108]:
from pyspark.sql import SparkSession, functions, types
from pyspark.sql.types import *
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

In [297]:
# create (or reuse) the "attraction" Spark session and display it
attraction_session_builder = SparkSession.builder.appName("attraction")
spark = attraction_session_builder.getOrCreate()
spark

In [47]:
# create streaming contexts
# ssc = StreamingContext(spark.sparkContext, 120)

In [None]:
# kafkaStream = KafkaUtils.createDirectStream(ssc, ['testing2'], {
#     'bootstrap.servers': '10.123.10.26:9092',
#     'auto.offset.reset': 'smallest'
# })

# kafkaStream.pprint()

In [70]:
# parsed = kafkaStream.map(lambda x: json.loads(x[1]))

In [None]:
# ssc.start()
# ssc.awaitTermination()

In [91]:
# ssc.stop()

In [107]:
# spark.stop()

# Structured Kafka reading

In [432]:
# Batch-read the "tripad_attr_review" topic from its earliest offsets.
# NOTE: for batch (spark.read) queries "earliest" is already the default
# startingOffsets ("latest" is the default only for streaming queries); it is
# kept explicit here to make the intent obvious.
# The broker address can be overridden with the KAFKA_BOOTSTRAP_SERVERS
# environment variable and falls back to the address used so far.
kafka_bootstrap_servers = os.environ.get('KAFKA_BOOTSTRAP_SERVERS',
                                         '10.123.10.26:9092')
attr_review_kafka_msg = (
    spark.read.format("kafka")
    .option("kafka.bootstrap.servers", kafka_bootstrap_servers)
    .option("subscribe", "tripad_attr_review")
    .option("startingOffsets", "earliest")
    .load())

##### Infer the schema from the Kafka message values

In [433]:
# keep only the message value, converted with string(value)
kafka_value_as_string = functions.expr("string(value)").alias("value")
kafka_msg_df = attr_review_kafka_msg.select(kafka_value_as_string)

In [435]:
# parse the message values as JSON into a dataframe and report its row count
kafka_msg_values = kafka_msg_df.rdd.map(lambda row: row['value'])
kafka_msg_df_json = spark.read.json(kafka_msg_values, multiLine=True)
kafka_msg_df_json.count()

4252

In [436]:
# inspect the schema of the dataframe parsed from the Kafka message values
kafka_msg_df_json.schema

StructType(List(StructField(data,StructType(List(StructField(locations,ArrayType(StructType(List(StructField(locationId,LongType,true),StructField(reviewListPage,StructType(List(StructField(__typename,StringType,true),StructField(reviews,ArrayType(StructType(List(StructField(__typename,StringType,true),StructField(additionalRatings,ArrayType(StructType(List(StructField(__typename,StringType,true),StructField(rating,LongType,true),StructField(ratingLabel,StringType,true))),true),true),StructField(attribution,StringType,true),StructField(createdDate,StringType,true),StructField(helpfulVotes,LongType,true),StructField(id,LongType,true),StructField(language,StringType,true),StructField(location,StructType(List(StructField(__typename,StringType,true),StructField(locationId,LongType,true),StructField(name,StringType,true),StructField(placeType,StringType,true),StructField(reviewSummary,StructType(List(StructField(__typename,StringType,true),StructField(count,LongType,true),StructField(rating

In [422]:
# required schema
# kafka_msg_df_json = kafka_msg_df.select(
#     functions.from_json(
#         functions.col("value"),
#         kafka_msg_df_json.schema).alias("data")).select("data.*")

In [455]:
# pull the nested data.locations.reviewListPage.reviews field out into a
# single "reviews" column
message_data = kafka_msg_df_json['data']
nested_reviews = message_data['locations']['reviewListPage']['reviews']
reviews_spark_df = kafka_msg_df_json.select(nested_reviews.alias("reviews"))

In [458]:
# inspect the schema of the extracted "reviews" column
reviews_spark_df.schema

StructType(List(StructField(reviews,ArrayType(ArrayType(StructType(List(StructField(__typename,StringType,true),StructField(additionalRatings,ArrayType(StructType(List(StructField(__typename,StringType,true),StructField(rating,LongType,true),StructField(ratingLabel,StringType,true))),true),true),StructField(attribution,StringType,true),StructField(createdDate,StringType,true),StructField(helpfulVotes,LongType,true),StructField(id,LongType,true),StructField(language,StringType,true),StructField(location,StructType(List(StructField(__typename,StringType,true),StructField(locationId,LongType,true),StructField(name,StringType,true),StructField(placeType,StringType,true),StructField(reviewSummary,StructType(List(StructField(__typename,StringType,true),StructField(count,LongType,true),StructField(rating,DoubleType,true))),true),StructField(url,StringType,true))),true),StructField(publishedDate,StringType,true),StructField(rating,LongType,true),StructField(roomTip,StringType,true),StructField

In [456]:
# convert the extracted reviews spark dataframe to a pandas dataframe
reviews_df = reviews_spark_df.toPandas()

In [457]:
# display the pandas reviews dataframe
reviews_df

Unnamed: 0,reviews
0,"[[(Review, [], None, 2019-12-25, 0, 734539244,..."
1,"[[(Review, [], None, 2019-12-28, 1, 735050287,..."
2,"[[(Review, [], None, 2019-12-30, 0, 735605067,..."
3,"[[(Review, [], None, 2019-12-26, 0, 734811581,..."
4,"[[(Review, [], None, 2019-12-30, 0, 735528391,..."
5,"[[(Review, [], None, 2019-12-29, 0, 735355927,..."
6,"[[(Review, [], None, 2019-12-29, 0, 735249797,..."
7,"[[(Review, [], None, 2019-12-30, 0, 735575762,..."
8,"[[(Review, [], None, 2019-12-30, 0, 735570887,..."
9,"[[(Review, [], None, 2019-09-22, 0, 712085586,..."
