## Exploration of the MapReduce result

---

### Import Libraries

In [1]:
# findspark is imported on its own, ahead of the pyspark imports in the next cell.
import findspark

# Locate the Spark installation so that pyspark can be imported afterwards.
# NOTE(review): relies on findspark being able to discover Spark (e.g. via
# SPARK_HOME) — confirm for the environment this notebook runs in.
findspark.init()

In [2]:
import pandas as pd
import pyspark as ps
from pyspark.sql.functions import col, sum
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
from pyspark.sql import SparkSession


### Initialize Spark

In [3]:
# Create (or reuse) the single SparkSession for this notebook through the
# builder, which is the supported entry point. This avoids starting and
# immediately stopping a throwaway "data_cleaning" session just to be able to
# construct a second SparkContext by hand.
# If a session is already active in this kernel, getOrCreate() returns it
# unchanged, so re-running this cell is safe.
spark_session = SparkSession.builder.appName("prior_analysis").getOrCreate()

# Keep the underlying SparkContext available under its original name.
sc = spark_session.sparkContext

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/09/05 11:58:32 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


### Connect and import data from HDFS directly into a Spark DataFrame

In [4]:
# Load the joined-tables CSV directly from HDFS into a Spark DataFrame.
# '"' is used as the escape character; the first row is treated as the header
# and column types are inferred from the data.
joined_csv_path = 'hdfs://localhost:9900/user/book_reviews/joined_tables.csv'
csv_reader = spark_session.read.option('escape', '"')
df_join_result = csv_reader.csv(joined_csv_path, header=True, inferSchema=True)

# NOTE(review): in the recorded output every column is inferred as string and
# the last column is named 'review/text\t' — presumably the input has
# misaligned rows or a malformed header; verify against the upstream job.

# Inspect the result: schema, summary statistics, then the first 5 rows.
df_join_result.printSchema()
summary_stats = df_join_result.describe()
summary_stats.show()
df_join_result.show(5)

                                                                                

root
 |-- Title: string (nullable = true)
 |-- description: string (nullable = true)
 |-- authors: string (nullable = true)
 |-- image: string (nullable = true)
 |-- previewLink: string (nullable = true)
 |-- publisher: string (nullable = true)
 |-- publishedDate: string (nullable = true)
 |-- infoLink: string (nullable = true)
 |-- categories: string (nullable = true)
 |-- ratingsCount: string (nullable = true)
 |-- Price: string (nullable = true)
 |-- User_id: string (nullable = true)
 |-- profileName: string (nullable = true)
 |-- review/helpfulness: string (nullable = true)
 |-- review/score: string (nullable = true)
 |-- review/time: string (nullable = true)
 |-- review/summary: string (nullable = true)
 |-- review/text\t: string (nullable = true)



23/09/05 11:58:46 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.

+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-------------------------------------+-------+-----------+------------------+--------------------+--------------------+--------------------+--------------------+
|summary|               Title|         description|             authors|               image|         previewLink|           publisher|       publishedDate|            infoLink|          categories|        ratingsCount|                                Price|User_id|profileName|review/helpfulness|        review/score|         review/time|      review/summary|       review/text\t|
+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-------------------

                                                                                

In [5]:
# Bring only the first 30 rows to the driver and display them as a pandas
# DataFrame (rich notebook rendering).
preview_rows = df_join_result.limit(30)
preview_rows.toPandas()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text\t
0,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,curmudgeon,2/13,4.0,1271203200,The old man and his essay,Mr locke lived a very long time ago and is dea...,"'""",,['John Locke'],http://books.google.com/books/content?id=eCs7A...,http://books.google.nl/books?id=eCs7AAAAYAAJ&p...,,1849,https://play.google.com/store/books/details?id...,"['Knowledge, Theory of']"
1,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,curmudgeon,2/13,4.0,1271203200,The old man and his essay,Mr locke lived a very long time ago and is dea...,"'""",,,"""Shalom Freedman \""Shalom Freedman\""""",5/6,5.0,1106092800,One of the major works in Western Philosophy,It has been many years since I pondered and re...
2,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,curmudgeon,2/13,4.0,1271203200,The old man and his essay,Mr locke lived a very long time ago and is dea...,"'""",,,mp,54/64,4.0,1012435200,Locked Into Reason 18th Century Style,John Locke's 1698 &quot;Essay Concerning Human...
3,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,curmudgeon,2/13,4.0,1271203200,The old man and his essay,Mr locke lived a very long time ago and is dea...,"'""",,,,9/13,5.0,922492800,I Already Knew That,The highest compliment I can pay this book is ...
4,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,curmudgeon,2/13,4.0,1271203200,The old man and his essay,Mr locke lived a very long time ago and is dea...,"'""",,,Gary R. Childress,2/2,2.0,1340928000,Bad binding: Not sure if fluke or normal,"The book is great, it's exactly what I need fo..."
5,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,"""mrnolanburris \""Nolan J. Burris\""""",0/0,5.0,1328313600,College text,This book is a required read for most philosop...,"'""",,,Gary R. Childress,2/2,2.0,1340928000,Bad binding: Not sure if fluke or normal,"The book is great, it's exactly what I need fo..."
6,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,"""mrnolanburris \""Nolan J. Burris\""""",0/0,5.0,1328313600,College text,This book is a required read for most philosop...,"'""",,,"""John S. Ryan \""Scott Ryan\""""",50/52,5.0,933552000,A highly readable and influential work by a se...,On the list of thinkers who have exemplified w...
7,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,"""mrnolanburris \""Nolan J. Burris\""""",0/0,5.0,1328313600,College text,This book is a required read for most philosop...,"'""",,,,8/159,1.0,1009843200,Essay Concerning Very Little,I often ponder the meaning of life. I often co...
8,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,"""mrnolanburris \""Nolan J. Burris\""""",0/0,5.0,1328313600,College text,This book is a required read for most philosop...,"'""",,,"""Joao Pedro \""John\""""",0/2,4.0,1276732800,Complete book,The boos is a great synthesis of Locke`s book....
9,"'AN ESSAY CONCERNING HUMAN UNDERSTANDING,'",,,"""mrnolanburris \""Nolan J. Burris\""""",0/0,5.0,1328313600,College text,This book is a required read for most philosop...,"'""",,,Sarang Gopalakrishnan,18/90,2.0,1072828800,Not unless you need it...,"For the most part, this book is unreadable and..."


In [6]:
# Shut down the Spark session once the exploration is finished.
spark_session.stop()