## Exploration of the MapReduce result

---

### Import Libraries

In [1]:
# findspark is imported on its own, ahead of the pyspark imports in the next cell
import findspark

# Locate the Spark installation; this is what lets the later `import pyspark`
# resolve when pyspark is not installed as a regular package
findspark.init()

In [2]:
import pandas as pd
import pyspark as ps
from pyspark.sql.functions import col, sum
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
from pyspark.sql import SparkSession


### Initialize Spark

In [3]:
# Reset step: getOrCreate() returns the session already running in this kernel
# (or builds a throwaway one), and stopping it shuts down its SparkContext.
# Presumably this is here so the explicit SparkContext below can be created
# even when the cell is re-run -- confirm before removing.
spark = SparkSession.builder.appName("data_cleaning").getOrCreate()
spark.stop()

# Fresh context for this notebook, registered under its own application name
sc = ps.SparkContext(appName="prior_analysis")

# Session wrapping that context; every later cell reads data through it
spark_session = SparkSession(sc)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/09/05 15:32:13 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


### Connect and import data from HDFS directly into a Spark DataFrame

In [4]:
# HDFS location of the joined tables CSV
joined_tables_path = 'hdfs://localhost:9900/user/book_reviews/joined_tables.csv'

# First line is the header, column types are inferred, and '"' is the escape
# character inside quoted fields
df_join_result = (
    spark_session.read
    .option('escape', '"')
    .csv(joined_tables_path, header=True, inferSchema=True)
)

# NOTE(review): the printed schema lists the last column as 'review/text\t'
# (trailing tab in the header) -- verify the upstream export before relying on
# that column name.
df_join_result.printSchema()       # column names and inferred types
df_join_result.describe().show()   # summary statistics for every column
df_join_result.show(5)             # first five rows

                                                                                

root
 |-- Title: string (nullable = true)
 |-- description: string (nullable = true)
 |-- authors: string (nullable = true)
 |-- image: string (nullable = true)
 |-- previewLink: string (nullable = true)
 |-- publisher: string (nullable = true)
 |-- publishedDate: string (nullable = true)
 |-- infoLink: string (nullable = true)
 |-- categories: string (nullable = true)
 |-- ratingsCount: double (nullable = true)
 |-- Price: string (nullable = true)
 |-- User_id: string (nullable = true)
 |-- profileName: string (nullable = true)
 |-- review/helpfulness: string (nullable = true)
 |-- review/score: string (nullable = true)
 |-- review/time: string (nullable = true)
 |-- review/summary: string (nullable = true)
 |-- review/text\t: string (nullable = true)



23/09/05 15:32:34 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.

+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+------------------+--------------------+--------------------+------------------+--------------------+--------------------+-----------+------------------+--------------------+-----------+--------------------+--------------------+
|summary|               Title|         description|             authors|               image|         previewLink|           publisher|     publishedDate|            infoLink|          categories|      ratingsCount|               Price|             User_id|profileName|review/helpfulness|        review/score|review/time|      review/summary|       review/text\t|
+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+------------------+--------------------+--------------------+------------------+--------------------+--------------------+-----------+---

                                                                                

In [5]:
# Bring only the first 30 rows to the driver and render them as a pandas table.
# NOTE(review): in the captured output some rows look shifted (e.g. row 0 shows
# fragments of the title under Price / User_id) -- the CSV quoting of titles
# containing '"' and ',' should be checked.
df_join_result.limit(30).toPandas()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text\t
0,""" Film technique, "" and, "" Film acting """,FILM TECHNIQUE AND FILM ACTING- The Cinema Wri...,['V. I. Pudovkin'],http://books.google.com/books/content?id=IKbBb...,http://books.google.nl/books?id=IKbBbMxeJDEC&d...,Sims Press,2008-11,http://books.google.nl/books?id=IKbBbMxeJDEC&d...,['Drama'],,""""" and",""""" Film acting """"""",,,Faisal A. Qureshi,7/8,4.0,949708800
1,""" Film technique, "" and, "" Film acting """,FILM TECHNIQUE AND FILM ACTING- The Cinema Wri...,['V. I. Pudovkin'],http://books.google.com/books/content?id=IKbBb...,http://books.google.nl/books?id=IKbBbMxeJDEC&d...,Sims Press,2008-11,http://books.google.nl/books?id=IKbBbMxeJDEC&d...,['Drama'],,""""" and",""""" Film acting """"""",,,"Tania Castaneda ""Tania C""",0/0,5.0,1250726400
2,""" We'll Always Have Paris"": The Definitive Gui...","This ultimate compilation of more than 11,000 ...","['Robert A. Nowlan', 'Gwendolyn Wright Nowlan']",http://books.google.com/books/content?id=H1YqN...,http://books.google.com/books?id=H1YqNQAACAAJ&...,Perennial,1994,http://books.google.com/books?id=H1YqNQAACAAJ&...,['Reference'],,,,S. J Parker,0/1,5.0,1080432000,Great book,"The more than 11,000 quotes in this book, cons..."
3,""" We'll Always Have Paris"": The Definitive Gui...","This ultimate compilation of more than 11,000 ...","['Robert A. Nowlan', 'Gwendolyn Wright Nowlan']",http://books.google.com/books/content?id=H1YqN...,http://books.google.com/books?id=H1YqNQAACAAJ&...,Perennial,1994,http://books.google.com/books?id=H1YqNQAACAAJ&...,['Reference'],,,,E. I. Robinson,0/0,5.0,1198713600,I think this is the beginning of a beautiful f...,You would only be looking at this book if you ...
4,"""... And Poetry is Born ..."" Russian Classical...",A selection of Russian poems in Russian with a...,['Aleksandr Sergeevich Pushkin'],,http://books.google.nl/books?id=IWRhwgEACAAJ&d...,,1984,http://books.google.nl/books?id=IWRhwgEACAAJ&d...,['Russian poetry'],,,,"Husky Dawg ""Husky Dawg""",0/0,4.0,1284940800,Wonderful and unique selection of poetry,Rare and good treat to find in a handy travel ...
5,"""A Titanic hero"" Thomas Andrews, shipbuilder",,['Shan F. Bullock'],http://books.google.com/books/content?id=DoQDA...,http://books.google.com/books?id=DoQDAAAAYAAJ&...,,1913,https://play.google.com/store/books/details?id...,,,"shipbuilder""",,,ntlelmbrt@yahoo.com,0/0,5.0,900892800,An absolutely charming chronicle of Thomas And...
6,"""A Titanic hero"" Thomas Andrews, shipbuilder",,['Shan F. Bullock'],http://books.google.com/books/content?id=DoQDA...,http://books.google.com/books?id=DoQDAAAAYAAJ&...,,1913,https://play.google.com/store/books/details?id...,,,"shipbuilder""",,,only me,6/6,5.0,914716800,A man who shone like a star
7,"""A Titanic hero"" Thomas Andrews, shipbuilder",,['Shan F. Bullock'],http://books.google.com/books/content?id=DoQDA...,http://books.google.com/books?id=DoQDAAAAYAAJ&...,,1913,https://play.google.com/store/books/details?id...,,,"shipbuilder""",,,"Eileen Grimes ""Titanic Astrology author""",4/4,5.0,969840000,God bless this man
8,"""A Titanic hero"" Thomas Andrews, shipbuilder",,['Shan F. Bullock'],http://books.google.com/books/content?id=DoQDA...,http://books.google.com/books?id=DoQDAAAAYAAJ&...,,1913,https://play.google.com/store/books/details?id...,,,"shipbuilder""",,,"""darl85""",4/4,5.0,916444800,A man loved and respected by so many
9,"""A Titanic hero"" Thomas Andrews, shipbuilder",,['Shan F. Bullock'],http://books.google.com/books/content?id=DoQDA...,http://books.google.com/books?id=DoQDAAAAYAAJ&...,,1913,https://play.google.com/store/books/details?id...,,,"shipbuilder""",,,thomas e. lewis,2/2,4.0,947894400,A must read for every Titanic Buff!


In [6]:
# Shut down the session (and its underlying SparkContext) now that exploration is done
spark_session.stop()