### Create SparkSession:

In [1]:
# Notebook setup: imports plus the Spark entry points used by the later cells.
# NOTE(review): numpy, pandas and `sc` are not referenced in the cells visible
# here — confirm they are needed elsewhere before keeping them.
import findspark 
import numpy 
import pandas
# init() is deliberately called before pyspark is imported — presumably so the
# local Spark installation is made importable first; keep this ordering.
findspark.init()
import pyspark
from  pyspark.sql import SparkSession 
# Reuse the already-running SparkSession if there is one, otherwise build one
# with default settings.
spark = SparkSession.builder.getOrCreate()
# Handle to the SparkContext that backs the session.
sc = spark.sparkContext

### Read the DataFrames_sample.json file:

In [3]:
# Load the JSON sample file into a DataFrame.
# The JSON source always infers the schema from the records and has no notion
# of a header row, so the CSV-only "inferschema" / "header" options that used
# to be set here were no-ops and have been dropped.
data = spark.read.json("DataFrames_sample.json")

### Display part of the data and schema:


In [8]:
# Preview the first four rows of the loaded DataFrame.
data.show(n=4)

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
|7.74|0.52|256GB SSD|  2|    MacBook| 8GB|       12"|11.04|  2.03|2016|
|8.94|0.68|128GB SSD|  3|MacBook Air| 8GB|     13.3"| 12.8|  2.96|2016|
| 8.0|20.3|  1TB SSD|  4|       iMac|64GB|       27"| 25.6|  20.8|2017|
+----+----+---------+---+-----------+----+----------+-----+------+----+



In [5]:
# Print the column names, inferred types and nullability as a tree.
data.printSchema()

root
 |-- D: double (nullable = true)
 |-- H: double (nullable = true)
 |-- HDD: string (nullable = true)
 |-- Id: long (nullable = true)
 |-- Model: string (nullable = true)
 |-- RAM: string (nullable = true)
 |-- ScreenSize: string (nullable = true)
 |-- W: double (nullable = true)
 |-- Weight: double (nullable = true)
 |-- Year: long (nullable = true)



## Using SQL
### Create Temp View:

In [9]:

# Register the DataFrame as a temporary view so it can be queried by name
# through spark.sql(...) in the cells below.
# NOTE(review): "labtop_config" looks like a typo for "laptop_config"; every
# later query references this exact name, so rename them all together.
data.createOrReplaceTempView("labtop_config")


### Display each "RAM" value and the number of rows that have it:

In [23]:
# Group the rows by RAM and count the non-null RAM values in each group.
(
    spark
    .sql('''SELECT RAM, COUNT(RAM) AS count FROM labtop_config GROUP BY RAM''')
    .show()
)

+----+-----+
| RAM|count|
+----+-----+
|64GB|    1|
|16GB|    1|
| 8GB|    2|
+----+-----+



### Get all columns where the "Year" column equals 2015:

In [27]:
# Keep only the rows whose Year is 2015 and show every column.
(
    spark
    .sql("select * from labtop_config where Year== 2015")
    .show()
)

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get the "Model" column for rows where "Model" starts with "M":

In [29]:
# Show only the Model column for rows whose Model begins with "M".
(
    spark
    .sql("select Model from labtop_config where Model LIKE 'M%'")
    .show()
)

+-----------+
|      Model|
+-----------+
|MacBook Pro|
|    MacBook|
|MacBook Air|
+-----------+



### Get all columns where the "Model" column equals "MacBook Pro":

In [31]:
# Keep only the rows whose Model is exactly 'MacBook Pro' and show every column.
(
    spark
    .sql("select * from labtop_config where Model == 'MacBook Pro'")
    .show()
)

+----+----+---------+---+-----------+----+----------+-----+------+----+
|   D|   H|      HDD| Id|      Model| RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-----------+----+----------+-----+------+----+
|9.48|0.61|512GB SSD|  1|MacBook Pro|16GB|       15"|13.75|  4.02|2015|
+----+----+---------+---+-----------+----+----------+-----+------+----+



### Get all columns with multiple conditions: "RAM" equals "8GB" and "Model" equals "MacBook":

In [34]:
# Combine two filters: RAM must be '8GB' and Model must be exactly 'MacBook'.
(
    spark
    .sql("select * from labtop_config where RAM == '8GB' and Model == 'MacBook' ")
    .show()
)

+----+----+---------+---+-------+---+----------+-----+------+----+
|   D|   H|      HDD| Id|  Model|RAM|ScreenSize|    W|Weight|Year|
+----+----+---------+---+-------+---+----------+-----+------+----+
|7.74|0.52|256GB SSD|  2|MacBook|8GB|       12"|11.04|  2.03|2016|
+----+----+---------+---+-------+---+----------+-----+------+----+



### Get all columns with multiple conditions: "D" is greater than or equal to 8 and "Model" equals "iMac":

In [35]:
# Combine two filters: column D must be at least 8 and Model must be 'iMac'.
(
    spark
    .sql("select * from labtop_config where D >= 8 and Model == 'iMac' ")
    .show()
)

+---+----+-------+---+-----+----+----------+----+------+----+
|  D|   H|    HDD| Id|Model| RAM|ScreenSize|   W|Weight|Year|
+---+----+-------+---+-----+----+----------+----+------+----+
|8.0|20.3|1TB SSD|  4| iMac|64GB|       27"|25.6|  20.8|2017|
+---+----+-------+---+-----+----+----------+----+------+----+

