In [1]:
!pip install pyspark

import pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.0.tar.gz (281.3 MB)
[K     |████████████████████████████████| 281.3 MB 45 kB/s 
[?25hCollecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[K     |████████████████████████████████| 199 kB 65.4 MB/s 
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.0-py2.py3-none-any.whl size=281764026 sha256=baf4a4eb4e44638a572195c6507745baa1750ba3971069449556fa57d8379c1e
  Stored in directory: /root/.cache/pip/wheels/7a/8e/1b/f73a52650d2e5f337708d9f6a1750d451a7349a867f928b885
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.5 pyspark-3.3.0


In [2]:
# Confirm which PySpark version is actually loaded in this kernel
pyspark.__version__

'3.3.0'

In [3]:
# Create the two Spark entry points used throughout this notebook:
#   sc    - SparkContext, for the RDD API
#   spark - SparkSession, for the DataFrame API
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

# When this cell is re-run, stop the context left over from the previous run.
# The only failure expected here is that `sc` does not exist yet (first run on
# a fresh kernel), so catch NameError specifically instead of a bare `except:`
# that would also hide genuine errors.
try:
    sc.stop()
except NameError:
    pass

# getOrCreate() reuses an active context/session if one exists and creates a
# new one otherwise, so the cell is safe to run repeatedly.
sc = SparkContext.getOrCreate()
spark = SparkSession.builder.getOrCreate()

# RDD object

The class `pyspark.SparkContext` creates a client which connects to a Spark cluster. This client can be used to create an RDD object. There are two methods from this class for directly creating RDD objects:
* `parallelize()`
* `textFile()`

## `parallelize()`

`parallelize()` distributes a local **Python collection** to form an RDD. Common built-in Python collections include `dict`, `list`, `tuple` and `set`.

Examples:

In [4]:
# Distribute a plain Python list as an RDD, then bring every element back
# to the driver to inspect it.
numbers = [1, 2, 3]
rdd = sc.parallelize(numbers)

rdd.collect()

[1, 2, 3]

In [5]:
# take(n) returns only the first n elements instead of the whole RDD
rdd.take(2)

[1, 2]

In [6]:
# Same idea with a list that is bound to a name first.
mylist = [120, 2343, 456, 789, 890]
rdd_mylist = sc.parallelize(mylist)

# Fetch the elements back to the driver, then print the resulting list.
collected = rdd_mylist.collect()
print(collected)



[120, 2343, 456, 789, 890]


In [7]:
# No parentheses after `collect`: this prints the bound method object itself
# rather than calling it, so no elements are returned here.
print(rdd_mylist.collect)

<bound method RDD.collect of ParallelCollectionRDD[3] at readRDDFromFile at PythonRDD.scala:274>


In [8]:
# A tuple works the same way as a list: each item becomes one RDD element.
animals = ('cat', 'dog', 'fish')
rdd = sc.parallelize(animals)

rdd.collect()

['cat', 'dog', 'fish']

In [9]:
# A list whose items are tuples: each whole tuple becomes one RDD element,
# and the tuples do not need to have the same length.
list_t = [
    ('cat', 'dog', 'fish'),
    ('orange', 'apple'),
]
rdd = sc.parallelize(list_t)

rdd.collect()

[('cat', 'dog', 'fish'), ('orange', 'apple')]

In [10]:
# Each tuple is a single element, so take(1) returns the whole first tuple
rdd.take(1)

[('cat', 'dog', 'fish')]

In [11]:
# A set drops the repeated strings before Spark ever sees them, so the RDD
# holds each distinct value once. Sets are unordered, so the element order in
# the result is not guaranteed.
s = set(['cat', 'dog', 'fish', 'cat', 'dog', 'dog'])
rdd = sc.parallelize(s)

rdd.collect()

['cat', 'dog', 'fish']

When it is a `dict`, only the keys are used to form the RDD.

In [12]:
# Iterating over a dict yields its keys, so only 'a', 'b', 'c' end up in the
# RDD; the values 100/200/300 are not included.
d = dict(a=100, b=200, c=300)
rdd = sc.parallelize(d)

rdd.collect()

['a', 'b', 'c']

In [13]:
# Colab-only: mount Google Drive at /content/drive; the data files read in
# the following cells live under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## `textFile()`

The `textFile()` function reads a text file and returns it as an **RDD of strings**. Usually, you will need to apply some **map** functions to transform each element of the RDD into a data structure/type that is suitable for data analysis.

**When using `textFile()`, each line of the text file becomes an element in the resulting RDD.**

Examples:

In [16]:
# Read a CSV file as raw text: every line (including the header line) becomes
# one string element; nothing is parsed into columns at this point.
iris_path = '/content/drive/MyDrive/Colab Notebooks/PySpark-Learning Apache Spark/data/iris.csv'
rdd = sc.textFile(iris_path)

rdd.take(5)

['sepal_length,sepal_width,petal_length,petal_width,species',
 '5.1,3.5,1.4,0.2,setosa',
 '4.9,3,1.4,0.2,setosa',
 '4.7,3.2,1.3,0.2,setosa',
 '4.6,3.1,1.5,0.2,setosa']

In [18]:
# Read a plain-text file the same way; each line is kept as one string,
# with its tab characters intact.
twitter_path = '/content/drive/MyDrive/Colab Notebooks/PySpark-Learning Apache Spark/data/twitter.txt'
rdd = sc.textFile(twitter_path)

rdd.take(5)

['Fresh install of XP on new computer. Sweet relief! fuck vista\t1018769417\t1.0',
 'Well. Now I know where to go when I want my knives. #ChiChevySXSW http://post.ly/RvDl\t10284216536\t1.0',
 '"Literally six weeks before I can take off ""SSC Chair"" off my email. Its like the torturous 4th mile before everything stops hurting."\t10298589026\t1.0',
 'Mitsubishi i MiEV - Wikipedia, the free encyclopedia - http://goo.gl/xipe Cutest car ever!\t109017669432377344\t1.0',
 "'Cheap Eats in SLP' - http://t.co/4w8gRp7\t109642968603963392\t1.0"]

In [19]:
# Count the elements currently held in `rdd`
rdd.count()

10

## **Importing data as a Dataframe**

In [20]:
# Load mtcars.csv straight into a DataFrame. The first line supplies the
# column names (header=True) and column types are inferred from the data
# (inferSchema=True).
mtcars_path = '/content/drive/MyDrive/Colab Notebooks/PySpark-Learning Apache Spark/data/mtcars.csv'
csv_options = dict(
    sep=',',
    encoding='UTF-8',
    comment=None,
    header=True,
    inferSchema=True,
)
mtcars = spark.read.csv(path=mtcars_path, **csv_options)

# Preview the first five rows without shortening long cell values.
mtcars.show(n=5, truncate=False)

+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|_c0              |mpg |cyl|disp |hp |drat|wt   |qsec |vs |am |gear|carb|
+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|Mazda RX4        |21.0|6  |160.0|110|3.9 |2.62 |16.46|0  |1  |4   |4   |
|Mazda RX4 Wag    |21.0|6  |160.0|110|3.9 |2.875|17.02|0  |1  |4   |4   |
|Datsun 710       |22.8|4  |108.0|93 |3.85|2.32 |18.61|1  |1  |4   |1   |
|Hornet 4 Drive   |21.4|6  |258.0|110|3.08|3.215|19.44|1  |0  |3   |1   |
|Hornet Sportabout|18.7|8  |360.0|175|3.15|3.44 |17.02|0  |0  |3   |2   |
+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
only showing top 5 rows



In [21]:
# Confirm that spark.read.csv returned a Spark DataFrame
type(mtcars)

pyspark.sql.dataframe.DataFrame

In [22]:
# List each column name together with its Spark type
mtcars.dtypes

[('_c0', 'string'),
 ('mpg', 'double'),
 ('cyl', 'int'),
 ('disp', 'double'),
 ('hp', 'int'),
 ('drat', 'double'),
 ('wt', 'double'),
 ('qsec', 'double'),
 ('vs', 'int'),
 ('am', 'int'),
 ('gear', 'int'),
 ('carb', 'int')]

In [23]:
# Summary statistics (such as count and mean) for every column
mtcars.describe().show()

+-------+-----------+------------------+------------------+------------------+-----------------+------------------+------------------+------------------+------------------+-------------------+------------------+------------------+
|summary|        _c0|               mpg|               cyl|              disp|               hp|              drat|                wt|              qsec|                vs|                 am|              gear|              carb|
+-------+-----------+------------------+------------------+------------------+-----------------+------------------+------------------+------------------+------------------+-------------------+------------------+------------------+
|  count|         32|                32|                32|                32|               32|                32|                32|                32|                32|                 32|                32|                32|
|   mean|       null|20.090624999999996|            6.1875|230.7218750000000

## Create DataFrame with *createDataFrame* function

In [27]:
from pyspark.sql import Row

# Two Row objects with the same field names (x, y, z); x and y hold lists,
# z holds a string.
rows = [
    Row(x=[1, 2, 3], y=['a', 'b', 'c'], z='yes'),
    Row(x=[4, 5, 6], y=['e', 'f', 'g'], z='no'),
]
rdd = sc.parallelize(rows)

rdd.collect()

[Row(x=[1, 2, 3], y=['a', 'b', 'c'], z='yes'),
 Row(x=[4, 5, 6], y=['e', 'f', 'g'], z='no')]

In [28]:
# Create a DataFrame from the RDD of Row objects; the Row field names
# (x, y, z) become the column names.
df = spark.createDataFrame(rdd)
df.show()

+---------+---------+---+
|        x|        y|  z|
+---------+---------+---+
|[1, 2, 3]|[a, b, c]|yes|
|[4, 5, 6]|[e, f, g]| no|
+---------+---------+---+



## Creating Spark Dataframe from Pandas Dataframe

In [29]:
import pandas as pd

# Small pandas frame in which every cell holds a whole Python list.
pdf_data = {
    'x': [[1, 2, 3], [4, 5, 6]],
    'y': [['a', 'b', 'c'], ['e', 'f', 'g']],
}
pdf = pd.DataFrame(pdf_data)
pdf

Unnamed: 0,x,y
0,"[1, 2, 3]","[a, b, c]"
1,"[4, 5, 6]","[e, f, g]"


In [30]:
# createDataFrame also accepts a pandas DataFrame directly
df = spark.createDataFrame(pdf)
df.show()

+---------+---------+
|        x|        y|
+---------+---------+
|[1, 2, 3]|[a, b, c]|
|[4, 5, 6]|[e, f, g]|
+---------+---------+



In [32]:
# Load the prostate data set with pandas (a local, in-memory frame, not a Spark one).
prostate_path = '/content/drive/MyDrive/Colab Notebooks/PySpark-Learning Apache Spark/data/prostate.csv'
df1 = pd.read_csv(prostate_path)

df1.head()

Unnamed: 0,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa
0,-0.579818,2.769459,50,-1.386294,0,-1.386294,6,0,-0.430783
1,-0.994252,3.319626,58,-1.386294,0,-1.386294,6,0,-0.162519
2,-0.510826,2.691243,74,-1.386294,0,-1.386294,7,20,-0.162519
3,-1.203973,3.282789,58,-1.386294,0,-1.386294,6,0,-0.162519
4,0.751416,3.432373,62,-1.386294,0,-1.386294,6,0,0.371564


In [35]:
# pandas summary statistics for each column
df1.describe()

Unnamed: 0,lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45,lpsa
count,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0
mean,1.35001,3.652686,63.865979,0.100356,0.216495,-0.179366,6.752577,24.381443,2.478387
std,1.178625,0.496631,7.445117,1.450807,0.413995,1.39825,0.722134,28.204035,1.154329
min,-1.347074,2.374906,41.0,-1.386294,0.0,-1.386294,6.0,0.0,-0.430783
25%,0.512824,3.37588,60.0,-1.386294,0.0,-1.386294,6.0,0.0,1.731656
50%,1.446919,3.623007,65.0,0.300105,0.0,-0.798508,7.0,15.0,2.591516
75%,2.127041,3.878466,68.0,1.558145,0.0,1.178655,7.0,40.0,3.056357
max,3.821004,6.10758,79.0,2.326302,1.0,2.904165,9.0,100.0,5.582932


In [36]:
# df1 is a pandas DataFrame, not a Spark one
type(df1)

pandas.core.frame.DataFrame

In [33]:
# Convert the pandas DataFrame into a Spark DataFrame and print its first rows
sdf1 = spark.createDataFrame(df1)
sdf1.show()

+------------+-----------+---+------------+---+------------+-------+-----+------------+
|      lcavol|    lweight|age|        lbph|svi|         lcp|gleason|pgg45|        lpsa|
+------------+-----------+---+------------+---+------------+-------+-----+------------+
|-0.579818495|2.769458829| 50|-1.386294361|  0|-1.386294361|      6|    0|-0.430782916|
|-0.994252273|3.319625728| 58|-1.386294361|  0|-1.386294361|      6|    0|-0.162518929|
|-0.510825624|2.691243083| 74|-1.386294361|  0|-1.386294361|      7|   20|-0.162518929|
|-1.203972804|3.282789151| 58|-1.386294361|  0|-1.386294361|      6|    0|-0.162518929|
| 0.751416089|3.432372999| 62|-1.386294361|  0|-1.386294361|      6|    0| 0.371563556|
|-1.049822124|3.228826156| 50|-1.386294361|  0|-1.386294361|      6|    0| 0.765467842|
| 0.737164066|3.473518043| 64| 0.615185639|  0|-1.386294361|      6|    0| 0.765467842|
| 0.693147181|3.539508997| 58|  1.53686722|  0|-1.386294361|      6|    0| 0.854415328|
|-0.776528789|3.539508997| 47|-1

In [34]:
# Number of rows in the Spark DataFrame
sdf1.count()

97

In [38]:
# Spark's counterpart of pandas describe(): summary statistics per column
sdf1.describe().show()

+-------+------------------+------------------+------------------+-------------------+-------------------+-------------------+------------------+------------------+-----------------+
|summary|            lcavol|           lweight|               age|               lbph|                svi|                lcp|           gleason|             pgg45|             lpsa|
+-------+------------------+------------------+------------------+-------------------+-------------------+-------------------+------------------+------------------+-----------------+
|  count|                97|                97|                97|                 97|                 97|                 97|                97|                97|               97|
|   mean|1.3500095804845362|  3.65268638871134|63.865979381443296|0.10035560588659781|0.21649484536082475|-0.1793655774845361| 6.752577319587629| 24.38144329896907|2.478386878742268|
| stddev|1.1786248779882174|0.4966306624049043| 7.445117060277637| 1.4508066259305492

In [40]:
# Correlation coefficient between the lweight and lbph columns (returned as a single float)
sdf1.corr('lweight','lbph')

0.43493463582448155

In [42]:
# Contingency table: one row per cyl value, one column per gear value,
# each cell holding the number of matching rows.
mtcars.crosstab('cyl','gear').show()

+--------+---+---+---+
|cyl_gear|  3|  4|  5|
+--------+---+---+---+
|       8| 12|  0|  2|
|       6|  2|  4|  1|
|       4|  1|  8|  2|
+--------+---+---+---+



In [44]:
# Row counts in long form: one output row per (cyl, gear) combination present in the data
mtcars.groupby('cyl','gear').count().show()

+---+----+-----+
|cyl|gear|count|
+---+----+-----+
|  8|   3|   12|
|  6|   5|    1|
|  8|   5|    2|
|  4|   3|    1|
|  6|   3|    2|
|  4|   4|    8|
|  4|   5|    2|
|  6|   4|    4|
+---+----+-----+



In [47]:
# mtcars is a Spark DataFrame at this point
type(mtcars)

pyspark.sql.dataframe.DataFrame

## DataFrame to RDD
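A **DataFrame** can be easily converted to an **RDD** through the `pyspark.sql.DataFrame.rdd` property (it is an attribute, so it is accessed without parentheses). Each element in the returned RDD is a **pyspark.sql.Row** object. A Row is an ordered collection of named fields (key-value pairs), so its values can be accessed by position or by field name.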

In [48]:
# .rdd is accessed as an attribute; it exposes the DataFrame's rows as Row objects
mtcars.rdd.take(2)

[Row(_c0='Mazda RX4', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.62, qsec=16.46, vs=0, am=1, gear=4, carb=4),
 Row(_c0='Mazda RX4 Wag', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.875, qsec=17.02, vs=0, am=1, gear=4, carb=4)]

With an RDD object, we can apply a set of mapping functions, such as **map**, **mapValues**, **flatMap**, **flatMapValues** and a lot of other methods that come from RDD.

In [61]:
# collect() returns every row to the driver as a Python list of Row objects.
# That is fine for this small table; prefer take(n) on large data.
mtcars.rdd.collect()

[Row(_c0='Mazda RX4', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.62, qsec=16.46, vs=0, am=1, gear=4, carb=4),
 Row(_c0='Mazda RX4 Wag', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.875, qsec=17.02, vs=0, am=1, gear=4, carb=4),
 Row(_c0='Datsun 710', mpg=22.8, cyl=4, disp=108.0, hp=93, drat=3.85, wt=2.32, qsec=18.61, vs=1, am=1, gear=4, carb=1),
 Row(_c0='Hornet 4 Drive', mpg=21.4, cyl=6, disp=258.0, hp=110, drat=3.08, wt=3.215, qsec=19.44, vs=1, am=0, gear=3, carb=1),
 Row(_c0='Hornet Sportabout', mpg=18.7, cyl=8, disp=360.0, hp=175, drat=3.15, wt=3.44, qsec=17.02, vs=0, am=0, gear=3, carb=2),
 Row(_c0='Valiant', mpg=18.1, cyl=6, disp=225.0, hp=105, drat=2.76, wt=3.46, qsec=20.22, vs=1, am=0, gear=3, carb=1),
 Row(_c0='Duster 360', mpg=14.3, cyl=8, disp=360.0, hp=245, drat=3.21, wt=3.57, qsec=15.84, vs=0, am=0, gear=3, carb=4),
 Row(_c0='Merc 240D', mpg=24.4, cyl=4, disp=146.7, hp=62, drat=3.69, wt=3.19, qsec=20.0, vs=1, am=0, gear=4, carb=2),
 Row(_c0='Merc 230', mpg=2

In [62]:
# Reduce each Row to a (name, mpg) pair, giving an RDD of key-value tuples.
mtcars_rows = mtcars.rdd
mtcars_map = mtcars_rows.map(lambda row: (row['_c0'], row['mpg']))

mtcars_map.collect()

[('Mazda RX4', 21.0),
 ('Mazda RX4 Wag', 21.0),
 ('Datsun 710', 22.8),
 ('Hornet 4 Drive', 21.4),
 ('Hornet Sportabout', 18.7),
 ('Valiant', 18.1),
 ('Duster 360', 14.3),
 ('Merc 240D', 24.4),
 ('Merc 230', 22.8),
 ('Merc 280', 19.2),
 ('Merc 280C', 17.8),
 ('Merc 450SE', 16.4),
 ('Merc 450SL', 17.3),
 ('Merc 450SLC', 15.2),
 ('Cadillac Fleetwood', 10.4),
 ('Lincoln Continental', 10.4),
 ('Chrysler Imperial', 14.7),
 ('Fiat 128', 32.4),
 ('Honda Civic', 30.4),
 ('Toyota Corolla', 33.9),
 ('Toyota Corona', 21.5),
 ('Dodge Challenger', 15.5),
 ('AMC Javelin', 15.2),
 ('Camaro Z28', 13.3),
 ('Pontiac Firebird', 19.2),
 ('Fiat X1-9', 27.3),
 ('Porsche 914-2', 26.0),
 ('Lotus Europa', 30.4),
 ('Ford Pantera L', 15.8),
 ('Ferrari Dino', 19.7),
 ('Maserati Bora', 15.0),
 ('Volvo 142E', 21.4)]

In [55]:
# first() returns just the first element (a single tuple, not a list)
mtcars_map.first()

('Mazda RX4', 21.0)

In [56]:
# Number of (name, mpg) pairs in the RDD
mtcars_map.count()

32

In [60]:
# mapValues applies the function to the value of each (key, value) pair and
# leaves the key unchanged: every mpg becomes the list [mpg, mpg * 10].
mtcars_mapvalues = mtcars_map.mapValues(lambda mpg: [mpg, mpg * 10])

mtcars_mapvalues.take(5)

[('Mazda RX4', [21.0, 210.0]),
 ('Mazda RX4 Wag', [21.0, 210.0]),
 ('Datsun 710', [22.8, 228.0]),
 ('Hornet 4 Drive', [21.4, 214.0]),
 ('Hornet Sportabout', [18.7, 187.0])]

In [63]:
# Column names as a plain Python list; the first one is the auto-generated '_c0'
print(mtcars.columns)

['_c0', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']


In [64]:
# Rename the first column to 'model'.
# withColumnRenamed operates on the DataFrame directly, so the existing schema
# is kept as-is. The previous approach (`mtcars.rdd.toDF(colnames)`) converted
# to an RDD and back, which re-infers the column types from the Python values
# and is unnecessary work just to change a name.
mtcars = mtcars.withColumnRenamed(mtcars.columns[0], 'model')

# Keep `colnames` defined (now containing 'model' first) for any cell that reads it.
colnames = mtcars.columns

mtcars.show(5)

+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|            model| mpg|cyl| disp| hp|drat|   wt| qsec| vs| am|gear|carb|
+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|        Mazda RX4|21.0|  6|160.0|110| 3.9| 2.62|16.46|  0|  1|   4|   4|
|    Mazda RX4 Wag|21.0|  6|160.0|110| 3.9|2.875|17.02|  0|  1|   4|   4|
|       Datsun 710|22.8|  4|108.0| 93|3.85| 2.32|18.61|  1|  1|   4|   1|
|   Hornet 4 Drive|21.4|  6|258.0|110|3.08|3.215|19.44|  1|  0|   3|   1|
|Hornet Sportabout|18.7|  8|360.0|175|3.15| 3.44|17.02|  0|  0|   3|   2|
+-----------------+----+---+-----+---+----+-----+-----+---+---+----+----+
only showing top 5 rows



In [65]:
# Check the type of `mtcars`
print(type(mtcars))

<class 'pyspark.sql.dataframe.DataFrame'>


### Merging multiple columns

In [71]:
# Return all rows to the driver as Row objects; the first field is now named `model`
mtcars.collect()

[Row(model='Mazda RX4', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.62, qsec=16.46, vs=0, am=1, gear=4, carb=4),
 Row(model='Mazda RX4 Wag', mpg=21.0, cyl=6, disp=160.0, hp=110, drat=3.9, wt=2.875, qsec=17.02, vs=0, am=1, gear=4, carb=4),
 Row(model='Datsun 710', mpg=22.8, cyl=4, disp=108.0, hp=93, drat=3.85, wt=2.32, qsec=18.61, vs=1, am=1, gear=4, carb=1),
 Row(model='Hornet 4 Drive', mpg=21.4, cyl=6, disp=258.0, hp=110, drat=3.08, wt=3.215, qsec=19.44, vs=1, am=0, gear=3, carb=1),
 Row(model='Hornet Sportabout', mpg=18.7, cyl=8, disp=360.0, hp=175, drat=3.15, wt=3.44, qsec=17.02, vs=0, am=0, gear=3, carb=2),
 Row(model='Valiant', mpg=18.1, cyl=6, disp=225.0, hp=105, drat=2.76, wt=3.46, qsec=20.22, vs=1, am=0, gear=3, carb=1),
 Row(model='Duster 360', mpg=14.3, cyl=8, disp=360.0, hp=245, drat=3.21, wt=3.57, qsec=15.84, vs=0, am=0, gear=3, carb=4),
 Row(model='Merc 240D', mpg=24.4, cyl=4, disp=146.7, hp=62, drat=3.69, wt=3.19, qsec=20.0, vs=1, am=0, gear=4, carb=2),
 Row(model

In [66]:
from pyspark.sql import Row

# Keep the first field as `model` and pack every remaining field of the row
# into a single `values` field.
mtcars_rdd = mtcars.rdd.map(
    lambda row: Row(model=row[0], values=row[1:])
)

mtcars_rdd.take(5)

[Row(model='Mazda RX4', values=(21.0, 6, 160.0, 110, 3.9, 2.62, 16.46, 0, 1, 4, 4)),
 Row(model='Mazda RX4 Wag', values=(21.0, 6, 160.0, 110, 3.9, 2.875, 17.02, 0, 1, 4, 4)),
 Row(model='Datsun 710', values=(22.8, 4, 108.0, 93, 3.85, 2.32, 18.61, 1, 1, 4, 1)),
 Row(model='Hornet 4 Drive', values=(21.4, 6, 258.0, 110, 3.08, 3.215, 19.44, 1, 0, 3, 1)),
 Row(model='Hornet Sportabout', values=(18.7, 8, 360.0, 175, 3.15, 3.44, 17.02, 0, 0, 3, 2))]

In [67]:
## Let us create a new dataframe using the new rdd

In [72]:
# Build a two-column DataFrame (model, values) from the RDD of Row objects
mtcars_df = spark.createDataFrame(mtcars_rdd)

# truncate=False so the wide `values` column is printed in full
mtcars_df.show(5, truncate=False)

+-----------------+-----------------------------------------------------+
|model            |values                                               |
+-----------------+-----------------------------------------------------+
|Mazda RX4        |{21.0, 6, 160.0, 110, 3.9, 2.62, 16.46, 0, 1, 4, 4}  |
|Mazda RX4 Wag    |{21.0, 6, 160.0, 110, 3.9, 2.875, 17.02, 0, 1, 4, 4} |
|Datsun 710       |{22.8, 4, 108.0, 93, 3.85, 2.32, 18.61, 1, 1, 4, 1}  |
|Hornet 4 Drive   |{21.4, 6, 258.0, 110, 3.08, 3.215, 19.44, 1, 0, 3, 1}|
|Hornet Sportabout|{18.7, 8, 360.0, 175, 3.15, 3.44, 17.02, 0, 0, 3, 2} |
+-----------------+-----------------------------------------------------+
only showing top 5 rows



### Split one column
We use the above DataFrame as our example data. Again, we need to convert the DataFrame to an RDD to achieve our goal.

Let's split the values column into two columns: x1 and x2. The first 5 values will be in column x1 and the remaining values will be in column x2.

In [70]:
# Cut the `values` field at position 5: x1 receives the first five entries,
# x2 receives everything after them.
mtcars_rdd_2 = mtcars_df.rdd.map(
    lambda row: Row(model=row[0], x1=row[1][:5], x2=row[1][5:])
)

# Turn the reshaped RDD back into a DataFrame and preview it untruncated.
mtcars_df_2 = spark.createDataFrame(mtcars_rdd_2)
mtcars_df_2.show(5, truncate=False)

+-----------------+---------------------------+--------------------------+
|model            |x1                         |x2                        |
+-----------------+---------------------------+--------------------------+
|Mazda RX4        |{21.0, 6, 160.0, 110, 3.9} |{2.62, 16.46, 0, 1, 4, 4} |
|Mazda RX4 Wag    |{21.0, 6, 160.0, 110, 3.9} |{2.875, 17.02, 0, 1, 4, 4}|
|Datsun 710       |{22.8, 4, 108.0, 93, 3.85} |{2.32, 18.61, 1, 1, 4, 1} |
|Hornet 4 Drive   |{21.4, 6, 258.0, 110, 3.08}|{3.215, 19.44, 1, 0, 3, 1}|
|Hornet Sportabout|{18.7, 8, 360.0, 175, 3.15}|{3.44, 17.02, 0, 0, 3, 2} |
+-----------------+---------------------------+--------------------------+
only showing top 5 rows

