# Spark API

## Create a spark data frame that contains your favorite programming languages.

In [25]:
import pandas as pd

import pyspark
from pyspark.sql.functions import lit, concat, sum, avg, min, max, count, mean


from pydataset import data

spark = pyspark.sql.SparkSession.builder.getOrCreate()

### The name of the column should be language

In [2]:
languages = ['Python', 'SQL', 'Javascript', 'Java', 'Go', 'Julia']

In [3]:
df = spark.createDataFrame(pd.DataFrame(languages, columns=['language']))
df

DataFrame[language: string]

### View the schema of the dataframe


In [4]:
df.printSchema()

root
 |-- language: string (nullable = true)



### Output the shape of the dataframe

In [5]:
print('Number of Columns:', len(df.columns))
print('Number of Rows:', df.count())

Number of Columns: 1
Number of Rows: 6


### Show the first 5 records in the dataframe

In [6]:
df.show(5)

+----------+
|  language|
+----------+
|    Python|
|       SQL|
|Javascript|
|      Java|
|        Go|
+----------+
only showing top 5 rows



## Load the mpg dataset as a spark dataframe.

In [9]:
mpg = spark.createDataFrame(data('mpg'))
mpg

DataFrame[manufacturer: string, model: string, displ: double, year: bigint, cyl: bigint, trans: string, drv: string, cty: bigint, hwy: bigint, fl: string, class: string]

### Create 1 column of output that contains a message like the one below:

`The 1999 audi a4 has a 4 cylinder engine.`

In [33]:
mpg.select(concat(lit('The '), mpg.year, lit(' '), mpg.manufacturer, lit(' '), mpg.model, lit(' has a '), mpg.cyl, lit(' cylinder engine.')).alias('vehicle_info')).show()

+--------------------+
|        vehicle_info|
+--------------------+
|The 1999 audi a4 ...|
|The 1999 audi a4 ...|
|The 2008 audi a4 ...|
|The 2008 audi a4 ...|
|The 1999 audi a4 ...|
|The 1999 audi a4 ...|
|The 2008 audi a4 ...|
|The 1999 audi a4 ...|
|The 1999 audi a4 ...|
|The 2008 audi a4 ...|
|The 2008 audi a4 ...|
|The 1999 audi a4 ...|
|The 1999 audi a4 ...|
|The 2008 audi a4 ...|
|The 2008 audi a4 ...|
|The 1999 audi a6 ...|
|The 2008 audi a6 ...|
|The 2008 audi a6 ...|
|The 2008 chevrole...|
|The 2008 chevrole...|
+--------------------+
only showing top 20 rows

