Query a list of CITY names from STATION for cities that have an even ID number. Print the results in any order, but exclude duplicates from the answer.

The STATION table is described as follows:


![Output](https://s3.amazonaws.com/hr-challenge-images/9336/1449345840-5f0a551030-Station.jpg)
where LAT_N is the northern latitude and LONG_W is the western longitude.

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType

# Obtain the Spark session for this notebook (created if none exists yet)
spark = SparkSession.builder.appName("StationTable").getOrCreate()

# STATION schema: each (column name, Spark type) pair becomes a nullable field
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("ID", IntegerType()),
        ("City", StringType()),
        ("State", StringType()),
        ("LAT_N", FloatType()),
        ("LONG_W", FloatType()),
    )
])

# Sample rows, one tuple per station: (ID, City, State, LAT_N, LONG_W)
data = [
    (1, "New York", "NY", 40.7128, -74.0060),
    (2, "Los Angeles", "CA", 34.0522, -118.2437),
    (3, "Chicago", "IL", 41.8781, -87.6298),
    (4, "Houston", "TX", 29.7604, -95.3698),
    (5, "Phoenix", "AZ", 33.4484, -112.0740),
    (6, "San Antonio", "TX", 29.4241, -98.4936),
    (7, "San Diego", "CA", 32.7157, -117.1611),
    (8, "Dallas", "TX", 32.7767, -96.7970),
    (9, "San Jose", "CA", 37.3382, -121.8863),
    (10, "Austin", "TX", 30.2672, -97.7431),
    (11, "Toronto", "ON", 43.65107, -79.347015),
    (12, "London", "ENG", 51.5074, -0.1278),
    (13, "Vancouver", "BC", 49.2827, -123.1207),
]

# Materialize the rows as a DataFrame with the explicit schema
df = spark.createDataFrame(data=data, schema=schema)

# Register the DataFrame under the name STATION so SQL cells can query it
df.createOrReplaceTempView("STATION")

# To persist it as a permanent table instead, uncomment:
# df.write.mode("overwrite").saveAsTable("STATION")

# Print the table contents for a quick visual check
df.show()


+---+-----------+-----+--------+----------+
| ID|       City|State|   LAT_N|    LONG_W|
+---+-----------+-----+--------+----------+
|  1|   New York|   NY| 40.7128|   -74.006|
|  2|Los Angeles|   CA| 34.0522| -118.2437|
|  3|    Chicago|   IL| 41.8781|  -87.6298|
|  4|    Houston|   TX| 29.7604|  -95.3698|
|  5|    Phoenix|   AZ| 33.4484|  -112.074|
|  6|San Antonio|   TX| 29.4241|  -98.4936|
|  7|  San Diego|   CA| 32.7157| -117.1611|
|  8|     Dallas|   TX| 32.7767|   -96.797|
|  9|   San Jose|   CA| 37.3382| -121.8863|
| 10|     Austin|   TX| 30.2672|  -97.7431|
| 11|    Toronto|   ON|43.65107|-79.347015|
| 12|     London|  ENG| 51.5074|   -0.1278|
| 13|  Vancouver|   BC| 49.2827| -123.1207|
+---+-----------+-----+--------+----------+



In [0]:
%sql
-- Cities whose station ID is even. DISTINCT drops repeated rows, as the task requires.
-- State is shown next to CITY for readability, so de-duplication is per (CITY, State) pair;
-- remove State from the select list for a strictly CITY-only answer.
SELECT DISTINCT CITY, State
FROM STATION
WHERE ID % 2 = 0

CITY,State
Los Angeles,CA
Houston,TX
San Antonio,TX
Dallas,TX
Austin,TX
London,ENG


In [0]:
from pyspark.sql.functions import col

# DataFrame-API equivalent of the SQL cell: keep stations with an even ID,
# project City and State, and drop repeated rows as the task requires.
# State is shown for readability, so de-duplication is per (City, State) pair;
# select only "City" for a strictly CITY-only answer.
(
    df.where(col("ID") % 2 == 0)
    .select(col("City"), col("State"))
    .distinct()
    .show()
)

+-----------+-----+
|       City|State|
+-----------+-----+
|Los Angeles|   CA|
|    Houston|   TX|
|San Antonio|   TX|
|     Dallas|   TX|
|     Austin|   TX|
|     London|  ENG|
+-----------+-----+

