
Query the NAME field for all American cities in the CITY table with populations larger than 120000. The CountryCode for America is USA.

The CITY table is described as follows:

![CITY table schema](https://s3.amazonaws.com/hr-challenge-images/8137/1449729804-f21d187d0f-CITY.jpg)

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# Obtain (or reuse) the Spark session backing this notebook
spark = SparkSession.builder.appName("CityTable").getOrCreate()

# CITY table layout: (column name, Spark type) pairs; every column is nullable
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in [
        ("ID", IntegerType()),
        ("Name", StringType()),
        ("CountryCode", StringType()),
        ("District", StringType()),
        ("Population", IntegerType()),
    ]
])

# Sample rows in schema order: ID, Name, CountryCode, District, Population
data = [
    # United States
    (1, "New York", "USA", "New York", 8400000),
    (2, "Los Angeles", "USA", "California", 4000000),
    (3, "Chicago", "USA", "Illinois", 2700000),
    (4, "Houston", "USA", "Texas", 2300000),
    (5, "Philadelphia", "USA", "Pennsylvania", 1600000),
    (6, "Phoenix", "USA", "Arizona", 1500000),
    (7, "San Antonio", "USA", "Texas", 1400000),
    (8, "San Diego", "USA", "California", 1300000),
    (9, "Dallas", "USA", "Texas", 1200000),
    (10, "San Jose", "USA", "California", 1000000),
    (11, "Austin", "USA", "Texas", 950000),
    # Other countries, included so the CountryCode filter has rows to exclude
    (12, "London", "GBR", "England", 8000000),
    (13, "Toronto", "CAN", "Ontario", 2800000),
]

# Materialize the rows as a DataFrame with the explicit schema
df = spark.createDataFrame(data, schema=schema)

# Expose the DataFrame to SQL cells under the name CITY (session-scoped view)
df.createOrReplaceTempView("CITY")

# To persist it as a permanent table instead, uncomment:
# df.write.mode("overwrite").saveAsTable("CITY")

# Print the table contents for a visual check
df.show()


+---+------------+-----------+------------+----------+
| ID|        Name|CountryCode|    District|Population|
+---+------------+-----------+------------+----------+
|  1|    New York|        USA|    New York|   8400000|
|  2| Los Angeles|        USA|  California|   4000000|
|  3|     Chicago|        USA|    Illinois|   2700000|
|  4|     Houston|        USA|       Texas|   2300000|
|  5|Philadelphia|        USA|Pennsylvania|   1600000|
|  6|     Phoenix|        USA|     Arizona|   1500000|
|  7| San Antonio|        USA|       Texas|   1400000|
|  8|   San Diego|        USA|  California|   1300000|
|  9|      Dallas|        USA|       Texas|   1200000|
| 10|    San Jose|        USA|  California|   1000000|
| 11|      Austin|        USA|       Texas|    950000|
| 12|      London|        GBR|     England|   8000000|
| 13|     Toronto|        CAN|     Ontario|   2800000|
+---+------------+-----------+------------+----------+



In [0]:
%sql
-- Names of all US cities (CountryCode 'USA') with a population above 120000.
-- The country code is written as a single-quoted string literal: in standard
-- SQL double quotes denote identifiers, so "USA" is only read as a string
-- when the engine is configured leniently.
SELECT Name
FROM CITY
WHERE CountryCode = 'USA'
  AND Population > 120000

Name
New York
Los Angeles
Chicago
Houston
Philadelphia
Phoenix
San Antonio
San Diego
Dallas
San Jose
Austin


In [0]:
# DataFrame-API equivalent of the SQL cell: keep US rows above the
# population threshold, then project only the city name.
result = (
    df
    .where("CountryCode = 'USA' and Population > 120000")
    .select("Name")
)
result.show()

+------------+
|        Name|
+------------+
|    New York|
| Los Angeles|
|     Chicago|
|     Houston|
|Philadelphia|
|     Phoenix|
| San Antonio|
|   San Diego|
|      Dallas|
|    San Jose|
|      Austin|
+------------+

