# Create Hive Tables

If not already done, we first need to create some Hive tables.

In [1]:
# Make sure the `training` database exists; IF NOT EXISTS keeps the cell
# safe to re-run.
create_database_stmt = """
    CREATE DATABASE IF NOT EXISTS training
"""
spark.sql(create_database_stmt)

DataFrame[]

In [None]:
# External text table over the raw weather files: every input line ends up
# in the single `data` column, and the table is partitioned by `year`.
create_weather_raw_stmt = """
    CREATE EXTERNAL TABLE IF NOT EXISTS training.weather_raw(data STRING) 
    PARTITIONED BY(year STRING) STORED AS TEXTFILE
    LOCATION 's3://dimajix-training/data/weather'
"""
spark.sql(create_weather_raw_stmt)

# Register one partition per year, each pointing at that year's
# sub-directory. range(2005, 2014) covers the years 2005 through 2013.
# IF NOT EXISTS makes the statement idempotent, so re-running this cell
# does not fail on partitions that were already added (matching the
# IF NOT EXISTS guards used by the other statements in this notebook).
for year in range(2005, 2014):
    spark.sql(
        """
        ALTER TABLE training.weather_raw
        ADD IF NOT EXISTS PARTITION(year={year})
        LOCATION 's3://dimajix-training/data/weather/{year}'
    """.format(
            year=year
        )
    )

In [None]:
# View on top of training.weather_raw that cuts named columns out of fixed
# character positions of the raw `data` line. wind_speed and
# air_temperature are cast to FLOAT and divided by 10; all other extracted
# fields stay as substrings.
create_weather_view_stmt = """
CREATE VIEW IF NOT EXISTS training.weather AS
    SELECT 
        year,
        SUBSTR(`data`,5,6) AS `usaf`,
        SUBSTR(`data`,11,5) AS `wban`, 
        SUBSTR(`data`,16,8) AS `date`, 
        SUBSTR(`data`,24,4) AS `time`,
        SUBSTR(`data`,42,5) AS report_type,
        SUBSTR(`data`,61,3) AS wind_direction, 
        SUBSTR(`data`,64,1) AS wind_direction_qual, 
        SUBSTR(`data`,65,1) AS wind_observation, 
        CAST(SUBSTR(`data`,66,4) AS FLOAT)/10 AS wind_speed,
        SUBSTR(`data`,70,1) AS wind_speed_qual,
        CAST(SUBSTR(`data`,88,5) AS FLOAT)/10 AS air_temperature, 
        SUBSTR(`data`,93,1) AS air_temperature_qual 
    FROM training.weather_raw
"""
spark.sql(create_weather_view_stmt)

In [None]:
# External table over the station master data, read as comma-separated
# text through OpenCSVSerde.
#
# The backslashes are doubled because this is a regular (non-raw) Python
# string: the SQL text handed to Spark contains "\"" for quoteChar and
# "\\" for escapeChar.
create_stations_stmt = """
CREATE EXTERNAL TABLE IF NOT EXISTS training.stations(
    usaf STRING,
    wban STRING,
    name STRING,
    country STRING,
    state STRING,
    icao STRING,
    latitude FLOAT,
    longitude FLOAT,
    elevation FLOAT,
    date_begin STRING,
    date_end STRING) 
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = "\\"",
   "escapeChar"    = "\\\\"
)
STORED AS TEXTFILE
LOCATION 's3://dimajix-training/data/weather/isd-history'
"""
spark.sql(create_stations_stmt)