In [0]:
%sql
-- dim rider insertion
-- Loads one dim_rider row per distinct bike type found in silver_trip_data.
-- DISTINCT keeps the dimension from receiving one row per trip, which would
-- otherwise multiply rows in any later join on Bike_Type.
-- NULL bike types are skipped because an equality join can never match them.
-- The NOT EXISTS guard makes the cell safe to re-run without adding duplicates.
INSERT INTO dim_rider (Bike_Type)
SELECT DISTINCT
    std.rideable_type
FROM silver_trip_data AS std
WHERE std.rideable_type IS NOT NULL
  AND NOT EXISTS (
      SELECT 1
      FROM dim_rider AS existing
      WHERE existing.Bike_Type = std.rideable_type
  );

In [0]:
%skip
%sql

-- dim station insertion
-- Stacks the start and end endpoints of every trip, keeps only endpoints whose
-- name, latitude and longitude are all present, and inserts each distinct
-- (name, latitude, longitude) combination once.
INSERT INTO dim_station (Station_Name, Latitude, Longitude)
SELECT DISTINCT
    endpoint.station_name,
    endpoint.lat,
    endpoint.lng
FROM (
    -- trip start endpoints
    SELECT
        start_station_name AS station_name,
        start_lat AS lat,
        start_lng AS lng
    FROM silver_trip_data

    UNION ALL

    -- trip end endpoints
    SELECT
        end_station_name AS station_name,
        end_lat AS lat,
        end_lng AS lng
    FROM silver_trip_data
) AS endpoint
WHERE endpoint.station_name IS NOT NULL
  AND endpoint.lat IS NOT NULL
  AND endpoint.lng IS NOT NULL;

In [0]:
%sql
-- DIM Station SCD type 1
-- Upserts dim_station from the stations seen as trip starts or trip ends in
-- silver_trip_data, matching on the natural key Station_ID_Natural.
--
-- The source is reduced to exactly ONE row per Station_ID_Natural before the
-- merge. Without that, a station reported with more than one
-- (name, latitude, longitude) combination produces several source rows for the
-- same target row, which makes the MERGE fail on update or insert duplicates.
-- The surviving row is the most frequently observed combination; ties are
-- broken by name, latitude, longitude so the choice is deterministic.
MERGE INTO dim_station AS target
USING (
    WITH StationSightings AS (
        -- every trip start that carries an id, a name and a latitude
        SELECT
            start_station_id AS Station_ID_Natural,
            start_station_name AS Station_Name,
            start_lat AS Latitude,
            start_lng AS Longitude
        FROM silver_trip_data
        WHERE start_station_id IS NOT NULL
          AND start_station_name IS NOT NULL
          AND start_lat IS NOT NULL

        UNION ALL

        -- every trip end that carries an id, a name and a latitude
        SELECT
            end_station_id AS Station_ID_Natural,
            end_station_name AS Station_Name,
            end_lat AS Latitude,
            end_lng AS Longitude
        FROM silver_trip_data
        WHERE end_station_id IS NOT NULL
          AND end_station_name IS NOT NULL
          AND end_lat IS NOT NULL
    ),
    SightingCounts AS (
        -- how often each attribute combination was observed per station id
        SELECT
            Station_ID_Natural,
            Station_Name,
            Latitude,
            Longitude,
            COUNT(*) AS Sighting_Count
        FROM StationSightings
        GROUP BY
            Station_ID_Natural,
            Station_Name,
            Latitude,
            Longitude
    ),
    RankedStations AS (
        SELECT
            Station_ID_Natural,
            Station_Name,
            Latitude,
            Longitude,
            ROW_NUMBER() OVER (
                PARTITION BY Station_ID_Natural
                ORDER BY Sighting_Count DESC, Station_Name, Latitude, Longitude
            ) AS Station_Rank
        FROM SightingCounts
    )
    SELECT
        Station_ID_Natural,
        Station_Name,
        Latitude,
        Longitude
    FROM RankedStations
    WHERE Station_Rank = 1
) AS source
ON target.Station_ID_Natural = source.Station_ID_Natural

-- SCD Type 1: overwrite in place when any tracked attribute changed.
-- <=> is the null-safe equality operator, so a change from or to NULL
-- (Longitude is not filtered for NULL above) is detected as well.
WHEN MATCHED AND NOT (
        target.Station_Name <=> source.Station_Name AND
        target.Latitude <=> source.Latitude AND
        target.Longitude <=> source.Longitude
    )
    THEN UPDATE SET
        target.Station_Name = source.Station_Name,
        target.Latitude = source.Latitude,
        target.Longitude = source.Longitude
WHEN NOT MATCHED THEN
    INSERT (Station_ID_Natural, Station_Name, Latitude, Longitude)
    VALUES (source.Station_ID_Natural, source.Station_Name, source.Latitude, source.Longitude);

In [0]:
%sql
-- staging for dim weather (bin, text)
-- Builds a temporary view that turns each weather reading into a text label
-- and a numeric bin code for temperature, humidity, precipitation and wind.
-- Rows without a temperature or a humidity reading are excluded.
-- Precipitation and wind speed may still be NULL; those readings get NULL
-- labels/bins instead of falling through to the ELSE branch, which would
-- wrongly report them as 'Heavy Rain' / 'Windy'. A negative precipitation
-- value is treated the same way (invalid reading).
-- WHEN branches are evaluated in order, so each branch only states its upper bound.

CREATE OR REPLACE TEMPORARY VIEW staging_dim_weather AS
SELECT
    weather_ts, -- natural key

    -- Temperature category (temp_celsius is never NULL here, see WHERE)
    CASE
        WHEN temp_celsius <= 0 THEN 'Freezing'
        WHEN temp_celsius <= 10 THEN 'Cold'
        WHEN temp_celsius <= 20 THEN 'Cool'
        WHEN temp_celsius <= 30 THEN 'Warm'
        ELSE 'Hot'
    END AS Temp_type_text,

    CASE
        WHEN temp_celsius <= 0 THEN 10
        WHEN temp_celsius <= 10 THEN 20
        WHEN temp_celsius <= 20 THEN 30
        WHEN temp_celsius <= 30 THEN 40
        ELSE 50
    END AS Temp_type_bin, -- Bin code (10, 20, 30, 40, 50)

    -- Humidity category (hmdt_percent is never NULL here, see WHERE)
    CASE
        WHEN hmdt_percent < 30 THEN 'Low'
        WHEN hmdt_percent <= 70 THEN 'Moderate'
        ELSE 'High'
    END AS Humidity_categories_text,

    CASE
        WHEN hmdt_percent < 30 THEN 1
        WHEN hmdt_percent <= 70 THEN 2
        ELSE 3
    END AS Humidity_categories_bin,

    -- Precipitation category (NULL when the reading is missing or negative)
    CASE
        WHEN prcp_mm IS NULL OR prcp_mm < 0 THEN NULL
        WHEN prcp_mm = 0 THEN 'None'
        WHEN prcp_mm <= 5 THEN 'Light Rain'
        WHEN prcp_mm <= 20 THEN 'Moderate Rain'
        ELSE 'Heavy Rain'
    END AS Precipitation_types_text,

    CASE
        WHEN prcp_mm IS NULL OR prcp_mm < 0 THEN NULL
        WHEN prcp_mm = 0 THEN 1
        WHEN prcp_mm <= 5 THEN 2
        WHEN prcp_mm <= 20 THEN 3
        ELSE 4
    END AS Precipitation_types_bin,

    -- Wind speed category (NULL when the reading is missing)
    CASE
        WHEN wnd_spd_kph IS NULL THEN NULL
        WHEN wnd_spd_kph <= 5 THEN 'Calm'
        WHEN wnd_spd_kph <= 20 THEN 'Breezy'
        ELSE 'Windy'
    END AS Wind_type_categories_text,

    CASE
        WHEN wnd_spd_kph IS NULL THEN NULL
        WHEN wnd_spd_kph <= 5 THEN 1
        WHEN wnd_spd_kph <= 20 THEN 2
        ELSE 3
    END AS Wind_type_categories_bin

FROM
    divvy.default.silver_weather_data
WHERE
    temp_celsius IS NOT NULL
    AND hmdt_percent IS NOT NULL;

In [0]:
%sql
-- dim weather insertion
-- Appends every row of the staging_dim_weather view to dim_weather.
-- The reading timestamp (weather_ts) is stored as Weather_Key; the remaining
-- columns are copied one-to-one under the same names.
INSERT INTO dim_weather (
  Weather_Key,
  Temp_type_text,
  Temp_type_bin,
  Humidity_categories_text,
  Humidity_categories_bin,
  Precipitation_types_text,
  Precipitation_types_bin,
  Wind_type_categories_text,
  Wind_type_categories_bin
)
SELECT
  stg.weather_ts                AS Weather_Key,
  stg.Temp_type_text            AS Temp_type_text,
  stg.Temp_type_bin             AS Temp_type_bin,
  stg.Humidity_categories_text  AS Humidity_categories_text,
  stg.Humidity_categories_bin   AS Humidity_categories_bin,
  stg.Precipitation_types_text  AS Precipitation_types_text,
  stg.Precipitation_types_bin   AS Precipitation_types_bin,
  stg.Wind_type_categories_text AS Wind_type_categories_text,
  stg.Wind_type_categories_bin  AS Wind_type_categories_bin
FROM staging_dim_weather AS stg;

In [0]:
%sql
-- insertion into fact table part 1
-- Creates one fact_trip row per trip in silver_trip_data that has a ride id
-- and both timestamps, resolving the rider, station and weather keys.
-- All dimension joins are LEFT joins, so a trip without a matching dimension
-- row is still loaded with a NULL key.
INSERT INTO divvy.default.fact_trip (
  Trip_Key,
  Rider_Key,
  Start_Station_Key,
  End_Station_Key,
  Weather_Key
)
SELECT
  std.ride_id AS Trip_Key,
  rider.Rider_Key,
  start_station.Station_Key AS Start_Station_Key,
  end_station.Station_Key AS End_Station_Key,
  weather.Weather_Key
FROM silver_trip_data AS std
-- Collapse dim_rider to one key per bike type so that duplicate Bike_Type rows
-- in the dimension cannot multiply fact rows.
LEFT JOIN (
    SELECT
      Bike_Type,
      MIN(Rider_Key) AS Rider_Key
    FROM dim_rider
    GROUP BY Bike_Type
) AS rider
  ON std.rideable_type = rider.Bike_Type
-- Stations are resolved through the natural key that the dim_station MERGE
-- uses (Station_ID_Natural). Matching on name plus exact latitude/longitude
-- equality misses trips whose coordinates differ from the stored ones.
LEFT JOIN dim_station AS start_station
  ON std.start_station_id = start_station.Station_ID_Natural
LEFT JOIN dim_station AS end_station
  ON std.end_station_id = end_station.Station_ID_Natural
-- NOTE(review): this matches only when trip_start_ts equals a weather reading
-- timestamp exactly. If weather readings are hourly, the trip timestamp
-- probably needs truncating to the hour first -- confirm the granularity of
-- dim_weather.Weather_Key.
LEFT JOIN dim_weather AS weather
  ON std.trip_start_ts = weather.Weather_Key
WHERE
  std.ride_id IS NOT NULL
  AND std.trip_start_ts IS NOT NULL
  AND std.trip_end_ts IS NOT NULL;

In [0]:
%sql
-- insertion into fact table part 2
-- Fills Trip_Duration_Minutes on the fact rows that part 1 created, matching
-- fact_trip.Trip_Key to silver_trip_data.ride_id (part 1 loads ride_id as
-- Trip_Key). A plain INSERT here would add new rows that hold nothing but the
-- duration instead of completing the existing trip rows.
-- NOTE(review): assumes ride_id is unique in silver_trip_data; MERGE fails if
-- several source rows match the same fact row -- confirm.
MERGE INTO fact_trip AS fact
USING (
    SELECT
        ride_id,
        Trip_Duration_Min
    FROM silver_trip_data
    WHERE ride_id IS NOT NULL
) AS std
ON fact.Trip_Key = std.ride_id
WHEN MATCHED THEN
    UPDATE SET fact.Trip_Duration_Minutes = std.Trip_Duration_Min;

In [0]:
%sql
-- Fills Trip_Distance_Km on the existing fact rows, matching
-- fact_trip.Trip_Key to silver_trip_data.ride_id (part 1 loads ride_id as
-- Trip_Key). A plain INSERT here would add new rows that hold nothing but the
-- distance instead of completing the existing trip rows.
-- Trips without a distance are left untouched.
-- NOTE(review): assumes ride_id is unique in silver_trip_data; MERGE fails if
-- several source rows match the same fact row -- confirm.
MERGE INTO fact_trip AS fact
USING (
    SELECT
        ride_id,
        Trip_Distance_Km
    FROM silver_trip_data
    WHERE ride_id IS NOT NULL
      AND Trip_Distance_Km IS NOT NULL
) AS std
ON fact.Trip_Key = std.ride_id
WHEN MATCHED THEN
    UPDATE SET fact.Trip_Distance_Km = std.Trip_Distance_Km;
  

In [0]:
%sql
-- insertion into fact table part 3
-- Fills the day-level and hour-level date keys on the existing fact rows,
-- matching fact_trip.Trip_Key to silver_trip_data.ride_id (part 1 loads
-- ride_id as Trip_Key). A plain INSERT here would add new rows that hold
-- nothing but date keys instead of completing the existing trip rows.
-- Date lookups are LEFT joins, so a timestamp without a matching dimension
-- row yields a NULL key rather than dropping the trip.
-- NOTE(review): assumes ride_id is unique in silver_trip_data and that
-- dim_date_day / dim_date_hour hold one row per day / hour; MERGE fails if
-- several source rows match the same fact row -- confirm.
MERGE INTO fact_trip AS fact
USING (
    SELECT
      std.ride_id,
      d_start.Date_Key AS Start_Date_Key,
      d_end.Date_Key AS End_Date_Key,
      h_start.Date_Key AS Start_Date_Hour_Key,
      h_end.Date_Key AS End_Date_Hour_Key
    FROM silver_trip_data AS std
    LEFT JOIN dim_date_day AS d_start
      ON date(std.trip_start_ts) = d_start.Full_Date
    LEFT JOIN dim_date_day AS d_end
      ON date(std.trip_end_ts) = d_end.Full_Date
    -- the hour dimension is matched on the timestamp rendered at hour precision
    LEFT JOIN dim_date_hour AS h_start
      ON date_format(std.trip_start_ts, 'yyyy-MM-dd HH:00:00') = h_start.Date_Key
    LEFT JOIN dim_date_hour AS h_end
      ON date_format(std.trip_end_ts, 'yyyy-MM-dd HH:00:00') = h_end.Date_Key
    WHERE
      std.ride_id IS NOT NULL
      AND std.trip_start_ts IS NOT NULL
      AND std.trip_end_ts IS NOT NULL
) AS src
ON fact.Trip_Key = src.ride_id
WHEN MATCHED THEN
    UPDATE SET
      fact.Start_Date_Key = src.Start_Date_Key,
      fact.End_Date_Key = src.End_Date_Key,
      fact.Start_Date_Hour_Key = src.Start_Date_Hour_Key,
      fact.End_Date_Hour_Key = src.End_Date_Hour_Key;