Imports section:

In [100]:
import pandas as pd
import os
import numpy as np
from sqlalchemy import create_engine,text
import numpy as np

In [101]:
db_username = os.getenv("DB_USERNAME")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

engine = create_engine(
    f"postgresql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
)

table_names = ['all_steam', 'genres', 'mdn_play_time','metacritic_review','play_time_by_player','regions','time_to_beat']

Creates Data Warehouse during initial load:

In [102]:
creation_query = f"""CREATE SCHEMA IF NOT EXISTS dwh;
DO
$body$
BEGIN
-- ORANGE BLOCK
CREATE TABLE IF NOT EXISTS dwh.dim_os(
	id SERIAL PRIMARY KEY,
	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_score_source(
	id SERIAL PRIMARY KEY,
	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_platform(
	id SERIAL PRIMARY KEY,
	name VARCHAR(255) UNIQUE
);
-- LIGHT BLUE BLOCK
CREATE TABLE IF NOT EXISTS dwh.dim_game(
	id BIGSERIAL PRIMARY KEY,
	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_date(
	id BIGINT PRIMARY KEY,
	year SMALLINT,
	month SMALLINT,
	day SMALLINT
);
--GREEN BLOCK
CREATE TABLE IF NOT EXISTS dwh.dim_region(
	id SERIAL PRIMARY KEY,
	current_name VARCHAR(255) UNIQUE, 
	previous_name VARCHAR(255)
);
--VIOLET BLOCK 
CREATE TABLE IF NOT EXISTS dwh.dim_technologies(
	id BIGSERIAL PRIMARY KEY,
 	name VARCHAR(255) UNIQUE,
	type VARCHAR(255)
);
CREATE TABLE IF NOT EXISTS dwh.dim_publisher(
	id BIGSERIAL PRIMARY KEY,
 	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_genre(
	id SERIAL PRIMARY KEY,
 	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_developer(
	id BIGSERIAL PRIMARY KEY,
 	name VARCHAR(255) UNIQUE
);
CREATE TABLE IF NOT EXISTS dwh.dim_difficulty(
	id SERIAL PRIMARY KEY,
 	name VARCHAR(255) UNIQUE
);
--FACT BLOCK
CREATE TABLE IF NOT EXISTS dwh.fact_players(
	id BIGSERIAL PRIMARY KEY,
	game_id INT REFERENCES dwh.dim_game(id),
	region_id INT REFERENCES dwh.dim_region(id),
	players NUMERIC(7,2)
);
CREATE TABLE IF NOT EXISTS dwh.fact_score(
	id BIGSERIAL PRIMARY KEY,
	game_id BIGINT REFERENCES dwh.dim_game(id),
	score_source_id INT REFERENCES dwh.dim_score_source(id),
	score NUMERIC(5,2) CHECK (score <= 100.00)
);
CREATE TABLE IF NOT EXISTS dwh.fact_gameplay(
	id BIGSERIAL PRIMARY KEY,
	game_id BIGINT REFERENCES dwh.dim_game(id),
	release_date BIGINT REFERENCES dwh.dim_date(id),
	achievements_count INT, 
	avg_play_time NUMERIC(10,2),
	mdn_play_time NUMERIC(10,2),
	difficulty_id INT REFERENCES dwh.dim_difficulty(id),
	hltb_single NUMERIC(10,2),
	hltb_complete NUMERIC(10,2),
	peak_players INT CHECK (peak_players >= 0),
	peak_date BIGINT REFERENCES dwh.dim_date(id),
	owners INT CHECK (owners >= 0)
);
--GRAY BLOCK
CREATE TABLE IF NOT EXISTS dwh.scr_os(
	game_id BIGINT REFERENCES dwh.dim_game(id),
	os_id INT REFERENCES dwh.dim_os(id),
	CONSTRAINT scr_os_pkey PRIMARY KEY(game_id,os_id)
);
CREATE TABLE IF NOT EXISTS dwh.scr_pltf(
	game_id BIGINT REFERENCES dwh.dim_game(id),
	platform_id INT REFERENCES dwh.dim_platform(id),
	CONSTRAINT scr_pltf_pkey PRIMARY KEY(game_id,platform_id)
);
CREATE TABLE IF NOT EXISTS dwh.gmp_genr(
	game_id BIGINT REFERENCES dwh.dim_game(id),
	genre_id INT REFERENCES dwh.dim_genre(id),
	CONSTRAINT gmp_genr_pkey PRIMARY KEY(game_id,genre_id)
);
CREATE TABLE IF NOT EXISTS dwh.gmp_dev(
	game_id BIGINT REFERENCES dwh.dim_game(id),
	developer_id BIGINT REFERENCES dwh.dim_developer(id),
	CONSTRAINT gmp_dev_pkey PRIMARY KEY(game_id,developer_id)
);
END
$body$
LANGUAGE plpgsql"""

with engine.connect() as connection:
    connection.execute(text(creation_query))
    connection.commit()

Load game dimension:

In [103]:
query = f"""
INSERT INTO dwh.dim_game (name)
SELECT DISTINCT game FROM (
    SELECT game FROM stage.all_steam
    UNION
    SELECT game FROM stage.genres
    UNION
    SELECT game FROM stage.mdn_play_time
    UNION
    SELECT game FROM stage.metacritic_review
    UNION
    SELECT game FROM stage.play_time_by_player
    UNION
    SELECT game FROM stage.regions
    UNION
    SELECT game FROM stage.time_to_beat
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load operational system (os) dimension:

In [104]:
query = f"""
INSERT INTO dwh.dim_os (name)
SELECT DISTINCT os FROM (
    SELECT os FROM stage.mdn_play_time
    UNION
    SELECT os FROM stage.time_to_beat
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load score_source dimension:

In [105]:
query = """
SELECT table_name, column_name
FROM information_schema.columns
WHERE column_name LIKE '%score%' 
OR column_name LIKE '%rating%' 
AND table_schema = 'stage'
"""

with engine.connect() as connection:
    result = connection.execute(text(query))
    columns = result.fetchall()

for table_name, column_name in columns:

    if '_uscore' in column_name:
        source = column_name.split('_uscore')[0]
    elif 'score' in column_name:
        source = column_name.split('score')[0]
    elif '_rating' in column_name:
        source = column_name.split('_rating')[0]
    elif 'rating' in column_name:
        source = 'steam'
    else:
        continue
  
    if source.strip() == '':
            continue
    
    query = f"""
    INSERT INTO dwh.dim_score_source (name)
    VALUES ('{source}')
    ON CONFLICT DO NOTHING
    """

    with engine.connect() as connection:
        result = connection.execute(text(query))
        connection.commit()

Load region dimension:

In [106]:
query = """
SELECT table_name, column_name
FROM information_schema.columns
WHERE column_name LIKE '%players%' 
AND table_schema = 'stage'
"""

with engine.connect() as connection:
    result = connection.execute(text(query))
    columns = result.fetchall()

for table_name, column_name in columns:
    source = column_name.split('_players')[0]

    query = f"""
    INSERT INTO dwh.dim_region (current_name)
    VALUES ('{source}')
    ON CONFLICT DO NOTHING
    """

    with engine.connect() as connection:
        result = connection.execute(text(query))
        connection.commit()

 

Load publisher dimension:

In [107]:
query = f"""
INSERT INTO dwh.dim_publisher (name)
SELECT DISTINCT publisher FROM (
    SELECT publisher FROM stage.mdn_play_time WHERE publisher IS NOT NULL
    UNION
    SELECT publisher FROM stage.all_steam WHERE publisher IS NOT NULL
    UNION
    SELECT publisher FROM stage.regions WHERE publisher IS NOT NULL
    UNION
    SELECT publisher FROM stage.time_to_beat WHERE publisher IS NOT NULL
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load developer dimension:

In [108]:
query = f"""
INSERT INTO dwh.dim_genre (name)
SELECT DISTINCT genre FROM (
    SELECT genre FROM stage.genres WHERE genre IS NOT NULL
    UNION
    SELECT genre FROM stage.mdn_play_time WHERE genre IS NOT NULL
    UNION
    SELECT genre FROM stage.regions WHERE genre IS NOT NULL
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load genre dimension:

In [109]:
query = f"""
INSERT INTO dwh.dim_developer (name)
SELECT DISTINCT developer FROM (
    SELECT developer FROM stage.genres WHERE developer IS NOT NULL
    UNION
    SELECT developer FROM stage.mdn_play_time WHERE developer IS NOT NULL
    UNION
    SELECT developer FROM stage.all_steam WHERE developer IS NOT NULL
    UNION
    SELECT developer FROM stage.time_to_beat WHERE developer IS NOT NULL
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load difficulty dimension:

In [110]:
query = f"""
INSERT INTO dwh.dim_difficulty (name)
SELECT DISTINCT gfq_difficulty FROM (
    SELECT gfq_difficulty FROM stage.time_to_beat WHERE gfq_difficulty IS NOT NULL
) AS games
ON CONFLICT (name) DO NOTHING
"""

with engine.connect() as connection:
    connection.execute(text(query))
    connection.commit()

Load date dimension:

In [None]:
query = f"""
SELECT DISTINCT date FROM (
    SELECT release_date AS date FROM stage.genres WHERE release_date IS NOT NULL
    UNION
    SELECT release_date AS date FROM stage.mdn_play_time WHERE release_date IS NOT NULL
    UNION
    SELECT release_date AS date FROM stage.all_steam WHERE release_date IS NOT NULL
    UNION
    SELECT all_time_peak_date AS date FROM stage.all_steam WHERE all_time_peak_date IS NOT NULL
    UNION
    SELECT release_date AS date FROM stage.regions WHERE release_date IS NOT NULL
    UNION
    SELECT release_date AS date FROM stage.regions WHERE release_date IS NOT NULL
) AS date
"""

df = pd.read_sql_query(query, engine)

df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['id'] = df['year'].astype(str) + df['month'].astype(str).str.zfill(2) + df['day'].astype(str).str.zfill(2)
df = df.drop(columns=['date'])

# Drop duplicates in the id column
df = df.drop_duplicates(subset='id')

with engine.connect() as connection:
    query = text("""
        INSERT INTO dwh.dim_date (id, year, month, day)
        VALUES (:id, :year, :month, :day)
    """)
    for index, row in df.iterrows():
        print(row)
        connection.execute(query, {'id': row['id'], 'year': row['year'], 'month': row['month'], 'day': row['day']})
    connection.commit()

Load players fact:

In [112]:
with engine.connect() as connection:
    query = text("""
        INSERT INTO dwh.fact_players (game_id, region_id, players)
        SELECT dg.id AS game_id, dr.id AS region_id,
               CASE
                   WHEN dr.current_name = 'na' THEN sr.na_players
                   WHEN dr.current_name = 'eu' THEN sr.eu_players
                   WHEN dr.current_name = 'jp' THEN sr.jp_players
                   WHEN dr.current_name = 'other' THEN sr.other_players
                   WHEN dr.current_name = 'global' THEN sr.global_players
               END AS players
        FROM stage.regions AS sr
        JOIN dwh.dim_game AS dg ON dg.name = sr.game
        JOIN dwh.dim_region AS dr ON dr.current_name IN ('na', 'eu', 'jp', 'other', 'global');
    """)
    connection.execute(query)
    connection.commit()

Load score fact:

In [113]:
with engine.connect() as connection:
    query = text("""
        INSERT INTO dwh.fact_score (game_id, score_source_id, score)
SELECT DISTINCT ON (dg.id, ss.id)
       dg.id AS game_id,
       ss.id AS score_source_id,
       CASE
           WHEN ss.name = 'gfq' THEN ttb.gfq_rating
           WHEN ss.name = 'igdb' THEN ttb.igdb_uscore
           WHEN ss.name = 'meta' THEN mr.metascore
           WHEN ss.name = 'steam' THEN als.rating
       END AS score
FROM stage.time_to_beat AS ttb
JOIN dwh.dim_game AS dg ON dg.name = ttb.game
LEFT JOIN stage.metacritic_review AS mr ON dg.name = mr.game
LEFT JOIN stage.all_steam AS als ON dg.name = als.game                 
JOIN dwh.dim_score_source AS ss ON ss.name IN ('steam','meta','gfq', 'igdb')
ORDER BY dg.id, ss.id, score DESC;
    """)
    connection.execute(query)
    connection.commit()

Load gameplay fact:

In [85]:

with engine.connect() as connection:
    query = text("""
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT(dg.id) AS game_id,
           CAST(NULL AS BIGINT) AS release_date,
           achivements AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           df.id AS difficulty_id,
           hltb_single,
           hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.time_to_beat AS ttb
    JOIN dwh.dim_game AS dg ON ttb.game = dg.name
    LEFT JOIN dwh.dim_difficulty AS df ON ttb.gfq_difficulty = df.name
) AS s1
ON t.game_id = s1.game_id
WHEN MATCHED THEN
    UPDATE SET
        achievements_count = s1.achievements_count,
        difficulty_id = s1.difficulty_id,
		hltb_single = s1.hltb_single,
        hltb_complete = s1.hltb_complete
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s1.game_id, s1.release_date, s1.achievements_count, s1.avg_play_time, s1.mdn_play_time, s1.difficulty_id, s1.hltb_single, s1.hltb_complete, s1.peak_players, s1.peak_date, s1.owners);
                 
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT(dg.id) AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           mpt.avg_play_time,
           mpt.median_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           mpt.owners,
		   CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date
    FROM stage.mdn_play_time AS mpt
    JOIN dwh.dim_game AS dg ON mpt.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM mpt.release_date) * 10000 + EXTRACT(MONTH FROM mpt.release_date) * 100 + EXTRACT(DAY FROM mpt.release_date)) = dd.id
) AS s2
ON t.game_id = s2.game_id AND (t.achievements_count IS NULL OR t.release_date IS NULL OR t.avg_play_time IS NULL OR t.mdn_play_time IS NULL OR t.owners IS NULL)
WHEN MATCHED THEN
UPDATE SET
    achievements_count = COALESCE(t.achievements_count, s2.achievements_count),
    release_date = COALESCE(t.release_date, s2.release_date),
    avg_play_time = COALESCE(t.avg_play_time, s2.avg_play_time),
    mdn_play_time = COALESCE(t.mdn_play_time, s2.median_play_time),
    owners = COALESCE(t.owners, s2.owners)
WHEN NOT MATCHED THEN
INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
VALUES (s2.game_id, s2.release_date, s2.achievements_count, s2.avg_play_time, s2.median_play_time, s2.difficulty_id, s2.hltb_single, s2.hltb_complete, s2.peak_players, s2.peak_date, s2.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           CAST(NULL AS BIGINT) AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           pbp.time AS avg_play_time,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.play_time_by_player AS pbp
    JOIN dwh.dim_game AS dg ON pbp.game = dg.name
) AS s3
ON t.game_id = s3.game_id AND t.avg_play_time IS NULL
WHEN MATCHED THEN
UPDATE SET
    avg_play_time = s3.avg_play_time
WHEN NOT MATCHED THEN
INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
VALUES (s3.game_id, s3.release_date, s3.achievements_count, s3.avg_play_time, s3.mdn_play_time, s3.difficulty_id, s3.hltb_single, s3.hltb_complete, s3.peak_players, s3.peak_date, s3.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT(dg.id) AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           als.all_time_peak AS peak_players,
           dd2.id AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.all_steam AS als
    JOIN dwh.dim_game AS dg ON als.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM als.release_date) * 10000 + EXTRACT(MONTH FROM als.release_date) * 100 + EXTRACT(DAY FROM als.release_date)) = dd.id
    LEFT JOIN dwh.dim_date AS dd2 ON (EXTRACT(YEAR FROM als.all_time_peak_date) * 10000 + EXTRACT(MONTH FROM als.all_time_peak_date) * 100 + EXTRACT(DAY FROM als.all_time_peak_date)) = dd2.id
) AS s4
ON t.game_id = s4.game_id AND (t.release_date IS NULL OR t.peak_players IS NULL OR t.peak_date IS NULL)
WHEN MATCHED THEN
UPDATE SET
    release_date = COALESCE(t.release_date, s4.release_date),
    peak_players = COALESCE(t.peak_players, s4.peak_players),
    peak_date = COALESCE(t.peak_date, s4.peak_date)
WHEN NOT MATCHED THEN
INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
VALUES (s4.game_id, s4.release_date, s4.achievements_count, s4.avg_play_time, s4.mdn_play_time, s4.difficulty_id, s4.hltb_single, s4.hltb_complete, s4.peak_players, s4.peak_date, s4.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT (dg.id) AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.regions AS sr
    JOIN dwh.dim_game AS dg ON sr.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM sr.release_date) * 10000 + EXTRACT(MONTH FROM sr.release_date) * 100 + EXTRACT(DAY FROM sr.release_date)) = dd.id
) AS s5
ON t.game_id = s5.game_id AND (t.release_date IS NULL)
WHEN MATCHED THEN
UPDATE SET
    release_date = COALESCE(t.release_date, s5.release_date)
WHEN NOT MATCHED THEN
INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
VALUES (s5.game_id, s5.release_date, s5.achievements_count, s5.avg_play_time, s5.mdn_play_time, s5.difficulty_id, s5.hltb_single, s5.hltb_complete, s5.peak_players, s5.peak_date, s5.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT ON(dg.id,dd.id)
		   dg.id AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.genres AS sg
    JOIN dwh.dim_game AS dg ON sg.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM sg.release_date) * 10000 + EXTRACT(MONTH FROM sg.release_date) * 100 + EXTRACT(DAY FROM sg.release_date)) = dd.id
) AS s6
ON t.game_id = s6.game_id AND t.release_date IS NULL
WHEN MATCHED THEN 
UPDATE SET 
		release_date = s6.release_date
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s6.game_id, s6.release_date, s6.achievements_count, s6.avg_play_time, s6.mdn_play_time, s6.difficulty_id, s6.hltb_single, s6.hltb_complete, s6.peak_players, s6.peak_date, s6.owners);
""")
    connection.execute(query)
    connection.commit()

ProgrammingError: (psycopg2.errors.CardinalityViolation) ОШИБКА:  команда MERGE не может подействовать на строку дважды
HINT:  Проверьте, не может ли какой-либо целевой строке соответствовать более одной исходной строки.

[SQL: 
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           CAST(NULL AS BIGINT) AS release_date,
           achivements AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           dp.id AS publisher_id,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           df.id AS difficulty_id,
           hltb_single,
           hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.time_to_beat AS ttb
    JOIN dwh.dim_game AS dg ON ttb.game = dg.name
    LEFT JOIN dwh.dim_difficulty AS df ON ttb.gfq_difficulty = df.name
    LEFT JOIN dwh.dim_publisher AS dp ON ttb.publisher = dp.name
) AS s1
ON t.game_id = s1.game_id
WHEN MATCHED THEN
    UPDATE SET
        achievements_count = s1.achievements_count,
        publisher_id = s1.publisher_id,
        difficulty_id = s1.difficulty_id,
		hltb_single = s1.hltb_single,
        hltb_complete = s1.hltb_complete
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s1.game_id, s1.release_date, s1.achievements_count, s1.avg_play_time, s1.publisher_id, s1.mdn_play_time, s1.difficulty_id, s1.hltb_single, s1.hltb_complete, s1.peak_players, s1.peak_date, s1.owners);
                 
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           mpt.avg_play_time,
           dp.id AS publisher_id,
           mpt.median_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC)  AS hltb_complete,
           mpt.owners,
		   CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date
    FROM stage.mdn_play_time AS mpt
    JOIN dwh.dim_game AS dg ON mpt.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM mpt.release_date) * 10000 + EXTRACT(MONTH FROM mpt.release_date) * 100 + EXTRACT(DAY FROM mpt.release_date)) = dd.id
    JOIN dwh.dim_publisher AS dp ON mpt.publisher = dp.name
) AS s2
ON t.game_id = s2.game_id
WHEN MATCHED THEN 
UPDATE SET 
		achievements_count = s2.achievements_count,
		release_date = s2.release_date,
		avg_play_time = s2.avg_play_time,
        publisher_id = s2.publisher_id,
		mdn_play_time = s2.median_play_time,
        owners = s2.owners
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s2.game_id, s2.release_date, s2.achievements_count, s2.avg_play_time, s2.publisher_id, s2.median_play_time, s2.difficulty_id, s2.hltb_single, s2.hltb_complete, s2.peak_players, s2.peak_date, s2.owners);
                 
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           CAST(NULL AS BIGINT) AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           pbp.time AS avg_play_time,
           CAST(NULL AS BIGINT) AS publisher_id,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.play_time_by_player AS pbp
    JOIN dwh.dim_game AS dg ON pbp.game = dg.name
) AS s3
ON t.game_id = s3.game_id
WHEN MATCHED THEN 
UPDATE SET 
		avg_play_time = s3.avg_play_time
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s3.game_id, s3.release_date, s3.achievements_count, s3.avg_play_time, s3.publisher_id, s3.mdn_play_time, s3.difficulty_id, s3.hltb_single, s3.hltb_complete, s3.peak_players, s3.peak_date, s3.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           dp.id AS publisher_id,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           als.all_time_peak AS peak_players,
           dd2.id AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.all_steam AS als
    JOIN dwh.dim_game AS dg ON als.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM als.release_date) * 10000 + EXTRACT(MONTH FROM als.release_date) * 100 + EXTRACT(DAY FROM als.release_date)) = dd.id
    JOIN dwh.dim_publisher AS dp ON als.publisher = dp.name
    FULL JOIN dwh.dim_date AS dd2 ON (EXTRACT(YEAR FROM als.all_time_peak_date) * 10000 + EXTRACT(MONTH FROM als.all_time_peak_date) * 100 + EXTRACT(DAY FROM als.all_time_peak_date)) = dd2.id
) AS s4
ON t.game_id = s4.game_id
WHEN MATCHED THEN 
UPDATE SET 
		release_date = s4.release_date,
        publisher_id = s4.publisher_id,
		peak_players = s4.peak_players,
		peak_date = s4.peak_date
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s4.game_id, s4.release_date, s4.achievements_count, s4.avg_play_time, s4.publisher_id, s4.mdn_play_time, s4.difficulty_id, s4.hltb_single, s4.hltb_complete, s4.peak_players, s4.peak_date, s4.owners);

MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT dg.id AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           dp.id AS publisher_id,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.regions AS sr
    JOIN dwh.dim_game AS dg ON sr.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM sr.release_date) * 10000 + EXTRACT(MONTH FROM sr.release_date) * 100 + EXTRACT(DAY FROM sr.release_date)) = dd.id
    JOIN dwh.dim_publisher AS dp ON sr.publisher = dp.name
) AS s5
ON t.game_id = s5.game_id 
WHEN MATCHED THEN 
UPDATE SET 
		release_date = s5.release_date,
        publisher_id = s5.publisher_id
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s5.game_id, s5.release_date, s5.achievements_count, s5.avg_play_time, s5.publisher_id, s5.mdn_play_time, s5.difficulty_id, s5.hltb_single, s5.hltb_complete, s5.peak_players, s5.peak_date, s5.owners);
                 
MERGE INTO dwh.fact_gameplay AS t
USING (
    SELECT DISTINCT dg.id AS game_id,
           dd.id AS release_date,
           CAST(NULL AS INT) AS achievements_count,
           CAST(NULL AS NUMERIC) AS avg_play_time,
           CAST(NULL AS BIGINT) AS publisher_id,
           CAST(NULL AS NUMERIC) AS mdn_play_time,
           CAST(NULL AS INT) AS difficulty_id,
           CAST(NULL AS NUMERIC) AS hltb_single,
           CAST(NULL AS NUMERIC) AS hltb_complete,
           CAST(NULL AS INT) AS peak_players,
           CAST(NULL AS BIGINT) AS peak_date,
           CAST(NULL AS INT) AS owners
    FROM stage.regions AS sr
    JOIN dwh.dim_game AS dg ON sr.game = dg.name
    JOIN dwh.dim_date AS dd ON (EXTRACT(YEAR FROM sr.release_date) * 10000 + EXTRACT(MONTH FROM sr.release_date) * 100 + EXTRACT(DAY FROM sr.release_date)) = dd.id
) AS s6
ON t.game_id = s6.game_id
WHEN MATCHED THEN 
UPDATE SET 
		release_date = s6.release_date
WHEN NOT MATCHED THEN
    INSERT (game_id, release_date, achievements_count, avg_play_time, publisher_id, mdn_play_time, difficulty_id, hltb_single, hltb_complete, peak_players, peak_date, owners)
    VALUES (s6.game_id, s6.release_date, s6.achievements_count, s6.avg_play_time, s6.publisher_id, s6.mdn_play_time, s6.difficulty_id, s6.hltb_single, s6.hltb_complete, s6.peak_players, s6.peak_date, s6.owners);
]
(Background on this error at: https://sqlalche.me/e/20/f405)