# Notebook detailing all data transformations for Power BI :)

## 1) Linking all country sub dimensions based on country numeric code

In [None]:
-- Set up a Git API integration so the notebook can be kept in GitHub
-- in case I run out of credits :(
-- API_ALLOWED_PREFIXES limits the integration to the single repository URL listed.
CREATE OR REPLACE API INTEGRATION my_git_api_integration
  API_PROVIDER = git_https_api
  API_ALLOWED_PREFIXES = ('https://github.com/Jcan156/KIWI_notebook.git')
  ENABLED = TRUE;


In [None]:
-- Rename dim_country.numeric_code to country_id; the cells below use
-- country_id as the key that links each sub-dimension to dim_country.
-- NOTE(review): this ALTER is presumably not re-runnable -- once the column
-- has been renamed, numeric_code no longer exists and a second run should fail.
ALTER TABLE dim_country
RENAME COLUMN numeric_code TO country_id;

-- Preview a few rows to confirm the new column name.
SELECT * FROM dim_country LIMIT 5

In [None]:
-- Build subdim_Crime: one row per dim_Crime row, carrying the country_id of
-- the dim_Country row whose Country_Name equals the crime table's Country.
-- LEFT JOIN keeps countries without a match (country_id = NULL) so they can
-- be inspected below.
CREATE OR REPLACE TABLE subdim_Crime AS
SELECT
    crime.Country,
    ctry.country_id,
    CRIMEINDEXVIANUMBEO_2025  AS Crime_index,
    SAFETYINDEXVIANUMBEO_2025 AS Safety_index
FROM dim_Crime AS crime
LEFT JOIN dim_Country AS ctry
    ON ctry.Country_Name = crime.Country;

-- List the countries that did not receive a country_id from the name match.
SELECT Country, country_id
FROM subdim_Crime
WHERE country_id IS NULL
    

            

There seem to be a few countries without a corresponding code. In most of these cases, it's because the country is labeled as "Republic of _" or appears under its new, updated name. North Korea does not have a country code in this CSV, so we will ignore it for obvious reasons. I hardcoded these values, which is not best practice.

In [None]:
-- Backfill country_id for the crime rows whose name did not match
-- dim_Country.Country_Name, using hard-coded country numeric codes.
-- The WHERE clause limits the update to rows that are still unmapped
-- (country_id IS NULL) and whose name is in the list, so an id that was
-- already resolved is never overwritten and unrelated rows are not rewritten.
-- This mirrors the "country_id IS NULL" guard used by the alias-based updates
-- for the other sub-dimensions.
UPDATE subdim_Crime
SET country_id = CASE country
    WHEN 'Moldova'                      THEN 498
    WHEN 'North Macedonia'              THEN 807
    WHEN 'Iran'                         THEN 364
    WHEN 'Palestine'                    THEN 275
    WHEN 'Czechia'                      THEN 203
    WHEN 'United States Virgin Islands' THEN 850
    WHEN 'Syria'                        THEN 760
    WHEN 'Tanzania'                     THEN 834
    ELSE country_id  -- defensive: unreachable given the IN list below
END
WHERE country_id IS NULL
  AND country IN (
      'Moldova',
      'North Macedonia',
      'Iran',
      'Palestine',
      'Czechia',
      'United States Virgin Islands',
      'Syria',
      'Tanzania'
  );

-- North Korea is intentionally dropped rather than mapped to a code.
DELETE FROM subdim_Crime WHERE country = 'North Korea';

-- The country name is no longer needed once country_id is populated.
-- NOTE: after this DROP the cell cannot be re-run on its own; re-create
-- subdim_Crime first.
ALTER TABLE subdim_Crime
DROP COLUMN country;

-- Validation: lists any rows that still have no country_id.
SELECT *
FROM subdim_Crime
WHERE country_id IS NULL;



In [None]:
-- Build subdim_EG_RA_RE the same way as subdim_Crime: attach country_id by
-- matching the source Country against dim_Country.Country_Name, keeping
-- unmatched rows (country_id = NULL) for follow-up.
CREATE OR REPLACE TABLE subdim_EG_RA_RE AS
SELECT
    src.Country,
    ctry.country_id,
    Ethnic_Group,
    Race,
    Religion
FROM dim_EG_RA_RE AS src
LEFT JOIN dim_Country AS ctry
    ON ctry.Country_Name = src.Country;

-- List the countries that did not receive a country_id from the name match.
SELECT Country, country_id
FROM subdim_EG_RA_RE
WHERE country_id IS NULL

Okay, I'm already tired of hardcoding, so I created a new subdim_Alias table that stores every alias for reuse next time.

In [None]:
-- Alias lookup table: maps an alternative country spelling (alias) to its
-- country_id so unmatched names can be resolved without hard-coding them
-- per table.
-- CREATE OR REPLACE always produces a fresh, empty table, so no TRUNCATE is
-- needed before seeding. NOT NULL keeps incomplete mappings out of the lookup.
CREATE OR REPLACE TABLE subdim_Alias (
    alias_id   INT AUTOINCREMENT PRIMARY KEY,
    country_id INT NOT NULL,
    alias      VARCHAR(255) NOT NULL
);

-- Seed with the aliases found so far (alias_id is generated automatically).
INSERT INTO subdim_Alias (country_id, alias)
VALUES
    (498, 'Moldova'),
    (807, 'North Macedonia'),
    (364, 'Iran'),
    (275, 'Palestine'),
    (203, 'Czechia'),
    (850, 'United States Virgin Islands'),
    (760, 'Syria'),
    (834, 'Tanzania'),
    (840, 'United States of America');

In [None]:
-- Fill in the still-missing country_ids by looking the country name up in
-- the alias table.
UPDATE subdim_EG_RA_RE
SET country_id = alias_map.country_id
FROM subdim_Alias AS alias_map
WHERE alias_map.alias = subdim_EG_RA_RE.country
  AND subdim_EG_RA_RE.country_id IS NULL;

-- The country name is dropped once the id has been resolved.
ALTER TABLE subdim_EG_RA_RE DROP COLUMN country;

-- Validation: lists any rows that still have no country_id.
SELECT * FROM subdim_EG_RA_RE WHERE country_id IS NULL;

In [None]:
-- subdim_Happiness: resolve country_id by exact country name first, and fall
-- back to the alias table only for rows the name lookup did not match.
CREATE OR REPLACE TABLE subdim_Happiness AS
SELECT
    hap.Country,
    COALESCE(by_name.country_id, by_alias.country_id) AS country_id,
    hap.YEAR,
    hap.Rank,
    hap.Life_evaluation
FROM dim_Happiness AS hap
LEFT JOIN dim_Country AS by_name
    ON by_name.Country_Name = hap.Country
LEFT JOIN subdim_alias AS by_alias
    ON by_name.country_id IS NULL
   AND by_alias.alias = hap.Country;

-- List the countries that neither lookup could resolve.
SELECT Country, country_id
FROM subdim_Happiness
WHERE country_id IS NULL

In [None]:
-- Add the aliases discovered while mapping the happiness data.
-- MERGE (instead of a plain INSERT) makes this cell safe to re-run: an alias
-- that is already in subdim_Alias is skipped, so no duplicate rows are created
-- that could multiply rows in joins against the alias table.
MERGE INTO subdim_Alias AS tgt
USING (
    SELECT country_id, alias
    FROM (VALUES
        (384, 'Côte d’Ivoire'),
        (180, 'DR Congo'),
        (344, 'Hong Kong SAR of China'),
        (418, 'Lao PDR'),
        (748, 'Eswatini'),
        (410, 'Republic of Korea'),
        (498, 'Republic of Moldova'),
        (275, 'State of Palestine'),
        (158, 'Taiwan Province of China'),
        (792, 'Türkiye')
    ) AS v (country_id, alias)
) AS src
    ON tgt.alias = src.alias
WHEN NOT MATCHED THEN
    INSERT (country_id, alias) VALUES (src.country_id, src.alias);

In [None]:
-- 'North Cyprus' and 'Somaliland Region' are removed instead of being given
-- an id: per the author, the UN does not list them as countries.
DELETE FROM subdim_Happiness
WHERE country IN ('Somaliland Region', 'North Cyprus');

-- Fill in the still-missing country_ids from the alias table.
UPDATE subdim_Happiness
SET country_id = alias_map.country_id
FROM subdim_alias AS alias_map
WHERE alias_map.alias = subdim_Happiness.country
  AND subdim_Happiness.country_id IS NULL;

-- The country name is dropped once the id has been resolved.
ALTER TABLE subdim_Happiness DROP COLUMN country;

-- Validation: lists any rows that still have no country_id.
SELECT * FROM subdim_Happiness WHERE country_id IS NULL;