# Notebook detailing all data transformations for Power BI :)

## 1) Linking all country sub dimensions based on country numeric code

In [None]:
-- Setting up Git in case I run out of credits :(
-- Creates (or replaces) the API integration my_git_api_integration, which uses
-- the git_https_api provider, is enabled immediately, and lists the
-- KIWI_notebook GitHub repository URL as its only allowed prefix.
CREATE OR REPLACE API INTEGRATION my_git_api_integration
  API_PROVIDER = git_https_api
  API_ALLOWED_PREFIXES = ('https://github.com/Jcan156/KIWI_notebook.git')
  ENABLED = TRUE;


In [None]:
-- Remove the alias column if it is present (no-op when it is absent).
-- The column is added back by a later cell's ALTER TABLE ... ADD COLUMN alias.
ALTER TABLE dim_country
DROP COLUMN IF EXISTS alias;

-- Rename numeric_code to country_id; the sub-dimension tables built below
-- select this column from dim_country.
-- NOTE(review): unlike the DROP above, this rename has no existence guard, so
-- it presumably errors on a second run once numeric_code is gone -- confirm
-- before re-running this cell against an already-renamed table.
ALTER TABLE dim_country
RENAME COLUMN numeric_code TO country_id;

-- Preview a few rows to check the resulting column layout.
SELECT * FROM dim_country LIMIT 5

In [None]:
-- Build the crime sub-dimension: every dim_crime row, tagged with the
-- country_id of the dim_country row whose country_name equals its country.
-- The LEFT OUTER JOIN keeps unmatched rows, leaving their country_id NULL.
CREATE OR REPLACE TABLE subdim_crime AS
SELECT
    crime.country,
    geo.country_id,
    crimeindexvianumbeo_2025  AS crime_index,
    safetyindexvianumbeo_2025 AS safety_index
FROM dim_crime AS crime
LEFT OUTER JOIN dim_country AS geo
    ON geo.country_name = crime.country;

-- List the countries that did not pick up a country_id from the join.
SELECT
    country,
    country_id
FROM subdim_crime
WHERE country_id IS NULL;
    

            

There seem to be a few countries without a corresponding code. In most of these cases, it's because the country is labeled as "Republic of _" or appears under its new, updated name. North Korea does not have a country code in this CSV, so we will ignore it for obvious reasons. I hardcoded these values, which is not best practice.

In [None]:
-- Patch country_id by country name using hardcoded ids.
-- Any country not listed below keeps whatever country_id it already has.
UPDATE subdim_crime
SET country_id =
    CASE country
        WHEN 'Moldova'                      THEN 498
        WHEN 'North Macedonia'              THEN 807
        WHEN 'Iran'                         THEN 364
        WHEN 'Palestine'                    THEN 275
        WHEN 'Czechia'                      THEN 203
        WHEN 'United States Virgin Islands' THEN 850
        WHEN 'Syria'                        THEN 760
        WHEN 'Tanzania'                     THEN 834
        ELSE country_id
    END;

-- North Korea is removed from the table rather than patched.
DELETE FROM subdim_crime
WHERE country = 'North Korea';

-- Drop the country name column now that country_id has been patched.
ALTER TABLE subdim_crime
DROP COLUMN country;

-- Sanity check: list any rows that still have no country_id.
SELECT *
FROM subdim_crime
WHERE country_id IS NULL;



In [None]:
-- Build the ethnic-group / race / religion sub-dimension: every dim_eg_ra_re
-- row, tagged with the country_id of the dim_country row whose country_name
-- equals its country. Unmatched rows are kept with a NULL country_id.
CREATE OR REPLACE TABLE subdim_eg_ra_re AS
SELECT
    demo.country,
    geo.country_id,
    ethnic_group,
    race,
    religion
FROM dim_eg_ra_re AS demo
LEFT OUTER JOIN dim_country AS geo
    ON geo.country_name = demo.country;

-- List the countries that did not pick up a country_id from the join.
SELECT
    country,
    country_id
FROM subdim_eg_ra_re
WHERE country_id IS NULL;

Okay, I'm already tired of hardcoding, so I created a new column in dim_country called `alias`, which stores any alias for next time.

In [None]:
-- Add an alias column to dim_country to hold an alternative country name.
-- IF NOT EXISTS makes this cell safe to re-run on its own: a plain ADD COLUMN
-- errors when alias is already present.
ALTER TABLE dim_country
ADD COLUMN IF NOT EXISTS alias VARCHAR(255);

-- Record the known alternative names against their country_id.
-- Rows whose country_id is not listed keep their current alias (ELSE alias).
UPDATE dim_country
SET alias = CASE
    WHEN country_id = 498 THEN 'Moldova'
    WHEN country_id = 807 THEN 'North Macedonia'
    WHEN country_id = 364 THEN 'Iran'
    WHEN country_id = 275 THEN 'Palestine'
    WHEN country_id = 203 THEN 'Czechia'
    WHEN country_id = 850 THEN 'United States Virgin Islands'
    WHEN country_id = 760 THEN 'Syria'
    WHEN country_id = 834 THEN 'Tanzania'
    WHEN country_id = 840 THEN 'United States of America'
    ELSE alias
END;

-- Backfill country_id for rows the name join left NULL, this time matching
-- the row's country against dim_country.alias. Rows that already have a
-- country_id are not touched.
UPDATE subdim_EG_RA_RE
SET country_id = dc.country_id
FROM dim_Country dc
WHERE subdim_EG_RA_RE.country_id IS NULL
  AND subdim_EG_RA_RE.country = dc.alias;

-- Sanity check: list any rows that still have no country_id.
SELECT *
FROM subdim_EG_RA_RE
WHERE country_id IS NULL;