In [17]:
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path="../.env")



True

In [14]:
from snowflake.snowpark import Session

connection_parameters = {
    "account": "YUJMLNP-YOB51920",
    "user": os.environ["SNOWFLAKE_USER"],
    "password": os.environ["SNOWFLAKE_PASSWORD"],
    "role": os.environ.get("SNOWFLAKE_ROLE"),
    "warehouse": "COMPUTE_WH",
    "database": "NBU_EXCHANGE",
    "schema": "SILVER"
}

session = Session.builder.configs(connection_parameters).create()


DatabaseError: 250001 (08001): Failed to connect to DB: YUJMLNP-YOB51920.snowflakecomputing.com:443. Incorrect username or password was specified.

In [71]:
exchange_rate_extracted_df = session.table("nbu_exchange.silver.exchange_rate_extracted")
exchange_rate_extracted_df.show(5)


-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CALCULATION_DATE"  |"CURRENCY_CODE"  |"CURRENCY_NAME"  |"EXCHANGE_DATE"  |"GROUP_NUMBER"  |"R030_CODE"  |"RATE"     |"RATE_PER_UNIT"  |"SPECIAL_CONDITIONS"  |"CURRENCY_NAME_UA"  |"UNITS"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|                    |MDL              |NULL             |30.11.2013       |1               |498          |60.9534    |0.609534         |NULL                  |Молдовський лей     |100      |
|                    |GEL              |NULL             |30.11.2013       |2               |981          |478.4222   |4.784222         |NULL                  |Ларi                |100      |
|                    |XPD              |

-- Need to do:
-- 0. deduplicate - after cleaning and mapping!
-- 3. currency_code upper case maybe
-- 4. currency_name has nulls (IS null), map
-- 6. group_number (1,2,3) find what's the meaning, map with names
-- 7. r030_code - 150 r030 count, 109 currency count?
-- 8. r030_code - 3d group is precious metals, 2nd and 1st the same currency?
-- 9. rate - seems ok
-- 10. rate_per_unit - seems ok
-- 11. special_conditions - seems ok
-- Special=null/Y/N - sign of the conditions for calculating the hryvnia to US dollar 
-- exchange rate: null - for records for days when the sign was not determined and for
-- valcode≠usd, Y - under special conditions, N - under normal conditions;
-- 12. currency_name_ua - seems ok, can add additional column with countries 
-- 13. units - seems ok
-- add collumn if records valid

-- Create a new table for cleaned and transformed data

In [72]:
from snowflake.snowpark.functions import col, when, trim, to_date, lit, sql_expr

In [73]:
# Cast date columns to date type and handle null or empty values for calculation_date

exchange_rate_extracted_df = exchange_rate_extracted_df \
    .with_column(
        "calculation_date",
        when(
            (trim(col("calculation_date")) == lit('')) | col("calculation_date").is_null(),
            to_date(lit("01.01.1900"), "DD.MM.YYYY")
        ).otherwise(
            to_date(trim(col("calculation_date")), "DD.MM.YYYY")
        )
    ) \
    .with_column(
        "exchange_date",
        to_date(col("exchange_date"), "DD.MM.YYYY")
    )


In [106]:
exchange_rate_extracted_df \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .order_by('currency_name_ua') \
    .show(250)


--------------------------------------------------------------------------------------
|"CURRENCY_CODE"  |"CURRENCY_NAME"              |"CURRENCY_NAME_UA"                  |
--------------------------------------------------------------------------------------
|DEM              |NULL                         |Hімецькі маpки                      |
|EUR              |NULL                         |Євро                                |
|EUR              |Euro                         |Євро                                |
|EGP              |NULL                         |Єгипетський фунт                    |
|EGP              |Egyptian Pound               |Єгипетський фунт                    |
|JPY              |NULL                         |Єна                                 |
|JPY              |Yen                          |Єна                                 |
|INR              |Indian Rupee                 |Індійська рупія                     |
|INR              |NULL                    

In [104]:
exchange_rate_extracted_df \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .order_by('currency_name_ua') \
    .count()

# 221

221

In [107]:
exchange_rate_extracted_df \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .where(col("currency_name").isNull()) \
    .show(150)

-------------------------------------------------------------------------
|"CURRENCY_CODE"  |"CURRENCY_NAME"  |"CURRENCY_NAME_UA"                 |
-------------------------------------------------------------------------
|ROL              |NULL             |Лей (Румунія)                      |
|EEK              |NULL             |Естонська крона                    |
|SKK              |NULL             |Словацька крона                    |
|SIT              |NULL             |Толар (Словенія)                   |
|SDR              |NULL             |Спецiальнi права запозичення       |
|ROL              |NULL             |Румунський лей                     |
|ITL              |NULL             |Італійська ліра                    |
|XDR              |NULL             |Спецiальнi права запозичення       |
|NLG              |NULL             |Нiдерландський гульден             |
|KWD              |NULL             |Кувейтський динар                  |
|ISK              |NULL             |І

In [None]:
exchange_rate_extracted_df \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .where(col("currency_name").isNull()) \
    .count()

# 149

149

In [110]:
exchange_rate_extracted_df \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .where(col("currency_code") == "AUD") \
    .order_by(col("currency_name").desc()) \
    .show(10)


--------------------------------------------------------------
|"CURRENCY_CODE"  |"CURRENCY_NAME"    |"CURRENCY_NAME_UA"    |
--------------------------------------------------------------
|AUD              |Australian Dollar  |Австралійський долар  |
|AUD              |NULL               |Австралійський долар  |
--------------------------------------------------------------



In [121]:
from snowflake.snowpark import Window
from snowflake.snowpark.functions import coalesce, first_value, trim, upper

# To fill in nulls in currency_name with value from currency_name that has the same code

windows = Window.partition_by(trim(upper(col("currency_code")))).order_by(col("currency_name").desc())

exchange_rate_extracted_df_m = exchange_rate_extracted_df \
    .with_column(
        "currency_name",
        coalesce(
            col("currency_name"),
            first_value(col("currency_name")).over(windows)
        )
    )

In [122]:
exchange_rate_extracted_df_m \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .order_by('currency_name_ua') \
    .count()

# 165

165

In [123]:
exchange_rate_extracted_df_m \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .order_by('currency_name_ua') \
    .show(170)

--------------------------------------------------------------------------------------
|"CURRENCY_CODE"  |"CURRENCY_NAME"              |"CURRENCY_NAME_UA"                  |
--------------------------------------------------------------------------------------
|DEM              |NULL                         |Hімецькі маpки                      |
|EUR              |Euro                         |Євро                                |
|EGP              |Egyptian Pound               |Єгипетський фунт                    |
|JPY              |Yen                          |Єна                                 |
|INR              |Indian Rupee                 |Індійська рупія                     |
|IQD              |Iraqi Dinar                  |Іракський динар                     |
|IRR              |Iranian Rial                 |Іранський ріал                      |
|IEP              |NULL                         |Ірландський фунт                    |
|IEP              |NULL                    

In [124]:
exchange_rate_extracted_df_m \
    .select('currency_code', 'currency_name', 'currency_name_ua') \
    .distinct() \
    .where(col("currency_name").isNull()) \
    .order_by('currency_name_ua') \
    .show(80)

--------------------------------------------------------------------
|"CURRENCY_CODE"  |"CURRENCY_NAME"  |"CURRENCY_NAME_UA"            |
--------------------------------------------------------------------
|DEM              |NULL             |Hімецькі маpки                |
|IEP              |NULL             |Ірландський фунт              |
|IEP              |NULL             |Ірландські фунти              |
|ISK              |NULL             |Ісландська крона              |
|ISK              |NULL             |Ісландські крони              |
|ESP              |NULL             |Іспанскі пессети              |
|ESP              |NULL             |Іспанська песета              |
|ITL              |NULL             |Італійська ліра               |
|ITL              |NULL             |Італійські ліpи               |
|ATS              |NULL             |Австрійський шилінг           |
|AZM              |NULL             |Азербайджанський манат        |
|BYR              |NULL           

In [None]:
# ! Upload static file with mapping of currency codes to names 
# and join with existing data to fill in missing values.

# check all codes vs file 

In [None]:


select distinct currency_code, currency_name, currency_name_ua, r030_code
from nbu_exchange.silver.exchange_rate_extracted
where currency_name is null
order by currency_code;

select distinct currency_code, currency_name, currency_name_ua, r030_code
from nbu_exchange.silver.exchange_rate_extracted
where currency_name is null
order by r030_code, currency_code;

select distinct currency_code, currency_name, currency_name_ua, r030_code
from nbu_exchange.silver.exchange_rate_extracted
where currency_code = 'SDR' or currency_code = 'XDR' or r030_code = 960
;

SyntaxError: invalid syntax (2844012650.py, line 1)

In [None]:
# session.close()
