In [34]:
import sys

from pyspark.sql import SparkSession

# Extend the import search path so the project-local `readers` and `notebooks`
# packages imported below can be resolved. Presumably "../../.." is the repository
# root relative to this notebook's working directory — TODO confirm.
sys.path.append("../../..")

from readers.bronze.coin_markets.bronze_reading import bronze_to_silver_df

from notebooks.checking_functions import (
    null_value_check,
    whitespaces_and_not_lowercase_check,
    whitespaces_check,
    null_or_negative_value_check,
    duplicates_check,
    url_format_and_null_check,
    datestring_format_and_null_check,
    rank_check,
    change_percentage_check,
    high_and_low_check,
    standardize_roi_struct_check
)

In [35]:
# CHECK FOR NULL VALUES, UNWANTED WHITESPACE AND DUPLICATES IN id COLUMN
# Each helper appears to print the offending rows, one table per call;
# an empty table means no violations were found.

null_value_check(bronze_to_silver_df, "id")
whitespaces_check(bronze_to_silver_df, "id")
duplicates_check(bronze_to_silver_df, "id")

+---+-------+
| id|null_id|
+---+-------+
+---+-------+

+---+--------------+
| id|whitespaced_id|
+---+--------------+
+---+--------------+

+----------+-------------------------+
|cleaned_id|cleaned_id_distinct_count|
+----------+-------------------------+
+----------+-------------------------+



In [36]:
# CHECK FOR NULL VALUES, UNWANTED WHITESPACE AND NONSTANDARDIZED VALUES IN symbol COLUMN
## A STANDARDIZED symbol VALUE CONSISTS OF ALL lowercase LETTERS

null_value_check(bronze_to_silver_df, "symbol")
whitespaces_and_not_lowercase_check(bronze_to_silver_df, "symbol")

+---+-----------+
| id|null_symbol|
+---+-----------+
+---+-----------+

+---+----------------------+
| id|nonstandardized_symbol|
+---+----------------------+
+---+----------------------+



In [37]:
# CHECK FOR NULL VALUES AND UNWANTED WHITESPACE IN name COLUMN

null_value_check(bronze_to_silver_df, "name")
whitespaces_check(bronze_to_silver_df, "name")

+---+---------+
| id|null_name|
+---+---------+
+---+---------+

+---+----------------+
| id|whitespaced_name|
+---+----------------+
+---+----------------+



In [38]:
# CHECK FOR INVALID URL FORMAT AND NULL VALUES IN image COLUMN

url_format_and_null_check(bronze_to_silver_df, "image")

+---+-----------------+
| id|invalid_image_url|
+---+-----------------+
+---+-----------------+



In [39]:
# CHECK FOR NEGATIVE OR NULL VALUES IN current_price COLUMN

null_or_negative_value_check(bronze_to_silver_df,"current_price")


+---+---------------------+
| id|invalid_current_price|
+---+---------------------+
+---+---------------------+



In [40]:
# CHECK FOR NEGATIVE OR NULL VALUES IN market_cap COLUMN

null_or_negative_value_check(bronze_to_silver_df,"market_cap")

+---+------------------+
| id|invalid_market_cap|
+---+------------------+
+---+------------------+



In [41]:
# CHECK FOR NEGATIVE OR NULL VALUES IN market_cap_rank COLUMN

null_or_negative_value_check(bronze_to_silver_df,"market_cap_rank")

+---+-----------------------+
| id|invalid_market_cap_rank|
+---+-----------------------+
+---+-----------------------+



In [42]:
# CHECK THAT market_cap_rank FOLLOWS THE ORDERING OF market_cap
# NOTE(review): the output includes an expected_rank column, so the helper
# presumably derives a rank from market_cap and lists rows where it differs
# from market_cap_rank — confirm in checking_functions.

rank_check(bronze_to_silver_df, "market_cap", "market_cap_rank")


+---+----------+---------------+-------------+
| id|market_cap|market_cap_rank|expected_rank|
+---+----------+---------------+-------------+
+---+----------+---------------+-------------+



In [43]:
# CHECK FOR NEGATIVE OR NULL VALUES IN fully_diluted_valuation COLUMN

null_or_negative_value_check(bronze_to_silver_df,"fully_diluted_valuation")

+---+-------------------------------+
| id|invalid_fully_diluted_valuation|
+---+-------------------------------+
+---+-------------------------------+



In [44]:
# CHECK FOR NEGATIVE OR NULL VALUES IN total_volume COLUMN

null_or_negative_value_check(bronze_to_silver_df,"total_volume")

+---+--------------------+
| id|invalid_total_volume|
+---+--------------------+
+---+--------------------+



In [45]:
# CHECK FOR NEGATIVE OR NULL VALUES IN high_24h COLUMN

null_or_negative_value_check(bronze_to_silver_df,"high_24h")

+---+----------------+
| id|invalid_high_24h|
+---+----------------+
+---+----------------+



In [46]:
# CHECK FOR NEGATIVE OR NULL VALUES IN low_24h COLUMN

null_or_negative_value_check(bronze_to_silver_df,"low_24h")

+---+---------------+
| id|invalid_low_24h|
+---+---------------+
+---+---------------+



In [47]:
# CHECK FOR INCONSISTENT 24H PRICES: ROWS WHERE high_24h IS LOWER THAN low_24h

high_and_low_check(bronze_to_silver_df, "high_24h", "low_24h")

+---+--------+-------+
| id|high_24h|low_24h|
+---+--------+-------+
+---+--------+-------+



In [48]:
# CHECK FOR NULL VALUES IN price_change_24h COLUMN
# Only NULLs are checked here, not the sign — presumably because a price
# change may legitimately be negative.

null_value_check(bronze_to_silver_df,"price_change_24h")

+---+---------------------+
| id|null_price_change_24h|
+---+---------------------+
+---+---------------------+



In [49]:
# CHECK FOR NULL VALUES IN price_change_percentage_24h COLUMN

null_value_check(bronze_to_silver_df,"price_change_percentage_24h")

+---+--------------------------------+
| id|null_price_change_percentage_24h|
+---+--------------------------------+
+---+--------------------------------+



In [50]:
# CHECK THAT price_change_percentage_24h EQUALS price_change_24h DIVIDED BY
# THE PREVIOUS PRICE, WITHIN A TOLERANCE OF 0.1
# NOTE(review): the output has previous_current_price and
# derived_price_change_percentage_24h columns, so the previous price is presumably
# current_price - price_change_24h. The tolerance value and its units are not
# visible in this notebook — confirm both in checking_functions.

change_percentage_check(bronze_to_silver_df, "current_price", "price_change_24h",  "price_change_percentage_24h")


+---+-------------+----------------+----------------------+-----------------------------------+---------------------------+
| id|current_price|price_change_24h|previous_current_price|derived_price_change_percentage_24h|price_change_percentage_24h|
+---+-------------+----------------+----------------------+-----------------------------------+---------------------------+
+---+-------------+----------------+----------------------+-----------------------------------+---------------------------+



In [51]:
# CHECK FOR NULL VALUES IN market_cap_change_24h COLUMN

null_value_check(bronze_to_silver_df,"market_cap_change_24h")

+---+--------------------------+
| id|null_market_cap_change_24h|
+---+--------------------------+
+---+--------------------------+



In [52]:
# CHECK FOR NULL VALUES IN market_cap_change_percentage_24h COLUMN

null_value_check(bronze_to_silver_df,"market_cap_change_percentage_24h")

+---+-------------------------------------+
| id|null_market_cap_change_percentage_24h|
+---+-------------------------------------+
+---+-------------------------------------+



In [53]:
# CHECK THAT market_cap_change_percentage_24h EQUALS market_cap_change_24h DIVIDED BY
# THE PREVIOUS MARKET CAP, WITHIN A TOLERANCE OF 0.1
# NOTE(review): the output has previous_market_cap and
# derived_market_cap_change_percentage_24h columns, so the previous market cap is
# presumably market_cap - market_cap_change_24h. The tolerance value and its units
# are not visible in this notebook — confirm both in checking_functions.

change_percentage_check(bronze_to_silver_df, "market_cap", "market_cap_change_24h",  "market_cap_change_percentage_24h")


+---+----------+---------------------+-------------------+----------------------------------------+--------------------------------+
| id|market_cap|market_cap_change_24h|previous_market_cap|derived_market_cap_change_percentage_24h|market_cap_change_percentage_24h|
+---+----------+---------------------+-------------------+----------------------------------------+--------------------------------+
+---+----------+---------------------+-------------------+----------------------------------------+--------------------------------+



In [54]:
# CHECK FOR NEGATIVE OR NULL VALUES IN circulating_supply COLUMN

null_or_negative_value_check(bronze_to_silver_df,"circulating_supply")

+---+--------------------------+
| id|invalid_circulating_supply|
+---+--------------------------+
+---+--------------------------+



In [55]:
# CHECK FOR NEGATIVE OR NULL VALUES IN total_supply COLUMN

null_or_negative_value_check(bronze_to_silver_df,"total_supply")

+---+--------------------+
| id|invalid_total_supply|
+---+--------------------+
+---+--------------------+



In [56]:
# CHECK FOR NEGATIVE OR NULL VALUES IN max_supply COLUMN
# NOTE(review): unlike the other checks, this one returns rows — the output lists
# many ids (ethereum, tether, solana, ...) with a NULL max_supply. Presumably these
# assets have no supply cap, in which case NULL is legitimate and only negative
# values should be flagged. Confirm before treating these rows as invalid.

null_or_negative_value_check(bronze_to_silver_df,"max_supply")

+--------------------+------------------+
|                  id|invalid_max_supply|
+--------------------+------------------+
|            ethereum|              NULL|
|              tether|              NULL|
|              solana|              NULL|
|            usd-coin|              NULL|
|        staked-ether|              NULL|
|                tron|              NULL|
|            dogecoin|              NULL|
|       wrapped-steth|              NULL|
|        figure-heloc|              NULL|
|  wrapped-beacon-eth|              NULL|
|        wrapped-eeth|              NULL|
|                usds|              NULL|
|binance-bridged-u...|              NULL|
|           leo-token|              NULL|
|         ethena-usde|              NULL|
|                weth|              NULL|
|coinbase-wrapped-btc|              NULL|
|              monero|              NULL|
|           shiba-inu|              NULL|
|    the-open-network|              NULL|
+--------------------+------------

In [57]:
# CHECK FOR NEGATIVE OR NULL VALUES IN ath COLUMN
# (ath presumably stands for the all-time-high price — confirm against the source API.)

null_or_negative_value_check(bronze_to_silver_df,"ath")

+---+-----------+
| id|invalid_ath|
+---+-----------+
+---+-----------+



In [58]:
# CHECK FOR NULL VALUES IN ath_change_percentage COLUMN
# Only NULLs are checked here, not the sign — presumably because a change
# percentage may legitimately be negative.

null_value_check(bronze_to_silver_df,"ath_change_percentage")

+---+--------------------------+
| id|null_ath_change_percentage|
+---+--------------------------+
+---+--------------------------+



In [59]:
# CHECK FOR INVALID DATE FORMAT, FUTURE DATES AND NULL VALUES IN ath_date COLUMN
# NOTE(review): the result column is labelled invalid_ath_date_url; the "_url"
# suffix looks copied from the URL check — confirm and rename in checking_functions.

datestring_format_and_null_check(bronze_to_silver_df,"ath_date")

+---+--------------------+
| id|invalid_ath_date_url|
+---+--------------------+
+---+--------------------+



In [60]:
# CHECK FOR NEGATIVE OR NULL VALUES IN atl COLUMN
# (atl presumably stands for the all-time-low price — confirm against the source API.)

null_or_negative_value_check(bronze_to_silver_df,"atl")

+---+-----------+
| id|invalid_atl|
+---+-----------+
+---+-----------+



In [61]:
# CHECK FOR NULL VALUES IN atl_change_percentage COLUMN
# Only NULLs are checked here, not the sign or magnitude of the value.

null_value_check(bronze_to_silver_df,"atl_change_percentage")

+---+--------------------------+
| id|null_atl_change_percentage|
+---+--------------------------+
+---+--------------------------+



In [62]:
# CHECK FOR INVALID DATE FORMAT, FUTURE DATES AND NULL VALUES IN atl_date COLUMN
# NOTE(review): the result column is labelled invalid_atl_date_url; the "_url"
# suffix looks copied from the URL check — confirm and rename in checking_functions.

datestring_format_and_null_check(bronze_to_silver_df,"atl_date")

+---+--------------------+
| id|invalid_atl_date_url|
+---+--------------------+
+---+--------------------+



In [63]:
# CHECK FOR PARTIALLY EMPTY STRUCT OR WHITESPACED currency VALUE IN roi COLUMN
# The output columns (currency, percentage, times) are presumably the fields of
# the roi struct — confirm against the DataFrame schema.

standardize_roi_struct_check(bronze_to_silver_df)

+---+--------+----------+-----+
|id |currency|percentage|times|
+---+--------+----------+-----+
+---+--------+----------+-----+



In [64]:
# CHECK FOR INVALID DATE FORMAT, FUTURE DATES AND NULL VALUES IN last_updated COLUMN
# NOTE(review): the result column is labelled invalid_last_updated_url; the "_url"
# suffix looks copied from the URL check — confirm and rename in checking_functions.

datestring_format_and_null_check(bronze_to_silver_df,"last_updated")

+---+------------------------+
| id|invalid_last_updated_url|
+---+------------------------+
+---+------------------------+

