In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions  import *
from pyspark.sql.types import StructField, StructType, StructType, IntegerType, FloatType, DateType, StringType

from dotenv import load_dotenv
import sys
import os
import datetime
# Добавляем корневую директорию проекта в sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../..')))

load_dotenv()

spark = (SparkSession.builder
         .appName("cbr_currency_etl")
         .getOrCreate())

spark

In [2]:
from consultant.extractors.base import get_cbr_currency_rate_json



currency_list = list(get_cbr_currency_rate_json()['Valute'].values())

raw_currency = spark.createDataFrame(currency_list)
raw_currency.show(100, truncate=False)



+--------+-------+-------------------------------------+-------+-------+--------+--------+
|CharCode|ID     |Name                                 |Nominal|NumCode|Previous|Value   |
+--------+-------+-------------------------------------+-------+-------+--------+--------+
|AUD     |R01010 |Австралийский доллар                 |1      |036    |64.0178 |63.6894 |
|AZN     |R01020A|Азербайджанский манат                |1      |944    |60.8459 |60.4752 |
|GBP     |R01035 |Фунт стерлингов                      |1      |826    |125.6979|125.4358|
|AMD     |R01060 |Армянских драмов                     |100    |051    |26.0038 |25.8253 |
|BYN     |R01090B|Белорусский рубль                    |1      |933    |29.5681 |29.5926 |
|BGN     |R01100 |Болгарский лев                       |1      |975    |53.9341 |53.8526 |
|BRL     |R01115 |Бразильский реал                     |1      |986    |16.9359 |16.946  |
|HUF     |R01135 |Форинтов                             |100    |348    |25.7347 |25.7767 |

In [21]:
df = raw_currency \
    .withColumn('VunitRate', col('Value') / col('Nominal')) \
    .withColumn('ValCursDate', lit(datetime.date.today() - datetime.timedelta(days=1))) \
    .select(['CharCode', 'Nominal', 'Value', 'VunitRate', 'ValCursDate'])

new_schema = StructType([
    StructField('code_iso', StringType()),
    StructField('nominal', IntegerType()),
    StructField('rate', FloatType()),
    StructField('unit_rate', FloatType()),
    StructField('on_date', DateType())
])

new_df = df.select(
    col("CharCode").alias("code_iso"),
    col("Nominal").cast("int").alias("nominal"),
    col("Value").cast("float").alias("rate"),
    col("VunitRate").cast("float").alias("unit_rate"),
    col("ValCursDate").cast("date").alias("on_date")
)


new_df.printSchema()
new_df.show(100, truncate=False)


root
 |-- code_iso: string (nullable = true)
 |-- nominal: integer (nullable = true)
 |-- rate: float (nullable = true)
 |-- unit_rate: float (nullable = true)
 |-- on_date: date (nullable = false)

+--------+-------+--------+----------+----------+
|code_iso|nominal|rate    |unit_rate |on_date   |
+--------+-------+--------+----------+----------+
|AUD     |1      |63.6894 |63.6894   |2025-01-15|
|AZN     |1      |60.4752 |60.4752   |2025-01-15|
|GBP     |1      |125.4358|125.4358  |2025-01-15|
|AMD     |100    |25.8253 |0.258253  |2025-01-15|
|BYN     |1      |29.5926 |29.5926   |2025-01-15|
|BGN     |1      |53.8526 |53.8526   |2025-01-15|
|BRL     |1      |16.946  |16.946    |2025-01-15|
|HUF     |100    |25.7767 |0.257767  |2025-01-15|
|VND     |10000  |42.2417 |0.00422417|2025-01-15|
|HKD     |1      |13.2263 |13.2263   |2025-01-15|
|GEL     |1      |36.1388 |36.1388   |2025-01-15|
|DKK     |1      |14.1175 |14.1175   |2025-01-15|
|AED     |1      |27.994  |27.994    |2025-01-15|
|

In [22]:
# Writing

from consultant.tools.basic import get_jdbc_url_for_gp, get_properies_for_gp

# import importlib
# import consultant.tools.basic as module

# importlib.reload(module)

print(get_properies_for_gp())
print(get_jdbc_url_for_gp())

new_df.write.jdbc(
    url=get_jdbc_url_for_gp(),
    table='b_kustov.currency_rate',
    properties=get_properies_for_gp(),
    mode='append'
)

{'user': 'wave12_user_a5', 'password': 'pass', 'driver': 'org.postgresql.Driver'}
jdbc:postgresql://172.17.1.32:5432/wave12_team_a


In [1]:
spark.stop()

NameError: name 'spark' is not defined