In [1]:
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.window import Window
from pyspark.sql.functions import when, avg, abs
from pyspark.sql.functions import expr


In [2]:

#    .config("spark.jars.packages", "org.postgresql:postgresql:42.7.1") \

spark = SparkSession.builder \
    .appName("binance") \
    .getOrCreate()

# schema = StructType([
#     StructField("datetime", TimestampType(), True),
#     StructField("symbol", StringType(), True),
#     StructField("open", DecimalType(18, 2), True),
#     StructField("high", DecimalType(18, 2), True),
#     StructField("low", DecimalType(18, 2), True),
#     StructField("close", DecimalType(18, 2), True),
#     StructField("volume", DecimalType(18, 2), True),
#     StructField("QuoteAssetVolume", DecimalType(18, 2), True),
#     StructField("NumTrades", IntegerType(), True),
#     StructField("TakerBuyBaseAssetVolume", DecimalType(18, 2), True),
#     StructField("TakerBuyQuoteAssetVolume", DecimalType(18, 2), True),
#     StructField("Ignore", StringType(), True)
# ])


In [3]:
df = spark.read.csv("Btcusdt_kline_1d.csv", header=True, inferSchema=True)


In [56]:
from pyspark.sql.functions import col, lag, when, collect_list, lit
from pyspark.sql.window import Window
from pyspark.sql.functions import udf
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import pandas_udf


def calculate_ewma(values, com):
    alpha = 1 / (com + 1)
    ewma = values[0]
    for value in values[1:]:
        ewma = (1 - alpha) * ewma + alpha * value
    return ewma
ewma_udf = udf(calculate_ewma, FloatType())


def RSI(df, period=14, com=13):


    df.orderBy("datetime", ascending=True)
    windowSpec = Window.orderBy("datetime")
    df = df.withColumn("prev_close", lag("close", 1).over(windowSpec))

    df = df.withColumn("delta", col("close") - col("prev_close"))
    df = df.withColumn("up", when(col("delta") > 0, col("delta")).otherwise(0))
    df = df.withColumn("down", when(col("delta") < 0, -col("delta")).otherwise(0))

    df = df.withColumn("up_ewma", ewma_udf(collect_list("up").over(windowSpec), lit(com)))
    df = df.withColumn("down_ewma", ewma_udf(collect_list("down").over(windowSpec), lit(com)))
    
    # 상대적 강도 지수(RSI) 계산
    df = df.withColumn("rs", df.up_ewma / df.down_ewma)
    df = df.withColumn("rsi", 100 - (100 / (1 + df.rs)))

    return df

In [58]:
rsi_df = RSI(df)
rsi_df = rsi_df.orderBy("datetime", ascending=True).limit(14)

rsi_df.select("datetime", "close","rsi").show()


+-------------------+-------+------------------+
|           datetime|  close|               rsi|
+-------------------+-------+------------------+
|2017-08-17 00:00:00|4285.08|              null|
|2017-08-18 00:00:00|4108.37|               0.0|
|2017-08-19 00:00:00|4139.98|16.152450629360672|
|2017-08-20 00:00:00|4086.29|12.468550188819037|
|2017-08-21 00:00:00| 4016.0| 9.434760862987048|
|2017-08-22 00:00:00| 4040.0|16.872115811978958|
|2017-08-23 00:00:00|4114.01| 34.68498035621067|
|2017-08-24 00:00:00|4316.01| 59.92559160664987|
|2017-08-25 00:00:00|4280.68|  55.8596435087892|
|2017-08-26 00:00:00|4337.44|60.496929443124365|
|2017-08-27 00:00:00|4310.01|57.360682328967464|
|2017-08-28 00:00:00|4386.69| 63.11699366870879|
|2017-08-29 00:00:00|4587.48| 73.28666385820368|
|2017-08-30 00:00:00|4555.14| 69.94164254302801|
+-------------------+-------+------------------+

