In [1]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.window import Window
import pyspark.sql.types as T

In [2]:
spark = SparkSession.builder \
    .appName("binance") \
    .getOrCreate()

In [79]:
df = spark.read.csv("Btcusdt_kline_1d_new.csv", header=True, inferSchema=True)
#df = df.orderBy("datetime", ascending=False).limit(20).orderBy("datetime")
df = df.filter(F.col("datetime") <= "2024-02-13")

df.count()

2372

In [80]:
def calculate_ewma(values, com):
    alpha = 1 / (com + 1)
    ewma = values[0]
    for value in values[1:]:
        ewma = (1 - alpha) * ewma + alpha * value
    return ewma
ewma_udf = F.udf(calculate_ewma, T.DoubleType())


def RSI(df, period=14, com=13):

    df.orderBy("datetime", ascending=True)
    windowSpec = Window.orderBy("datetime")
    df = df.withColumn("prev_close", F.lag("close", 1).over(windowSpec))

    df = df.withColumn("delta", F.col("close") - F.col("prev_close"))
    df = df.withColumn("up", F.when(F.col("delta") > 0, F.col("delta")).otherwise(0))
    df = df.withColumn("down", F.when(F.col("delta") < 0, -F.col("delta")).otherwise(0))

    df = df.withColumn("up_ewma", ewma_udf(F.collect_list("up").over(windowSpec), F.lit(com)))
    df = df.withColumn("down_ewma", ewma_udf(F.collect_list("down").over(windowSpec), F.lit(com)))
    
    df = df.withColumn("rs", df.up_ewma / df.down_ewma)
    df = df.withColumn("rsi", 100 - (100 / (1 + df.rs)))

    return df

In [81]:
rsi_df = RSI(df)
rsi_df = rsi_df.orderBy("datetime", ascending =False).limit(30)
#rsi_df.select("datetime", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()
rsi_df.select("datetime","low", "close","up_ewma","down_ewma","rsi").show()


+-------------------+--------+--------+------------------+------------------+------------------+
|           datetime|     low|   close|           up_ewma|         down_ewma|               rsi|
+-------------------+--------+--------+------------------+------------------+------------------+
|2024-02-13 00:00:00|48300.95|49699.59| 557.0490267578268| 167.7666321028352| 76.85388966809194|
|2024-02-12 00:00:00|47710.01|49917.27| 599.8989518930442| 163.9271422645917| 78.53868262442982|
|2024-02-11 00:00:00|47557.16|48299.99| 521.6388712694323|176.53692243879107| 74.71454552998037|
|2024-02-10 00:00:00| 46800.0|47751.09| 519.5418613670809|190.11668570331344| 73.21011823388145|
|2024-02-09 00:00:00|45242.12|47132.77| 511.9435430107025|204.74104614202986| 71.43219636073442|
|2024-02-08 00:00:00| 44331.1|45288.65| 409.4684309346031|220.49035738372444| 64.99924098649015|
|2024-02-07 00:00:00| 42788.0| 44349.6| 368.7313871603415| 237.4511541055494| 60.82844061960607|
|2024-02-06 00:00:00| 42574.0|

# 상승 다이버전스 샘플

In [82]:
def diversions(df):
    last_row = df.limit(1)
    last_row_rsi = last_row.select("rsi").collect()[0]["rsi"]
    last_row_low = last_row.select("low").collect()[0]["low"]

    # 현재 행 중복제거 할 것.
    df = df.filter(F.col("rsi") <= 30)
    df.select("datetime","low", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()

    df = df.filter((F.col("rsi") <= last_row_rsi) & (F.col("low") > last_row_low))

    df.select("datetime","low", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()
    if df.count() > 0:
        return True
    
    return False

print(diversions(rsi_df))
    
#res.select("datetime","low", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()

+--------+---+-----+-------+---------+---+
|datetime|low|close|up_ewma|down_ewma|rsi|
+--------+---+-----+-------+---------+---+
+--------+---+-----+-------+---------+---+

+--------+---+-----+-------+---------+---+
|datetime|low|close|up_ewma|down_ewma|rsi|
+--------+---+-----+-------+---------+---+
+--------+---+-----+-------+---------+---+

False


In [74]:
last_row_rsi

70817.01

In [21]:
last_row_low

70606.0

# 하락 다이버전스 테스트

In [85]:
df_except_last = rsi_df.limit(29)
last_row = rsi_df.limit(1)
last_row_rsi = last_row.select("rsi").collect()[0]["rsi"]
last_row_high = last_row.select("high").collect()[0]["high"]

filtered_df = df_except_last.filter(F.col("rsi") >= 70)
filtered_df.select("datetime","high", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()

+-------------------+--------+--------+-----------------+------------------+-----------------+
|           datetime|    high|   close|          up_ewma|         down_ewma|              rsi|
+-------------------+--------+--------+-----------------+------------------+-----------------+
|2024-02-09 00:00:00| 48200.0|47132.77|511.9435430107025|204.74104614202986|71.43219636073442|
|2024-02-10 00:00:00| 48170.0|47751.09|519.5418613670809|190.11668570331344|73.21011823388145|
|2024-02-11 00:00:00|48592.66|48299.99|521.6388712694323|176.53692243879107|74.71454552998037|
|2024-02-12 00:00:00|50334.82|49917.27|599.8989518930442| 163.9271422645917|78.53868262442982|
|2024-02-13 00:00:00|50368.61|49699.59|557.0490267578268| 167.7666321028352|76.85388966809194|
+-------------------+--------+--------+-----------------+------------------+-----------------+



In [33]:
last_row_rsi

60.088240336589045

In [32]:
last_row_high

70900.0

29일 위의 데이터 중에서 다이버전스가 뜨기 위해 만족해야하는 조건을 충족한 로우

In [84]:
filtered_df = filtered_df.filter((F.col("rsi") >= last_row_rsi) & (F.col("high") < last_row_high))
filtered_df.select("datetime","high", "close","up_ewma","down_ewma","rsi").orderBy("datetime").show()

+-------------------+--------+--------+-----------------+-----------------+-----------------+
|           datetime|    high|   close|          up_ewma|        down_ewma|              rsi|
+-------------------+--------+--------+-----------------+-----------------+-----------------+
|2024-02-12 00:00:00|50334.82|49917.27|599.8989518930442|163.9271422645917|78.53868262442982|
+-------------------+--------+--------+-----------------+-----------------+-----------------+



# 변동률 체크

In [86]:
last_row_close = last_row.select("close").head()[0]

t = filtered_df.limit(1)
div_close = t.select("high").collect()[0]["high"]


In [87]:
def pc(a,b):
    return abs(((b-a)/a) * 100)
pc(last_row_close, div_close)

1.3461278050784806

In [None]:
#크리티컬 이슈 없으면, 시행착오포함해서 좀 더 시간 이번주안으로 다이버전스는 끝날 거
#-> 실제로 파이프라인돌릴때 오류가 테스트기간? 좀 있긴해야죠