### Import Libs :

In [1]:
import os

from dotenv import load_dotenv
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, col, lag, lead, mean, when
from pyspark.sql.window import Window

load_dotenv()

False

### Create Spark Session :

In [2]:
# getOrCreate() hands back the already-running session when the cell is re-run,
# so re-executing this cell does not start a second session.
spark = SparkSession.builder.appName("App").getOrCreate()


Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
26/01/21 09:54:56 WARN Utils: Your hostname, rachid-Latitude-5580, resolves to a loopback address: 127.0.1.1; using 192.168.68.108 instead (on interface wlp1s0)
26/01/21 09:54:56 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
26/01/21 09:55:01 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


### Load Raw Data :

In [3]:
def load_data():
    """Read the raw parquet file (btc_minute_data.parquet) into a Spark DataFrame.

    Uses the notebook-level ``spark`` session.

    Returns:
        The DataFrame produced by ``spark.read.parquet`` for the raw data path.
    """
    return spark.read.parquet("../../Data/Normal/btc_minute_data.parquet")

df = load_data()
df.count()

                                                                                

1000

### Save Bronze Data :

In [4]:
def Save_Bronze_Local(Data_i):
    """Write ``Data_i`` as parquet into the local Bronze folder.

    The write mode is 'overwrite', so whatever is already stored at the
    target path is replaced.
    """
    bronze_writer = Data_i.write.format("parquet").mode('overwrite')
    bronze_writer.save("../../Data/Bronze/")

Save_Bronze_Local(df)

                                                                                

### Check Null Values :

In [5]:
def _most_frequent_value(Data_B, c):
    """Return the most frequent non-null value of column ``c``, or None if there is none."""
    top_row = (
        Data_B.filter(col(c).isNotNull())  # never let the null group win the vote
        .groupBy(c)
        .count()
        .orderBy(col("count").desc())
        .first()
    )
    return None if top_row is None else top_row[0]


def _fill_nulls(Data_B, c):
    """Fill the nulls of column ``c`` with its mean, else with its most frequent value."""
    try:
        fill_value = Data_B.select(mean(c)).collect()[0][0]
    except Exception:
        # The mean cannot be computed for this column type; use the mode instead.
        fill_value = None

    if fill_value is None:
        fill_value = _most_frequent_value(Data_B, c)

    if fill_value is None:
        # Column holds nothing but nulls: there is no value to impute with.
        print(f"Column {c} has no non-null value to fill with, left unchanged")
        return Data_B

    return Data_B.fillna({c: fill_value})


def CheckNull(Data_B):
    """Report the null count of every column and clean the nulls found.

    For each column, in ``Data_B.columns`` order:
      * no nulls  -> a confirmation line is printed;
      * nulls < 5% of the current row count -> rows null in that column are dropped;
      * otherwise -> nulls are filled with the column mean, falling back to the
        most frequent non-null value when no usable mean exists.

    Args:
        Data_B: Spark DataFrame to inspect.

    Returns:
        The cleaned DataFrame (the input is not modified in place).
    """
    num_rows = Data_B.count()

    for c in Data_B.columns:
        num_null = Data_B.filter(col(c).isNull()).count()
        if num_null == 0:
            print(f"{c} : you dont have any null values")
            continue

        null_percent = (num_null / num_rows) * 100
        print(f"Column {c} has {num_null} null values ({null_percent:.2f}%)")

        if null_percent < 5:
            Data_B = Data_B.na.drop(subset=[c])
            # Exactly the rows counted above were removed; keep the denominator
            # in sync so later percentages are not computed on a stale total.
            num_rows -= num_null
        else:
            Data_B = _fill_nulls(Data_B, c)

    return Data_B

# Keep the cleaned frame: the function returns a new DataFrame and leaves its input untouched.
df = CheckNull(df)

open_time : you dont have any null values
open : you dont have any null values
high : you dont have any null values
low : you dont have any null values
close : you dont have any null values
volume : you dont have any null values
close_time : you dont have any null values
quote_asset_volume : you dont have any null values
number_of_trades : you dont have any null values
taker_buy_base_volume : you dont have any null values
taker_buy_quote_volume : you dont have any null values
ignore : you dont have any null values


DataFrame[open_time: timestamp_ntz, open: double, high: double, low: double, close: double, volume: double, close_time: timestamp_ntz, quote_asset_volume: double, number_of_trades: bigint, taker_buy_base_volume: double, taker_buy_quote_volume: double, ignore: string]

### Check duplicated Values :

In [6]:
def CheckDuplicated(Data_B):
    """Return ``Data_B`` without duplicated rows.

    Compares the total row count with the distinct row count. When they
    match, a message is printed and the input is returned as is; otherwise
    the distinct version of the frame is returned.
    """
    total_rows = Data_B.count()
    unique_rows = Data_B.distinct().count()

    if total_rows != unique_rows:
        return Data_B.distinct()

    print("you don't have any duplicated values !!")
    return Data_B

# CheckDuplicated returns a new frame; rebind df so a deduplicated result is not lost.
df = CheckDuplicated(df)

you don't have any duplicated values !!


DataFrame[open_time: timestamp_ntz, open: double, high: double, low: double, close: double, volume: double, close_time: timestamp_ntz, quote_asset_volume: double, number_of_trades: bigint, taker_buy_base_volume: double, taker_buy_quote_volume: double, ignore: string]

### Remove ignore column

In [7]:
def Ignore_Remover(Data_B):
    """Return ``Data_B`` without its ``ignore`` column."""
    ignore_column = col("ignore")
    return Data_B.drop(ignore_column)

df = Ignore_Remover(df)
df.printSchema()

root
 |-- open_time: timestamp_ntz (nullable = true)
 |-- open: double (nullable = true)
 |-- high: double (nullable = true)
 |-- low: double (nullable = true)
 |-- close: double (nullable = true)
 |-- volume: double (nullable = true)
 |-- close_time: timestamp_ntz (nullable = true)
 |-- quote_asset_volume: double (nullable = true)
 |-- number_of_trades: long (nullable = true)
 |-- taker_buy_base_volume: double (nullable = true)
 |-- taker_buy_quote_volume: double (nullable = true)



### Create Column close_t_plus_10 

In [8]:
def Create_Column_close_t_plus_10(Data_B):
    """Add ``close_t_plus_10``: the ``close`` value found 10 rows later.

    Rows are ordered by ``open_time``. The window has no partition, so Spark
    processes it on a single partition (see the WindowExec warning in the logs).
    """
    by_open_time = Window.orderBy("open_time")
    close_in_10_rows = lead("close", 10).over(by_open_time)
    return Data_B.withColumn("close_t_plus_10", close_in_10_rows)

df = Create_Column_close_t_plus_10(df)

### Check Null Values After Adding close_t_plus_10 Column :

In [9]:
# lead("close", 10) has nothing to look at for the last 10 rows, so
# close_t_plus_10 is null there; CheckNull drops rows when the null share of a
# column is below its 5% threshold (10 rows, 1.00%, on this run).
df = CheckNull(df)

open_time : you dont have any null values
open : you dont have any null values
high : you dont have any null values
low : you dont have any null values
close : you dont have any null values
volume : you dont have any null values
close_time : you dont have any null values
quote_asset_volume : you dont have any null values
number_of_trades : you dont have any null values
taker_buy_base_volume : you dont have any null values
taker_buy_quote_volume : you dont have any null values


26/01/21 09:55:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


Column close_t_plus_10 has 10 null values (1.00%)


### Create Entry Variable :
Instruction : calculer la variation relative du prix de clôture par rapport à la minute précédente pour chaque ligne du dataset. (lag et over)

In [10]:
def Entry_Creator(Data_B):
    """Add ``prev_close`` and ``return`` columns.

    ``prev_close`` is the ``close`` of the previous row (ordered by
    ``open_time``); ``return`` is the relative change
    ``(close - prev_close) / prev_close``.
    """
    by_open_time = Window.orderBy("open_time")
    previous_close = lag("close", 1).over(by_open_time)
    with_previous = Data_B.withColumn("prev_close", previous_close)

    relative_change = (col("close") - col("prev_close")) / col("prev_close")
    return with_previous.withColumn("return", relative_change)


df = Entry_Creator(df)
df.show()


26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+
|          open_time|    open|    high|     low|   close| volume|          close_time|quote_asset_volume|number_of_trades|taker_buy_base_volume|taker_buy_quote_volume|close_t_plus_10|prev_close|              return|
+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+
|2026-01-18 21:39:00|95387.81|95387.81|95380.76|95380.76| 0.5782|2026-01-18 21:39:...|     55152.8057001|             711|              0.11958|         11406.4454078|       95424.79|      NULL|                NULL|
|2026-01-18 21:40:00|95380.77|95380.77|95380.76|95380.77|0.44398|2026-01-18 21:40:...|     42347.1505478|             102|              

### Check Null Values After Adding return Column :

In [11]:
# lag("close", 1) has no previous row for the first record, so prev_close is
# null there; CheckNull drops such rows while the null share stays under 5%.
df = CheckNull(df)

26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:22 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

open_time : you dont have any null values
open : you dont have any null values
high : you dont have any null values


26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

low : you dont have any null values
close : you dont have any null values


26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

volume : you dont have any null values
close_time : you dont have any null values


26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:23 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

quote_asset_volume : you dont have any null values
number_of_trades : you dont have any null values


26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


taker_buy_base_volume : you dont have any null values
taker_buy_quote_volume : you dont have any null values


26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


close_t_plus_10 : you dont have any null values
Column prev_close has 1 null values (0.10%)


26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:24 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

return : you dont have any null values


### Create Entry MA_5 :
Instruction : pour chaque minute, calculer la moyenne des prix de clôture sur les 5 dernières minutes pour lisser les variations à court terme (avg , over et rowsBetween)

In [None]:
def MA5_Creator(Data_B):
    """Add ``MA_5``: the average ``close`` of the 5 rows preceding each row.

    Rows are ordered by ``open_time``. The frame is ``rowsBetween(-5, -1)``,
    so the current row is excluded and the frame counts rows, not minutes.
    A row with no preceding row gets a null ``MA_5``.
    """
    window = Window.orderBy("open_time")
    Window_5 = window.rowsBetween(-5, -1)
    Data_B = Data_B.withColumn("MA_5", avg(col("close")).over(Window_5))
    return Data_B

df = MA5_Creator(df)

### Create Entry MA_10 :
Instruction : pour chaque minute, calculer la moyenne des prix de clôture sur les 10 dernières minutes pour lisser les variations à court terme (avg , over et rowsBetween)

In [13]:
def MA10_Creator(Data_B):
    """Add ``MA_10``: the average ``close`` of the 10 rows preceding each row.

    Rows are ordered by ``open_time``; the frame ``rowsBetween(-10, -1)``
    leaves the current row out of the average.
    """
    previous_10_rows = Window.orderBy("open_time").rowsBetween(-10, -1)
    moving_average = avg(col("close")).over(previous_10_rows)
    return Data_B.withColumn("MA_10", moving_average)

df = MA10_Creator(df)

In [14]:
# Preview the frame now that the MA_5 and MA_10 columns have been added.
df.show()

26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+-----------------+-----------------+
|          open_time|    open|    high|     low|   close| volume|          close_time|quote_asset_volume|number_of_trades|taker_buy_base_volume|taker_buy_quote_volume|close_t_plus_10|prev_close|              return|             MA_5|            MA_10|
+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+-----------------+-----------------+
|2026-01-18 21:40:00|95380.77|95380.77|95380.76|95380.77|0.44398|2026-01-18 21:40:...|     42347.1505478|             102|               0.0723|           6896.029671|       95412.86|  95380.76|1.048429474593537...|             NULL|           

### Check Null Values After Adding MA_10 and MA_5 Columns :

In [15]:
# Both moving-average frames end at the previous row, so the first row has no
# average; CheckNull drops such rows while the null share stays under 5%.
df = CheckNull(df)

26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

open_time : you dont have any null values
open : you dont have any null values


26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

high : you dont have any null values
low : you dont have any null values


26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:25 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

close : you dont have any null values
volume : you dont have any null values


26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

close_time : you dont have any null values
quote_asset_volume : you dont have any null values


26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

number_of_trades : you dont have any null values
taker_buy_base_volume : you dont have any null values


26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:26 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

taker_buy_quote_volume : you dont have any null values
close_t_plus_10 : you dont have any null values


26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

prev_close : you dont have any null values
return : you dont have any null values


26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

Column MA_5 has 1 null values (0.10%)
MA_10 : you dont have any null values


26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


### Create Entry taker_ratio :
Instruction : mesurer la proportion de BTC acheté par les “takers” par rapport au volume total échangé pendant la minute.


In [16]:
def takerratio_creator(Data_B):
    """Add ``taker_ratio`` = ``taker_buy_base_volume`` / ``volume``.

    The division is only evaluated when ``volume`` is non-zero; rows with a
    zero (or null) ``volume`` get a null ratio. Without the guard, a zero
    volume raises a divide-by-zero error when Spark runs in ANSI SQL mode.
    """
    volume = col("volume")
    # when(...) without otherwise(...) yields null for rows failing the condition.
    ratio = when(volume != 0, col("taker_buy_base_volume") / volume)
    return Data_B.withColumn("taker_ratio", ratio)

df = takerratio_creator(df)
df.show()
df.printSchema()

26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0

+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+-----------------+-----------------+-------------------+
|          open_time|    open|    high|     low|   close| volume|          close_time|quote_asset_volume|number_of_trades|taker_buy_base_volume|taker_buy_quote_volume|close_t_plus_10|prev_close|              return|             MA_5|            MA_10|        taker_ratio|
+-------------------+--------+--------+--------+--------+-------+--------------------+------------------+----------------+---------------------+----------------------+---------------+----------+--------------------+-----------------+-----------------+-------------------+
|2026-01-18 21:41:00|95380.76|95380.77|95380.76|95380.76|0.30774|2026-01-18 21:41:...|     29352.4770786|              81|              0.19962|         19039.9093074|        95400.0| 

### Save Data Silver :

In [17]:
def Save_Silver_Local(Data_B):
    """Write ``Data_B`` as parquet into the local Silver folder.

    The write mode is 'overwrite', so whatever is already stored at the
    target path is replaced.
    """
    silver_writer = Data_B.write.format("parquet").mode('overwrite')
    silver_writer.save("../../Data/Silver/")

Save_Silver_Local(df)

26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 09:55:28 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
26/01/21 0