In [1]:
import pandas as pd 
import numpy as np 
import time 
import gc 
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
from sklearn.model_selection import TimeSeriesSplit
from scipy.stats import pearsonr
from tqdm import tqdm
import matplotlib.pyplot as plt


import warnings
warnings.simplefilter("ignore") 

pd.set_option("display.max_columns", None)

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



In [2]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        # else:
            # df[col] = df[col].astype('category')
           

    end_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [3]:
df_asset_details = pd.read_csv("input/asset_details.csv").sort_values("Asset_ID")
df_asset_details

Unnamed: 0,Asset_ID,Weight,Asset_Name
1,0,4.304065,Binance Coin
2,1,6.779922,Bitcoin
0,2,2.397895,Bitcoin Cash
10,3,4.406719,Cardano
13,4,3.555348,Dogecoin
3,5,1.386294,EOS.IO
5,6,5.894403,Ethereum
4,7,2.079442,Ethereum Classic
11,8,1.098612,IOTA
6,9,2.397895,Litecoin


In [4]:
def read_csv_strict(file_name="input/train.csv"):
    df = pd.read_csv(file_name).pipe(reduce_mem_usage)
    df["datetime"] = pd.to_datetime(df["timestamp"], unit="s")
    df = df[df["datetime"] >= "2020-05-13 00:00:00"]
    return df

In [5]:
df_train = read_csv_strict()
df_train.head()

Memory usage of dataframe is 1849.12 MB
Memory usage after optimization is: 716.53 MB
Decreased by 61.2%


Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime
14413630,1589328000,3,76.0,0.050415,0.050415,0.050293,0.050293,439556.0625,0.050354,0.001899,2020-05-13
14413631,1589328000,2,178.0,233.375,233.5,232.875,233.25,171.367188,233.259775,-0.003397,2020-05-13
14413632,1589328000,0,160.0,15.796875,15.796875,15.710938,15.726562,3104.360107,15.751624,-0.002546,2020-05-13
14413633,1589328000,1,1303.0,8816.0,8824.0,8792.0,8808.0,96.224197,8807.092988,0.006603,2020-05-13
14413634,1589328000,4,5.0,0.00246,0.00246,0.002459,0.00246,19970.390625,0.00246,0.006462,2020-05-13


In [6]:
df_supp_train = read_csv_strict(file_name="input/supplemental_train.csv")
df_supp_train.head()

Memory usage of dataframe is 192.13 MB
Memory usage after optimization is: 84.06 MB
Decreased by 56.2%


Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime
0,1632182460,3,561.0,2.079028,2.080605,2.072,2.076458,280462.7,2.075869,0.003084,2021-09-21 00:01:00
1,1632182460,2,169.0,541.005981,541.200012,539.700012,540.721985,188.9432,540.716919,-0.000607,2021-09-21 00:01:00
2,1632182460,0,400.0,363.737488,363.899994,363.0,363.653992,879.2685,363.499542,-0.019379,2021-09-21 00:01:00
3,1632182460,1,1933.0,42986.144531,43001.0,42898.0,42947.066406,91.83862,42942.976562,-0.000374,2021-09-21 00:01:00
4,1632182460,4,348.0,0.208326,0.2084,0.2078,0.2082,1051337.0,0.208077,-0.001233,2021-09-21 00:01:00


In [7]:
df_test = read_csv_strict(file_name="input/example_test.csv")
df_test.head()

Memory usage of dataframe is 0.00 MB
Memory usage after optimization is: 0.00 MB
Decreased by 72.0%


Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,group_num,row_id,datetime
0,1623542400,3,1201.0,1.478516,1.486328,1.477539,1.483398,654799.5625,1.481445,0,0,2021-06-13
1,1623542400,2,1020.0,580.5,584.0,580.0,582.5,1227.988281,581.5,0,1,2021-06-13
2,1623542400,0,626.0,343.75,345.0,343.75,344.5,1718.83252,344.5,0,2,2021-06-13
3,1623542400,1,2888.0,35552.0,35648.0,35488.0,35616.0,163.811539,35584.0,0,3,2021-06-13
4,1623542400,4,433.0,0.312256,0.3125,0.312012,0.312256,585577.4375,0.312256,0,4,2021-06-13


In [8]:
print(df_train["timestamp"].min(), df_train["timestamp"].max())

1589328000 1632182400


In [9]:
print(df_supp_train["timestamp"].min(), df_supp_train["timestamp"].max())

1632182460 1642982400


In [10]:
df_train = pd.concat([df_train, df_supp_train, df_test])
df_train.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
14413630,1589328000,3,76.0,0.050415,0.050415,0.050293,0.050293,439556.0625,0.050354,0.001899,2020-05-13,,
14413631,1589328000,2,178.0,233.375,233.5,232.875,233.25,171.367188,233.259775,-0.003397,2020-05-13,,
14413632,1589328000,0,160.0,15.796875,15.796875,15.710938,15.726562,3104.360107,15.751624,-0.002546,2020-05-13,,
14413633,1589328000,1,1303.0,8816.0,8824.0,8792.0,8808.0,96.224197,8807.092988,0.006603,2020-05-13,,
14413634,1589328000,4,5.0,0.00246,0.00246,0.002459,0.00246,19970.390625,0.00246,0.006462,2020-05-13,,


In [11]:
df_train.shape, df_supp_train.shape, df_test.shape

((12341510, 13), (2518278, 11), (56, 12))

In [12]:
del df_supp_train, df_test
gc.collect()

0

In [13]:
df_train.memory_usage().sum() / 1024**3

0.8160688076168299

In [14]:
## Negative volume implies lack of liquidity in the market
df_train[df_train.Volume < 0]

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
15007600,1592189340,10,74.0,500.0,500.0,500.0,500.0,-1.110223e-16,-512.0,-0.001065,2020-06-15 02:49:00,,
15008561,1592193600,10,9.0,475.0,475.0,475.0,475.0,-0.3662812,475.0,0.013779,2020-06-15 04:00:00,,
15179186,1592991180,10,18.0,500.0,500.0,500.0,500.0,-1.387779e-17,-512.0,,2020-06-24 09:33:00,,


In [15]:
df_train[df_train.Target.isna()].groupby("Asset_ID")["timestamp"].count()

Asset_ID
0       521
1        53
2        95
3       130
4     36281
5        49
6        60
7      2064
8     71408
9        54
10    26200
11    24659
12      270
13      333
Name: timestamp, dtype: int64

In [16]:
df_train[(df_train.Target.isna()) & (df_train.Asset_ID==8)].describe(percentiles=[0.25, 0.50, 0.75, 0.90, 0.95, 0.99])

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
count,71408.0,71408.0,71408.0,71408.0,71408.0,71408.0,71408.0,71408.0,71408.0,0.0,71408,4.0,4.0
mean,1598196000.0,8.0,5.510237,0.265546,0.265878,0.26522,0.265543,5157.311035,0.265541,,2020-08-23 15:24:32.432500480,1.5,29.0
min,1589328000.0,8.0,1.0,0.181885,0.182251,0.181152,0.181885,0.002,0.182062,,2020-05-13 00:04:00,0.0,8.0
25%,1593028000.0,8.0,1.0,0.230591,0.230591,0.230469,0.230591,400.597527,0.230544,,2020-06-24 19:49:00,0.75,18.5
50%,1597965000.0,8.0,3.0,0.255127,0.255371,0.254883,0.255127,1535.466248,0.25515,,2020-08-20 23:02:30,1.5,29.0
75%,1602760000.0,8.0,6.0,0.2771,0.277344,0.2771,0.2771,4755.617554,0.2772,,2020-10-15 11:02:15,2.25,39.5
90%,1605217000.0,8.0,12.0,0.310303,0.310547,0.310059,0.310303,12047.007227,0.310333,,2020-11-12 21:41:30,2.7,45.8
95%,1607110000.0,8.0,18.0,0.358887,0.359375,0.358398,0.358887,20554.394629,0.358802,,2020-12-04 19:18:36,2.85,47.9
99%,1611566000.0,8.0,42.0,0.461165,0.462141,0.460432,0.460938,54511.357969,0.461022,,2021-01-25 09:08:57,2.97,49.58
max,1642982000.0,8.0,538.0,1.918945,1.942383,1.893555,1.918945,627882.4375,1.920249,,2022-01-24 00:00:00,3.0,50.0


In [17]:
df_train = df_train.sort_values(by=["Asset_ID", "timestamp"], ascending=True)

df_train.head(20)

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
14413632,1589328000,0,160.0,15.796875,15.796875,15.710938,15.726562,3104.360107,15.751624,-0.002546,2020-05-13 00:00:00,,
14413645,1589328060,0,43.0,15.734375,15.757812,15.726562,15.75,785.429993,15.736337,-0.001049,2020-05-13 00:01:00,,
14413658,1589328120,0,40.0,15.742188,15.773438,15.742188,15.757812,1467.939941,15.76135,-0.000627,2020-05-13 00:02:00,,
14413670,1589328180,0,34.0,15.757812,15.773438,15.757812,15.765625,961.549988,15.767834,-0.000198,2020-05-13 00:03:00,,
14413681,1589328240,0,26.0,15.765625,15.773438,15.75,15.765625,1163.189941,15.765259,0.000259,2020-05-13 00:04:00,,
14413694,1589328300,0,138.0,15.765625,15.820312,15.765625,15.804688,2817.709961,15.791499,0.000312,2020-05-13 00:05:00,,
14413707,1589328360,0,64.0,15.804688,15.804688,15.789062,15.796875,2703.879883,15.79995,0.000301,2020-05-13 00:06:00,,
14413719,1589328420,0,15.0,15.789062,15.796875,15.78125,15.78125,847.809998,15.788136,2.8e-05,2020-05-13 00:07:00,,
14413731,1589328480,0,65.0,15.804688,15.828125,15.78125,15.820312,1327.428955,15.812743,0.000457,2020-05-13 00:08:00,,
14413744,1589328540,0,30.0,15.804688,15.804688,15.78125,15.789062,998.530029,15.788988,6.5e-05,2020-05-13 00:09:00,,


# Dealing with missing data

In [18]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12341510 entries, 14413632 to 2518275
Data columns (total 13 columns):
 #   Column     Dtype         
---  ------     -----         
 0   timestamp  int32         
 1   Asset_ID   int8          
 2   Count      float32       
 3   Open       float32       
 4   High       float32       
 5   Low        float32       
 6   Close      float32       
 7   Volume     float32       
 8   VWAP       float64       
 9   Target     float16       
 10  datetime   datetime64[ns]
 11  group_num  float64       
 12  row_id     float64       
dtypes: datetime64[ns](1), float16(1), float32(6), float64(3), int32(1), int8(1)
memory usage: 835.7 MB


In [19]:
print("total missing: ")
print(df_train.isna().sum())
print("----------------------------")
print("percent missing: ")
print(df_train.isna().sum() * 100. / df_train.shape[0])

total missing: 
timestamp           0
Asset_ID            0
Count               0
Open                0
High                0
Low                 0
Close               0
Volume              0
VWAP                9
Target         162177
datetime            0
group_num    12341454
row_id       12341454
dtype: int64
----------------------------
percent missing: 
timestamp     0.000000
Asset_ID      0.000000
Count         0.000000
Open          0.000000
High          0.000000
Low           0.000000
Close         0.000000
Volume        0.000000
VWAP          0.000073
Target        1.314077
datetime      0.000000
group_num    99.999546
row_id       99.999546
dtype: float64


In [20]:
df_train.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
14413632,1589328000,0,160.0,15.796875,15.796875,15.710938,15.726562,3104.360107,15.751624,-0.002546,2020-05-13 00:00:00,,
14413645,1589328060,0,43.0,15.734375,15.757812,15.726562,15.75,785.429993,15.736337,-0.001049,2020-05-13 00:01:00,,
14413658,1589328120,0,40.0,15.742188,15.773438,15.742188,15.757812,1467.939941,15.76135,-0.000627,2020-05-13 00:02:00,,
14413670,1589328180,0,34.0,15.757812,15.773438,15.757812,15.765625,961.549988,15.767834,-0.000198,2020-05-13 00:03:00,,
14413681,1589328240,0,26.0,15.765625,15.773438,15.75,15.765625,1163.189941,15.765259,0.000259,2020-05-13 00:04:00,,


# Ffill for each group for Target missing value

In [21]:
df_train["Target"] = df_train.sort_values(["Asset_ID", "timestamp"]).groupby("Asset_ID")["Target"].ffill()
df_train["VWAP"] = df_train.sort_values(["Asset_ID", "timestamp"]).groupby("Asset_ID")["VWAP"].ffill()
df_train.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
14413632,1589328000,0,160.0,15.796875,15.796875,15.710938,15.726562,3104.360107,15.751624,-0.002546,2020-05-13 00:00:00,,
14413645,1589328060,0,43.0,15.734375,15.757812,15.726562,15.75,785.429993,15.736337,-0.001049,2020-05-13 00:01:00,,
14413658,1589328120,0,40.0,15.742188,15.773438,15.742188,15.757812,1467.939941,15.76135,-0.000627,2020-05-13 00:02:00,,
14413670,1589328180,0,34.0,15.757812,15.773438,15.757812,15.765625,961.549988,15.767834,-0.000198,2020-05-13 00:03:00,,
14413681,1589328240,0,26.0,15.765625,15.773438,15.75,15.765625,1163.189941,15.765259,0.000259,2020-05-13 00:04:00,,


In [22]:
df_train.isna().sum()

timestamp           0
Asset_ID            0
Count               0
Open                0
High                0
Low                 0
Close               0
Volume              0
VWAP                0
Target             60
datetime            0
group_num    12341454
row_id       12341454
dtype: int64

In [23]:
df_train[ (df_train["Target"].isna()) & (df_train["Asset_ID"] == 10)]

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
14422680,1589371980,10,5.0,318.5,318.5,313.5,318.5,4.0,314.492408,,2020-05-13 12:13:00,,
14425555,1589385600,10,1.0,319.0,319.0,319.0,319.0,0.1178322,319.03,,2020-05-13 16:00:00,,
14441857,1589463480,10,1.0,319.0,319.0,319.0,319.0,0.076923,319.03,,2020-05-14 13:38:00,,
14443872,1589473140,10,1.0,317.75,317.75,317.75,317.75,0.0227026,317.64,,2020-05-14 16:19:00,,
14444065,1589474040,10,1.0,317.75,317.75,317.75,317.75,0.02652251,317.64,,2020-05-14 16:34:00,,
14444143,1589474400,10,1.0,317.0,317.0,317.0,317.0,0.03361441,317.09,,2020-05-14 16:40:00,,
14444156,1589474460,10,2.0,318.0,318.0,318.0,318.0,0.2544,317.91139,,2020-05-14 16:41:00,,
14444822,1589477640,10,1.0,318.0,318.0,318.0,318.0,0.1423,317.9,,2020-05-14 17:34:00,,
14445159,1589479260,10,1.0,318.0,318.0,318.0,318.0,0.03131256,317.92,,2020-05-14 18:01:00,,
14446298,1589484780,10,2.0,322.75,322.75,322.75,322.75,0.1718209,322.692048,,2020-05-14 19:33:00,,


In [24]:
df_train["Target"] = df_train["Target"].fillna(0)

In [25]:
df_train[ (df_train["Target"].isna()) & (df_train["Asset_ID"] == 10)]

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id


In [26]:
df_train["Target"].isna().sum()

0

# Utility Functions

In [27]:
# Two new features from the competition tutorial
def upper_shadow(df):
    return df["High"] - np.maximum(df["Close"], df["Open"])

def lower_shadow(df):
    return np.minimum(df["Close"], df["Open"]) - df["Low"]

## notebook: crypto-prediction-technical-analysis-features
def SM_A_M(df, colname, n):
    mean = df[colname].rolling(window=n).mean()
    median = df[colname].rolling(window=n).median()
    
    return mean, median

def EMA1(df, colname, n):
    ## EMA
    """
    https://qiita.com/MuAuan/items/b08616a841be25d29817
    """
    a= 2/(n+1)
    return df[colname].ewm(alpha=a).mean()

def MACD(df, colname, span1=12, span2=26, span3=9):
    ## MACD
    """
    Compute MACD
    # https://www.learnpythonwithrune.org/pandas-calculate-the-moving-average-convergence-divergence-macd-for-a-stock/
    """
    
    exp1 = EMA1(df, colname, span1)
    exp2 = EMA1(df, colname, span2)
    macd = 100 * (exp1 - exp2) / exp2
    signal = macd.ewm(alpha=2./(span3+1)).mean() ##EMA1(df, colname, macd, span3)

    return macd, signal

def BollingerBand(df, colname, window, no_of_std):
    mean = df[colname].rolling(window=window).mean()
    std = df[colname].rolling(window=window).std() 
    bb_high = mean + no_of_std * std
    bb_low = mean - no_of_std * std
    
    return bb_high, bb_low, std
## RSI
def rsiFunc(df, colname, n=14):
    prices = df[colname].values
    
    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed>=0].sum()/n
    down = -seed[seed<0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)

    for i in range(n, len(prices)):
        delta = deltas[i-1] # cause the diff is 1 shorter

        if delta>0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta

        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n

        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)

    return rsi

## https://stackoverflow.com/questions/42138357/pandas-rolling-slope-calculation
def calc_slope(x):
    slope = np.polyfit(range(len(x)), x, 1)[0]
    return slope

# Feature Engineering

In [28]:
def get_feat(df):
    df["High-Low"] = df["High"] - df["Low"]
    df["Close-Open"] = df["Close"] - df["Open"]

    df["dayofweek"] = df["datetime"].dt.dayofweek
    df["weekofyear"] = df["datetime"].dt.isocalendar().week
 ## df["weekofyear"] = df["datetime"].dt.day_name()
##df['weekofyear'] = df['datetime'].dt.isocalendar().week

    df["dayofweek"] = df["dayofweek"].astype(np.int32)
    df["weekofyear"] = df["weekofyear"].astype(np.int32)
  

    df = df.drop(["datetime", "VWAP", "Open", "High", "Low"], axis=1)
    gc.collect()
   

    macd, signal = MACD(df=df, colname="Close")
    df['MACD_' + "Close" + '_macd'] = macd
    df['MACD_' + "Close" + '_signal'] = signal

    df['MACD_' + "Close" + '_macd'] = df['MACD_' + "Close" + '_macd'].astype(np.float32)
    df['MACD_' + "Close" + '_signal'] = df['MACD_' + "Close" + '_signal'].astype(np.float32)

    arr_n = [5, 10, 15]
  
    arr_cols = ["Close", "Volume"]
    no_of_std = 2.5

    for colname in arr_cols: 
        print("colname = " + colname)
        for n in arr_n:
            print("n = "+ str(n))
            df['Target_lag_' + str(n)] = df['Target'].shift(n)
            df['Target_lag_' + str(n)].fillna(value=df["Target"], inplace=True)
            


            print("SMA")
            mean, median = SM_A_M(df=df, colname=colname, n=n)
            df['SM_' + colname + '_mean_' + str(n)] = mean
            df['SM_' + colname + '_median_' + str(n)] = median

            df['SM_' + colname + '_mean_' + str(n)] = df['SM_' + colname + '_mean_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_median_' + str(n)] = df['SM_' + colname + '_median_' + str(n)].astype(np.float32)

            df['SM_' + colname + '_mean_' + str(n)].fillna(value=df[colname], inplace=True)
            df['SM_' + colname + '_median_' + str(n)].fillna(value=df[colname], inplace=True)
            

            print("EMA1")
            ewmean = EMA1(df=df_train, colname=colname, n=n)
            df['EWM_' + colname + '_mean_' + str(n)] = ewmean
            df['EWM_' + colname + '_mean_' + str(n)] = df['EWM_' + colname + '_mean_' + str(n)].astype(np.float32)
            df['EWM_' + colname + '_mean_' + str(n)].fillna(value=1, inplace=True)
            
            if colname in ['VWAP', 'Volume']:
                continue

            if n==5:
                continue
                
            print("RSI")
            rsi = rsiFunc(df=df,  colname=colname, n=n)
            df['RSI_' + str(n)] = rsi
            df['RSI_' + str(n)] = df['RSI_' + str(n)].astype(np.float32)
            df['RSI_' + str(n)].fillna(value=1, inplace=True)
            

            print("BollingerBand")
            bb_high, bb_low, std = BollingerBand(df=df, colname=colname, window=n, no_of_std=no_of_std)
            df['SM_' + colname + '_std_' + str(n)] = std
            df['SM_' + colname + '_BB_High_' + str(n)] = bb_high
            df['SM_' + colname + '_BB_Low_' + str(n)] = bb_low

            df['SM_' + colname + '_std_' + str(n)] = df['SM_' + colname + '_std_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_BB_High_' + str(n)] = df['SM_' + colname + '_BB_High_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_BB_Low_' + str(n)] = df['SM_' + colname + '_BB_Low_' + str(n)].astype(np.float32)
            df['SM_' + colname + '_std_' + str(n)].fillna(value=0, inplace=True)
            df['SM_' + colname + '_BB_High_' + str(n)].fillna(value=df[colname], inplace=True) 
            df['SM_' + colname + '_BB_Low_' + str(n)].fillna(value=df[colname], inplace=True) 
            
            
            gc.collect()

    df = reduce_mem_usage(df)

    df.head()

    return df

In [29]:
df_train.describe()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,Target,datetime,group_num,row_id
count,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510.0,12341510,56.0,56.0
mean,1616461000.0,6.47666,512.1443,2828.929,2832.656,2825.253,2828.932,513250.6,,0.0,2021-03-23 00:51:30.991047168,1.5,27.5
min,1589328000.0,0.0,1.0,0.002277374,0.002277374,0.002269745,0.002275467,-0.3662812,-inf,-0.2531738,2020-05-13 00:00:00,0.0,0.0
25%,1603247000.0,3.0,44.0,0.3809531,0.3815918,0.3803711,0.3809782,162.6971,0.3809607,-0.001560211,2020-10-21 02:24:00,0.75,13.75
50%,1616525000.0,6.0,151.0,37.83398,37.9375,37.75835,37.83586,1781.942,37.83133,-6.455183e-05,2021-03-23 18:40:00,1.5,27.5
75%,1629752000.0,10.0,484.0,437.13,437.7,436.7,437.125,93983.38,437.123,0.001425743,2021-08-23 20:47:00,2.25,41.25
max,1642982000.0,13.0,165016.0,68986.12,69024.2,68734.0,68973.56,759755400.0,inf,0.3181152,2022-01-24 00:00:00,3.0,55.0
std,15366780.0,4.045149,1225.433,10116.66,10129.1,10103.99,10116.66,3421411.0,,0.0,,1.128152,16.309506


In [30]:
df_train=get_feat(df=df_train)
df_train.memory_usage(deep=True)

colname = Close
n = 5
SMA
EMA1
n = 10
SMA
EMA1
RSI
BollingerBand
n = 15
SMA
EMA1
RSI
BollingerBand
colname = Volume
n = 5
SMA
EMA1
n = 10
SMA
EMA1
n = 15
SMA
EMA1
Memory usage of dataframe is 2083.25 MB
Memory usage after optimization is: 1683.08 MB
Decreased by 19.2%


Index                  98732080
timestamp              49366040
Asset_ID               12341510
Count                  49366040
Close                  49366040
Volume                 49366040
Target                 24683020
group_num              24683020
row_id                 24683020
High-Low               24683020
Close-Open             24683020
dayofweek              12341510
weekofyear             12341510
MACD_Close_macd        24683020
MACD_Close_signal      24683020
Target_lag_5           24683020
SM_Close_mean_5        49366040
SM_Close_median_5      49366040
EWM_Close_mean_5       49366040
Target_lag_10          24683020
SM_Close_mean_10       49366040
SM_Close_median_10     49366040
EWM_Close_mean_10      49366040
RSI_10                 24683020
SM_Close_std_10        24683020
SM_Close_BB_High_10    49366040
SM_Close_BB_Low_10     49366040
Target_lag_15          24683020
SM_Close_mean_15       49366040
SM_Close_median_15     49366040
EWM_Close_mean_15      49366040
RSI_15  

In [31]:
df_train.info(verbose=False,memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 12341510 entries, 14413632 to 2518275
Columns: 43 entries, timestamp to EWM_Volume_mean_15
dtypes: float16(14), float32(25), int32(1), int8(3)
memory usage: 1.6 GB


In [32]:
gc.collect()

0

In [33]:
import memory_profiler
m1 = memory_profiler.memory_usage()
m1

[3450.76953125]

In [34]:
df_train.head(10)

Unnamed: 0,timestamp,Asset_ID,Count,Close,Volume,Target,group_num,row_id,High-Low,Close-Open,dayofweek,weekofyear,MACD_Close_macd,MACD_Close_signal,Target_lag_5,SM_Close_mean_5,SM_Close_median_5,EWM_Close_mean_5,Target_lag_10,SM_Close_mean_10,SM_Close_median_10,EWM_Close_mean_10,RSI_10,SM_Close_std_10,SM_Close_BB_High_10,SM_Close_BB_Low_10,Target_lag_15,SM_Close_mean_15,SM_Close_median_15,EWM_Close_mean_15,RSI_15,SM_Close_std_15,SM_Close_BB_High_15,SM_Close_BB_Low_15,SM_Volume_mean_5,SM_Volume_median_5,EWM_Volume_mean_5,SM_Volume_mean_10,SM_Volume_median_10,EWM_Volume_mean_10,SM_Volume_mean_15,SM_Volume_median_15,EWM_Volume_mean_15
14413632,1589328000,0,160.0,15.726562,3104.360107,-0.002546,,,0.085938,-0.070312,2,20,0.0,0.0,-0.002546,15.726562,15.726562,15.726562,-0.002546,15.726562,15.726562,15.726562,57.6875,0.0,15.726562,15.726562,-0.002546,15.726562,15.726562,15.726562,55.875,0.0,15.726562,15.726562,3104.360107,3104.360107,3104.360107,3104.360107,3104.360107,3104.360107,3104.360107,3104.360107,3104.360107
14413645,1589328060,0,43.0,15.75,785.429993,-0.001049,,,0.03125,0.015625,2,20,0.003342,0.001856,-0.001049,15.75,15.75,15.740625,-0.001049,15.75,15.75,15.739453,57.6875,0.0,15.75,15.75,-0.001049,15.75,15.75,15.739062,55.875,0.0,15.75,15.75,785.429993,785.429993,1713.002075,785.429993,785.429993,1828.948486,785.429993,785.429993,1867.597412
14413658,1589328120,0,40.0,15.757812,1467.939941,-0.000627,,,0.03125,0.015625,2,20,0.005795,0.003469,-0.000627,15.757812,15.757812,15.748767,-0.000627,15.757812,15.757812,15.746834,57.6875,0.0,15.757812,15.757812,-0.000627,15.757812,15.757812,15.746163,55.875,0.0,15.757812,15.757812,1467.939941,1467.939941,1596.920044,1467.939941,1467.939941,1683.825439,1467.939941,1467.939941,1716.247803
14413670,1589328180,0,34.0,15.765625,961.549988,-0.000198,,,0.015625,0.007812,2,20,0.00856,0.005192,-0.000198,15.765625,15.765625,15.755769,-0.000198,15.765625,15.765625,15.753024,57.6875,0.0,15.765625,15.765625,-0.000198,15.765625,15.765625,15.752042,55.875,0.0,15.765625,15.765625,961.549988,961.549988,1332.99707,961.549988,961.549988,1445.86792,961.549988,961.549988,1488.280151
14413681,1589328240,0,26.0,15.765625,1163.189941,0.000259,,,0.023438,0.0,2,20,0.00988,0.006588,0.000259,15.753125,15.757812,15.759553,0.000259,15.765625,15.765625,15.756641,57.6875,0.0,15.765625,15.765625,0.000259,15.765625,15.765625,15.755527,55.875,0.0,15.765625,15.765625,1496.494019,1163.189941,1267.810425,1163.189941,1163.189941,1364.71875,1163.189941,1163.189941,1404.853638
14413694,1589328300,0,138.0,15.804688,2817.709961,0.000312,,,0.054688,0.039062,2,20,0.021072,0.010513,-0.002546,15.76875,15.765625,15.776046,0.000312,15.804688,15.804688,15.769121,57.6875,0.0,15.804688,15.804688,0.000312,15.804688,15.804688,15.766676,55.875,0.0,15.804688,15.804688,1439.16394,1163.189941,1834.164673,2817.709961,2817.709961,1742.110718,2817.709961,2817.709961,1725.255615
14413707,1589328360,0,64.0,15.796875,2703.879883,0.000301,,,0.015625,-0.007812,2,20,0.025635,0.014343,-0.001049,15.778125,15.765625,15.783421,0.000301,15.796875,15.796875,15.775808,57.6875,0.0,15.796875,15.796875,0.000301,15.796875,15.796875,15.772892,55.875,0.0,15.796875,15.796875,1822.854004,1467.939941,2142.092041,2703.879883,2703.879883,1973.858521,2703.879883,2703.879883,1926.683594
14413719,1589328420,0,15.0,15.78125,847.809998,2.8e-05,,,0.015625,-0.007812,2,20,0.02359,0.016556,-0.000627,15.782812,15.78125,15.782667,2.8e-05,15.78125,15.78125,15.777046,57.6875,0.0,15.78125,15.78125,2.8e-05,15.78125,15.78125,15.774484,55.875,0.0,15.78125,15.78125,1698.828003,1163.189941,1693.147583,847.809998,847.809998,1717.677002,847.809998,847.809998,1721.228027
14413731,1589328480,0,65.0,15.820312,1327.428955,0.000457,,,0.046875,0.015625,2,20,0.034088,0.020615,-0.000198,15.79375,15.796875,15.795551,0.000457,15.820312,15.820312,15.78646,57.6875,0.0,15.820312,15.820312,0.000457,15.820312,15.820312,15.782675,55.875,0.0,15.820312,15.820312,1772.003784,1327.428955,1567.985596,1327.428955,1327.428955,1632.772705,1327.428955,1327.428955,1650.840576
14413744,1589328540,0,30.0,15.789062,998.530029,6.5e-05,,,0.023438,-0.015625,2,20,0.030823,0.022903,0.000259,15.798437,15.796875,15.79335,6.5e-05,15.775782,15.773438,15.787006,57.6875,0.028061,15.845931,15.705631,6.5e-05,15.789062,15.789062,15.783758,55.875,0.0,15.789062,15.789062,1739.071777,1327.428955,1374.817261,1617.782837,1245.309448,1499.546143,998.530029,998.530029,1540.193115


In [35]:
gc.collect()

0

In [36]:
df_test = df_train[~((df_train["group_num"].isna()) & (df_train["row_id"].isna()))]
print(df_test.shape)

gc.collect()

df_train.drop(  df_train[~((df_train["group_num"].isna()) & (df_train["row_id"].isna()))].index, inplace=True)
print(df_train.shape)

df_test.drop(["Target"], axis=1, inplace=True)
df_train.drop(["group_num", "row_id"], axis=1, inplace=True)

df_train.shape, df_test.shape

(56, 43)


: 

In [None]:
df_train.head()

In [None]:
df_train.isna().sum()

# Training With LGBM

In [None]:
# 10-fold time series cross validation
def timecv_model(model, X, y):
    tfold = TimeSeriesSplit(n_splits = 5)
    pcc_list = []
    for _, (train_index, test_index) in tqdm(enumerate(tfold.split(X), start=1)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        clf = model.fit(X_train, y_train)
        pred = clf.predict(X_test)
        pcc = pearsonr(pred, y_test) 
        pcc_list.append(pcc[0])
        gc.collect()
    
    return pcc_list

def cv_result(model, X, y):
    model_name = model.__class__.__name__
    pcc_ = timecv_model(model, X, y)
    for i, pcc in enumerate(pcc_):
        print(f'{i}th fold: {model_name} PCC: {pcc:.4f}')
    print(f'\n{model_name} average PCC: {np.mean(pcc_):.4f}')

In [None]:
def train_lgb(model, X, y):
    # print("model fitting ...")
    # clf = model.fit(X, y)
    # print("predicting ...")
    # pred = clf.predict(X)
    # pcc = pearsonr(pred, y)[0]
    
    # print("pcc = ", str(pcc))
    # gc.collect()
    cv_result(model,X,y)
    
    return model

In [None]:
lgb_model = LGBMRegressor(n_estimators = 576,
                          max_depth = 17,
                          num_leaves = 35,
                          learning_rate = 0.06891972238739223, 
                          min_child_samples = 47, 
                          colsample_bytree=0.4000480946836777,
                          subsample = 0.6058272745943716,
                          seed = 42)


y = df_train["Target"]
X = df_train.drop(['timestamp', "Target"], axis=1)

gc.collect()

model = train_lgb(lgb_model, X, y)
model

In [None]:
def get_prediction(row):
    y_pred = df_test.loc[((df_test["timestamp"] == row["timestamp"]) & (df_test["Asset_ID"] == row["Asset_ID"])), "Target"]
    print(y_pred.values[0])
    return y_pred.values[0]

# Predict and Submit

In [None]:
df_pred = pd.read_csv("input/example_test.csv")
df_pred.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,group_num,row_id
0,1623542400,3,1201.0,1.478556,1.48603,1.478,1.483681,654799.561103,1.481439,0,0
1,1623542400,2,1020.0,580.306667,583.89,579.91,582.276667,1227.988328,581.697038,0,1
2,1623542400,0,626.0,343.7895,345.108,343.64,344.598,1718.832569,344.441729,0,2
3,1623542400,1,2888.0,35554.289632,35652.46465,35502.67,35602.004286,163.811537,35583.469303,0,3
4,1623542400,4,433.0,0.312167,0.3126,0.31192,0.312208,585577.410442,0.312154,0,4


In [None]:
df_sample_sub = pd.read_csv("input/example_test.csv")
df_sample_sub.head()

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,group_num,row_id
0,1623542400,3,1201.0,1.478556,1.48603,1.478,1.483681,654799.561103,1.481439,0,0
1,1623542400,2,1020.0,580.306667,583.89,579.91,582.276667,1227.988328,581.697038,0,1
2,1623542400,0,626.0,343.7895,345.108,343.64,344.598,1718.832569,344.441729,0,2
3,1623542400,1,2888.0,35554.289632,35652.46465,35502.67,35602.004286,163.811537,35583.469303,0,3
4,1623542400,4,433.0,0.312167,0.3126,0.31192,0.312208,585577.410442,0.312154,0,4


In [None]:
df_test.columns

Index(['timestamp', 'Asset_ID', 'Count', 'Close', 'Volume', 'group_num',
       'row_id', 'High-Low', 'Close-Open', 'dayofweek', 'weekofyear',
       'MACD_Close_macd', 'MACD_Close_signal', 'Target_lag_5',
       'SM_Close_mean_5', 'SM_Close_median_5', 'EWM_Close_mean_5',
       'Target_lag_10', 'SM_Close_mean_10', 'SM_Close_median_10',
       'EWM_Close_mean_10', 'RSI_10', 'SM_Close_std_10', 'SM_Close_BB_High_10',
       'SM_Close_BB_Low_10', 'Target_lag_15', 'SM_Close_mean_15',
       'SM_Close_median_15', 'EWM_Close_mean_15', 'RSI_15', 'SM_Close_std_15',
       'SM_Close_BB_High_15', 'SM_Close_BB_Low_15', 'SM_Volume_mean_5',
       'SM_Volume_median_5', 'EWM_Volume_mean_5', 'SM_Volume_mean_10',
       'SM_Volume_median_10', 'EWM_Volume_mean_10', 'SM_Volume_mean_15',
       'SM_Volume_median_15', 'EWM_Volume_mean_15'],
      dtype='object')

In [None]:
X_test = df_test.drop(["group_num", "row_id", "timestamp"], axis=1)
df_test["Target"] = model.predict(X_test)
df_test.head()

Unnamed: 0,timestamp,Asset_ID,Count,Close,Volume,group_num,row_id,High-Low,Close-Open,dayofweek,weekofyear,MACD_Close_macd,MACD_Close_signal,Target_lag_5,SM_Close_mean_5,SM_Close_median_5,EWM_Close_mean_5,Target_lag_10,SM_Close_mean_10,SM_Close_median_10,EWM_Close_mean_10,RSI_10,SM_Close_std_10,SM_Close_BB_High_10,SM_Close_BB_Low_10,Target_lag_15,SM_Close_mean_15,SM_Close_median_15,EWM_Close_mean_15,RSI_15,SM_Close_std_15,SM_Close_BB_High_15,SM_Close_BB_Low_15,SM_Volume_mean_5,SM_Volume_median_5,EWM_Volume_mean_5,SM_Volume_mean_10,SM_Volume_median_10,EWM_Volume_mean_10,SM_Volume_mean_15,SM_Volume_median_15,EWM_Volume_mean_15,Target
2,1623542400,0,626.0,344.5,1718.83252,0.0,2.0,1.25,0.75,6,23,-0.090759,-0.115173,-0.001917,343.799988,343.75,344.073944,-0.002911,343.825012,343.875,343.983917,55.78125,0.486572,345.041565,342.608429,-0.005814,343.883331,343.75,344.066772,51.34375,0.507812,345.152527,342.614136,1379.958252,1622.844116,1378.046997,944.173462,683.439087,1137.17334,949.931519,681.673218,1015.506592,-0.000583
16,1623542460,0,458.0,344.0,1217.352417,1.0,16.0,1.25,-0.25,6,23,-0.078186,-0.102783,-0.004539,344.149994,344.0,344.032867,-0.003244,343.825012,343.875,343.989227,47.65625,0.486572,345.041565,342.608429,-0.002844,343.816681,343.75,344.051147,46.0625,0.406006,344.831818,342.801514,1420.716187,1231.211182,1288.772217,1107.349731,1217.352417,1163.678833,979.783936,758.807129,1062.814209,-0.002389
30,1623542520,0,535.0,343.0,975.797058,2.0,30.0,1.5,-0.75,6,23,-0.108398,-0.102783,-0.004524,343.700012,344.0,343.459045,-0.002598,343.649994,343.625,343.662201,35.0625,0.591797,345.129028,342.17099,-0.001415,343.733337,343.75,343.804779,37.21875,0.495117,344.971375,342.4953,1221.026245,1217.352417,1114.897095,1174.7229,1217.352417,1101.569214,961.016418,975.797058,1042.419556,-0.002583
44,1623542580,0,614.0,342.25,1295.198486,3.0,44.0,1.5,-1.0,6,23,-0.153442,-0.117981,-0.001328,342.899994,343.0,342.787354,-0.005203,343.450012,343.5,343.195374,28.171875,0.831543,345.529175,341.37085,-0.004032,343.516663,343.75,343.440369,31.953125,0.709961,345.291779,341.741547,1151.868652,1217.352417,1215.064575,1225.36438,1224.281738,1165.578857,1060.817993,1217.352417,1101.664551,0.000196
3,1623542400,1,2888.0,35616.0,163.811539,0.0,3.0,160.0,64.0,6,23,-0.124573,-0.155518,0.002964,35552.0,35552.0,35573.933594,0.001691,35539.199219,35536.0,35563.886719,54.6875,50.46875,35665.410156,35412.992188,-0.000754,35549.867188,35552.0,35576.121094,49.46875,63.375,35708.335938,35391.398438,115.963493,102.154205,127.879196,88.659431,81.342072,107.19754,100.434464,88.136208,95.803955,0.001941


In [None]:
df_test[["group_num", "row_id", "Target"]].to_csv('submission01.csv', index=False)

In [None]:
result=pd.read_csv('submission01.csv')
plt.scatter(result['row_id'],result['Target'],cmap='hsv')
plt.show()

In [None]:
df_test.head()

Unnamed: 0,timestamp,Asset_ID,Count,Close,Volume,group_num,row_id,High-Low,Close-Open,dayofweek,weekofyear,MACD_Close_macd,MACD_Close_signal,Target_lag_5,SM_Close_mean_5,SM_Close_median_5,EWM_Close_mean_5,Target_lag_10,SM_Close_mean_10,SM_Close_median_10,EWM_Close_mean_10,RSI_10,SM_Close_std_10,SM_Close_BB_High_10,SM_Close_BB_Low_10,Target_lag_15,SM_Close_mean_15,SM_Close_median_15,EWM_Close_mean_15,RSI_15,SM_Close_std_15,SM_Close_BB_High_15,SM_Close_BB_Low_15,SM_Volume_mean_5,SM_Volume_median_5,EWM_Volume_mean_5,SM_Volume_mean_10,SM_Volume_median_10,EWM_Volume_mean_10,SM_Volume_mean_15,SM_Volume_median_15,EWM_Volume_mean_15,Target
2,1623542400,0,626.0,344.5,1718.83252,0.0,2.0,1.25,0.75,6,23,-0.090759,-0.115173,-0.001917,343.799988,343.75,344.073944,-0.002911,343.825012,343.875,343.983917,55.78125,0.486572,345.041565,342.608429,-0.005814,343.883331,343.75,344.066772,51.34375,0.507812,345.152527,342.614136,1379.958252,1622.844116,1378.046997,944.173462,683.439087,1137.17334,949.931519,681.673218,1015.506592,-0.000583
16,1623542460,0,458.0,344.0,1217.352417,1.0,16.0,1.25,-0.25,6,23,-0.078186,-0.102783,-0.004539,344.149994,344.0,344.032867,-0.003244,343.825012,343.875,343.989227,47.65625,0.486572,345.041565,342.608429,-0.002844,343.816681,343.75,344.051147,46.0625,0.406006,344.831818,342.801514,1420.716187,1231.211182,1288.772217,1107.349731,1217.352417,1163.678833,979.783936,758.807129,1062.814209,-0.002389
30,1623542520,0,535.0,343.0,975.797058,2.0,30.0,1.5,-0.75,6,23,-0.108398,-0.102783,-0.004524,343.700012,344.0,343.459045,-0.002598,343.649994,343.625,343.662201,35.0625,0.591797,345.129028,342.17099,-0.001415,343.733337,343.75,343.804779,37.21875,0.495117,344.971375,342.4953,1221.026245,1217.352417,1114.897095,1174.7229,1217.352417,1101.569214,961.016418,975.797058,1042.419556,-0.002583
44,1623542580,0,614.0,342.25,1295.198486,3.0,44.0,1.5,-1.0,6,23,-0.153442,-0.117981,-0.001328,342.899994,343.0,342.787354,-0.005203,343.450012,343.5,343.195374,28.171875,0.831543,345.529175,341.37085,-0.004032,343.516663,343.75,343.440369,31.953125,0.709961,345.291779,341.741547,1151.868652,1217.352417,1215.064575,1225.36438,1224.281738,1165.578857,1060.817993,1217.352417,1101.664551,0.000196
3,1623542400,1,2888.0,35616.0,163.811539,0.0,3.0,160.0,64.0,6,23,-0.124573,-0.155518,0.002964,35552.0,35552.0,35573.933594,0.001691,35539.199219,35536.0,35563.886719,54.6875,50.46875,35665.410156,35412.992188,-0.000754,35549.867188,35552.0,35576.121094,49.46875,63.375,35708.335938,35391.398438,115.963493,102.154205,127.879196,88.659431,81.342072,107.19754,100.434464,88.136208,95.803955,0.001941


In [None]:
df_pred["Target"] = 0.0

for j, row in df_test.iterrows():
    print(row["Target"])
    df_pred.loc[((df_pred["group_num"] == int(row["group_num"])) & (df_pred["row_id"] == int(row["row_id"]))), "Target"] = row["Target"]
    print("pred " + str(row["row_id"])  + " " + str(df_pred.loc[df_pred["row_id"] == row["row_id"], "Target"]))

-0.0005831014923736337
pred 2.0 2   -0.000583
Name: Target, dtype: float64
-0.0023892821391975636
pred 16.0 16   -0.002389
Name: Target, dtype: float64
-0.00258302341404272
pred 30.0 30   -0.002583
Name: Target, dtype: float64
0.00019623316487170136
pred 44.0 44    0.000196
Name: Target, dtype: float64
0.0019410546422208047
pred 3.0 3    0.001941
Name: Target, dtype: float64
0.0014663250076899857
pred 17.0 17    0.001466
Name: Target, dtype: float64
0.0019926975592880584
pred 31.0 31    0.001993
Name: Target, dtype: float64
0.0011638893225402057
pred 45.0 45    0.001164
Name: Target, dtype: float64
-0.0020429799876563784
pred 1.0 1   -0.002043
Name: Target, dtype: float64
-0.005106870397562267
pred 15.0 15   -0.005107
Name: Target, dtype: float64
-0.006624449572643162
pred 29.0 29   -0.006624
Name: Target, dtype: float64
-0.003864249258574727
pred 43.0 43   -0.003864
Name: Target, dtype: float64
-0.0024388746973118976
pred 0.0 0   -0.002439
Name: Target, dtype: float64
-0.0021145903935

In [None]:
df_pred

Unnamed: 0,timestamp,Asset_ID,Count,Open,High,Low,Close,Volume,VWAP,group_num,row_id,Target
0,1623542400,3,1201.0,1.478556,1.48603,1.478,1.483681,654799.6,1.481439,0,0,-0.002439
1,1623542400,2,1020.0,580.306667,583.89,579.91,582.276667,1227.988,581.697038,0,1,-0.002043
2,1623542400,0,626.0,343.7895,345.108,343.64,344.598,1718.833,344.441729,0,2,-0.000583
3,1623542400,1,2888.0,35554.289632,35652.46465,35502.67,35602.004286,163.8115,35583.469303,0,3,0.001941
4,1623542400,4,433.0,0.312167,0.3126,0.31192,0.312208,585577.4,0.312154,0,4,-0.000339
5,1623542400,5,359.0,4.83255,4.8459,4.8229,4.837583,47143.55,4.836607,0,5,-0.00047
6,1623542400,7,541.0,55.22308,55.494,55.182,55.34468,6625.202,55.298816,0,6,-0.003834
7,1623542400,6,2186.0,2371.194286,2379.2,2369.67,2374.380714,1214.129,2374.335307,0,7,0.000354
8,1623542400,8,35.0,1.00315,1.0198,0.9873,1.0033,7061.928,1.002936,0,8,-0.00536
9,1623542400,9,560.0,161.933429,162.48,161.73,162.214714,1485.009,162.23131,0,9,0.00084
