In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the historical bars from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load this file if it comes
# from a trusted source.
data = pd.read_pickle('M1_HISTORICAL_DATA.pkl')

In [4]:
# Drop the 'real_volume' column.
# Rebinding `data` (instead of inplace=True) together with errors='ignore' keeps this cell
# idempotent: running it a second time no longer raises KeyError.
data = data.drop(columns='real_volume', errors='ignore')

In [14]:
# NOTE(review): the output below already contains the MA*/RSI-* columns that are only
# created in the In [6] cell further down, i.e. this cell was executed out of order.
data.head()

Unnamed: 0,time,open,high,low,close,tick_volume,spread,MA5,MA8,MA13,MA20,RSI-4,RSI-8
19,2023-01-02 09:19:00,1.06801,1.06802,1.06799,1.06799,4,77,1.068018,1.068031,1.068165,1.068301,22.222222,41.463415
20,2023-01-02 09:20:00,1.068,1.06807,1.068,1.06804,11,72,1.067998,1.068027,1.068135,1.068273,78.571429,46.666667
21,2023-01-02 09:21:00,1.06807,1.06808,1.06804,1.06807,10,68,1.06802,1.06803,1.068104,1.068248,80.0,52.173913
22,2023-01-02 09:22:00,1.06808,1.06819,1.06807,1.06819,17,65,1.068062,1.068049,1.068078,1.068229,86.956522,63.157895
23,2023-01-02 09:23:00,1.06818,1.06821,1.06816,1.06821,5,67,1.0681,1.068058,1.068063,1.06821,100.0,57.142857


In [6]:
# Simple moving averages of the close price over 5, 8, 13 and 20 rows.
for window in (5, 8, 13, 20):
    data[f'MA{window}'] = data['close'].rolling(window=window).mean()
# Calculate RSI
def calculate_rsi(data, periods=4):
    """Return a simple-moving-average RSI of ``data['close']``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a numeric ``'close'`` column.
    periods : int, default 4
        Number of rows in the rolling window used to average gains and losses.

    Returns
    -------
    pandas.Series
        RSI values in [0, 100], aligned with ``data``. The first ``periods - 1``
        rows are NaN because the rolling window is not yet full.

    Notes
    -----
    RSI is computed as ``100 * avg_gain / (avg_gain + avg_loss)``, which is
    algebraically the same as ``100 - 100 / (1 + avg_gain / avg_loss)`` but
    avoids dividing by a zero loss. When the close did not move at all inside
    the window (avg_gain == avg_loss == 0) the ratio is undefined; the neutral
    value 50 is returned instead of NaN so flat stretches do not poison
    downstream features.
    """
    delta = data['close'].diff()
    # The very first diff is NaN; both comparisons are False for NaN, so it is
    # treated as "no move" (0) in the gain and the loss series.
    gain = delta.where(delta > 0, 0).rolling(window=periods).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=periods).mean()

    total = gain + loss
    rsi = 100 * (gain / total)
    # `total != 0` is True for NaN, so warm-up rows stay NaN; only genuinely
    # flat windows (0/0) are replaced by the neutral value.
    return rsi.where(total != 0, 50.0)

# Add one RSI column per look-back length (4 and 8 rows).
for periods in (4, 8):
    data[f'RSI-{periods}'] = calculate_rsi(data, periods)

In [15]:
# Preview the frame with the indicator columns.
# NOTE(review): the displayed rows start at index 19, so the NaN warm-up rows were
# presumably dropped in a cell that is not shown here; TODO confirm.
data.head()

Unnamed: 0,time,open,high,low,close,tick_volume,spread,MA5,MA8,MA13,MA20,RSI-4,RSI-8
19,2023-01-02 09:19:00,1.06801,1.06802,1.06799,1.06799,4,77,1.068018,1.068031,1.068165,1.068301,22.222222,41.463415
20,2023-01-02 09:20:00,1.068,1.06807,1.068,1.06804,11,72,1.067998,1.068027,1.068135,1.068273,78.571429,46.666667
21,2023-01-02 09:21:00,1.06807,1.06808,1.06804,1.06807,10,68,1.06802,1.06803,1.068104,1.068248,80.0,52.173913
22,2023-01-02 09:22:00,1.06808,1.06819,1.06807,1.06819,17,65,1.068062,1.068049,1.068078,1.068229,86.956522,63.157895
23,2023-01-02 09:23:00,1.06818,1.06821,1.06816,1.06821,5,67,1.0681,1.068058,1.068063,1.06821,100.0,57.142857


In [33]:
# Number of rows currently in the frame.
print(data.shape[0])

341271


# FEATURES & LABELS EXTRACTION

In [34]:
# PRICE OUTPUT FEATURES AND LABELS
# Every sample i is built from 14 consecutive rows of `data` (positional order):
#   features = close of rows i .. i+9, followed by the full snapshot of row i+10
#              (open, high, low, close, tick_volume, spread, MAs, RSIs) -> 22 values
#   label    = close of rows i+11 .. i+13 (the next three closes)      ->  3 values
N_SAMPLES = 341000  # number of windows to extract
LOOKBACK = 10       # past closes per sample
HORIZON = 3         # future closes per label
SNAPSHOT_COLS = ['open', 'high', 'low', 'close', 'tick_volume', 'spread',
                 'MA5', 'MA8', 'MA13', 'MA20', 'RSI-4', 'RSI-8']

# The last window touches row N_SAMPLES - 1 + LOOKBACK + HORIZON, so fail early with a
# clear message instead of an IndexError in the middle of the extraction.
max_samples = len(data) - (LOOKBACK + HORIZON)
if N_SAMPLES > max_samples:
    raise ValueError(
        f"N_SAMPLES={N_SAMPLES} exceeds the {max_samples} windows available "
        f"in a frame of {len(data)} rows"
    )

# Vectorized extraction: pull the columns out once as float arrays and index them with
# integer offset grids, instead of ~25 `data.iloc[row][col]` lookups per sample.
close = data['close'].to_numpy(dtype=float)
snapshot = data[SNAPSHOT_COLS].to_numpy(dtype=float)
starts = np.arange(N_SAMPLES)[:, None]

# `features` / `label` are NumPy arrays holding the same values, in the same order, as the
# former nested lists; np.array(features) in the next cell works unchanged.
features = np.hstack([
    close[starts + np.arange(LOOKBACK)],
    snapshot[LOOKBACK:LOOKBACK + N_SAMPLES],
])
label = close[starts + np.arange(LOOKBACK + 1, LOOKBACK + 1 + HORIZON)]

In [35]:
# Materialize the extracted samples as NumPy arrays for scaling and saving.
_features, _label = (np.array(rows) for rows in (features, label))

In [37]:
# Sanity check: print the shape of the feature matrix, then of the label matrix.
for matrix in (_features, _label):
    print(matrix.shape)

(341000, 22)
(341000, 3)


# FEATURE_SCALING

In [38]:
import joblib

In [39]:
# Standardizer instance; it is fitted in the next cell and persisted to 'scaler.pkl' afterwards.
scaler = StandardScaler()

In [40]:
# Features and labels need separate scalers: calling fit_transform() twice on the same
# instance discards the statistics of the first fit, which made it impossible to scale
# new inputs the same way at inference time.
feature_scaler = StandardScaler()
FEATURES = feature_scaler.fit_transform(_features)
joblib.dump(feature_scaler, 'feature_scaler.pkl')

# `scaler` keeps its previous role (fitted on the labels), so 'scaler.pkl' written in the
# following cell is unchanged and can still be used to inverse-transform predictions.
LABELS = scaler.fit_transform(_label)

In [41]:
# Persist the fitted scaler for later reuse.
# NOTE(review): the most recent fit of `scaler` (previous cell) was on `_label`, so the
# pickled object holds the label statistics (3 columns), not those of the 22 feature columns.
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [42]:
# Show the first sample before and after standardization.
for caption, sample in (('BEFORE SCALER', _features[0]), ('AFTER SCALER', FEATURES[0])):
    print(caption)
    print(sample)

BEFORE SCALER
[ 1.06799     1.06804     1.06807     1.06819     1.06821     1.06822
  1.06821     1.06708     1.06765     1.06769     1.0677      1.06771
  1.0677      1.06771     2.         77.          1.067668    1.06787
  1.06792769  1.0679895  35.79545455 40.625     ]
AFTER SCALER
[-0.76270396 -0.759688   -0.75788021 -0.75063564 -0.74943191 -0.74883225
 -0.74944076 -0.81770704 -0.78327882 -0.78086704 -0.78029327 -0.78436893
 -0.77560532 -0.77966341 -1.03684745  8.62035351 -0.78221287 -0.77001646
 -0.76654104 -0.76281981 -0.47436431 -0.43699661]


In [43]:
# Write the raw and the standardized matrices as plain-text files (np.savetxt defaults).
exports = (
    ("no_scalar_features.txt", _features),
    ("no_scalar_labels.txt", _label),
    ("scalar_features.txt", FEATURES),
    ("labels.txt", LABELS),  # standardized labels
)
for filename, matrix in exports:
    np.savetxt(filename, matrix)
