In [4]:
import polars as pl
import pandas as pd
import numpy as np
from enum import Enum
%matplotlib inline
import matplotlib.pyplot as plt
import sklearn.preprocessing as preprocessing

def read_data(train_file = 'train_data.csv', val_file = 'val_data.csv', test_file = 'test_data.csv'):
    """Load the train, validation and test CSV files with polars.

    Args:
        train_file: Path of the training CSV.
        val_file: Path of the validation CSV.
        test_file: Path of the test CSV.

    Returns:
        A 3-tuple ``(train_data, val_data, test_data)`` holding the result of
        ``pl.read_csv`` for each path, in that order.
    """
    csv_paths = (train_file, val_file, test_file)
    # Files are read in the same order in which the frames are returned.
    return tuple(pl.read_csv(csv_path) for csv_path in csv_paths)

# Split a frame into the model inputs (x) and the prediction target (y).
def extract_y(data, target_col = 'next_5_min'):
    """Separate the target column from the remaining columns.

    Args:
        data: Frame supporting ``data[name]`` column access and
            ``data.drop(name)`` to remove a column by name.
        target_col: Name of the column to use as the target. Defaults to
            ``'next_5_min'``.

    Returns:
        ``(x, y)`` where ``y`` is ``data[target_col]`` and ``x`` is ``data``
        with that column dropped.
    """
    y = data[target_col]
    x = data.drop(target_col)
    return x, y


# Min-max scaling is a poor fit here because the maximum changes over time.
# Normalization has to handle two cases:
#   1. Train data - create a scaler, fit it on the train set, and produce the normalized data.
#   2. Validation/test data - take the scaler fitted on the train set as input and normalize with it.
def min_max_scaler(data, scaler = None):
    """Scale ``data`` with a min-max scaler.

    Args:
        data: Input accepted by the scaler's ``fit_transform`` / ``transform``.
        scaler: Optional scaler that was already fitted (e.g. on the train
            set). When omitted, a new ``preprocessing.MinMaxScaler`` is
            created and fitted on ``data``. When given, it is NOT refitted;
            ``data`` is only transformed with it, so validation/test data
            are scaled with the statistics learned from the train data.

    Returns:
        ``(scaled_data, scaler)`` - the transformed data and the scaler that
        produced it (the newly fitted one, or the one passed in).
    """
    if scaler is None:
        # Train case: learn the scaling parameters from this data.
        scaler = preprocessing.MinMaxScaler()
        return scaler.fit_transform(data), scaler
    # Validation/test case: reuse the parameters of the supplied scaler.
    return scaler.transform(data), scaler

def z_score_normalize(data, scaler = None):
    """Standardize ``data`` with a standard (z-score) scaler.

    Args:
        data: Input accepted by the scaler's ``fit_transform`` / ``transform``.
        scaler: Optional scaler that was already fitted (e.g. on the train
            set). When omitted, a new ``preprocessing.StandardScaler`` is
            created and fitted on ``data``. When given, it is NOT refitted;
            ``data`` is only transformed with it, so validation/test data
            are normalized with the statistics learned from the train data.

    Returns:
        ``(scaled_data, scaler)`` - the transformed data and the scaler that
        produced it (the newly fitted one, or the one passed in).
    """
    if scaler is None:
        # Train case: learn mean/variance from this data.
        scaler = preprocessing.StandardScaler()
        return scaler.fit_transform(data), scaler
    # Validation/test case: reuse the parameters of the supplied scaler.
    return scaler.transform(data), scaler
    
def scale_back(data, scaler):
    """Undo a scaling by applying the scaler's inverse transform.

    Args:
        data: Values in the scaler's transformed space.
        scaler: Object exposing ``inverse_transform`` (such as the scaler
            returned by the normalization helpers in this file).

    Returns:
        The result of ``scaler.inverse_transform(data)``.
    """
    restored = scaler.inverse_transform(data)
    return restored

In [5]:
# Load the three splits, then separate the features from the target for each one.
train_data, val_data, test_data = read_data()
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    extract_y(split) for split in (train_data, val_data, test_data)
)


In [7]:
# Inspect the test split before normalizing; nothing is normalized in this cell.
print(test_data)

shape: (19_800, 6)
┌──────────┬──────────────┬──────────────┬───────────┬────────────┬────────────┐
│ close    ┆ ema5         ┆ ema20        ┆ macd520   ┆ date_only  ┆ next_5_min │
│ ---      ┆ ---          ┆ ---          ┆ ---       ┆ ---        ┆ ---        │
│ f64      ┆ f64          ┆ f64          ┆ f64       ┆ str        ┆ f64        │
╞══════════╪══════════════╪══════════════╪═══════════╪════════════╪════════════╡
│ 8747.5   ┆ 8736.397692  ┆ 8731.307373  ┆ 5.090319  ┆ 2015-01-22 ┆ 8747.6     │
│ 8747.6   ┆ 8747.842083  ┆ 8738.540436  ┆ 9.301647  ┆ 2015-01-22 ┆ 8744.5     │
│ 8738.4   ┆ 8743.221591  ┆ 8740.490543  ┆ 2.731048  ┆ 2015-01-22 ┆ 8742.15    │
│ 8737.15  ┆ 8740.999757  ┆ 8740.89575   ┆ 0.104007  ┆ 2015-01-22 ┆ 8741.4     │
│ 8749.4   ┆ 8744.610256  ┆ 8742.00373   ┆ 2.606526  ┆ 2015-01-22 ┆ 8750.65    │
│ …        ┆ …            ┆ …            ┆ …         ┆ …          ┆ …          │
│ 17306.9  ┆ 17312.369659 ┆ 17316.507704 ┆ -4.138045 ┆ 2022-10-07 ┆ 17303.5    │
│ 17311.9